Tamil Eniyan committed on
Commit
0282eea
·
1 Parent(s): 226641d

Add application file

Browse files
Files changed (1) hide show
  1. app.py +25 -17
app.py CHANGED
@@ -36,7 +36,7 @@ curated_qa_pairs = [
36
  },
37
  {
38
  "question": "What are the comparator treatments for Adagrasib in the PICO exercises?",
39
- "answer": "In PICOs 1 and 2, the comparator treatments are Sotorasib monotherapy and Platinum-based doublet chemotherapy. In PICO 3, the comparator treatment is Docetaxel monotherapy. In PICO 4, the comparator treatment is Immune checkpoint inhibitor monotherapy."
40
  },
41
  {
42
  "question": "What are the effectiveness measures used in the PICO framework?",
@@ -44,7 +44,7 @@ curated_qa_pairs = [
44
  },
45
  {
46
  "question": "What adverse events were monitored in the PICO studies?",
47
- "answer": "The adverse events monitored in the PICO studies include a range of treatment-related side effects, though the provided context does not detail them fully. More detailed study data would be required for a complete list."
48
  },
49
  {
50
  "question": "How does Adagrasib compare to immune checkpoint inhibitors?",
@@ -55,7 +55,7 @@ curated_qa_pairs = [
55
  def get_curated_context(query, curated_qa, embed_model):
56
  """
57
  Retrieve the most relevant curated Q/A pair based on the user's query.
58
- Returns a formatted string if the similarity (based on L2 distance) is below a threshold.
59
  """
60
  curated_questions = [qa["question"] for qa in curated_qa]
61
  query_embedding = embed_model.encode([query]).astype('float32')
@@ -70,19 +70,20 @@ def get_curated_context(query, curated_qa, embed_model):
70
  k = 1
71
  distances, indices = curated_index.search(query_embedding, k)
72
 
73
- # Define a threshold for relevance (tune as needed)
74
  threshold = 1.0
75
  if distances[0][0] < threshold:
76
  idx = indices[0][0]
77
  qa_pair = curated_qa[idx]
 
78
  return f"Curated Q/A Pair:\nQuestion: {qa_pair['question']}\nAnswer: {qa_pair['answer']}\n"
79
  else:
80
  return ""
81
 
82
  def main():
83
- st.title("PDF Question-Answering App with Enhanced Context")
84
 
85
- # Initialize conversation history if not already in session_state
86
  if 'conversation_history' not in st.session_state:
87
  st.session_state.conversation_history = ""
88
 
@@ -98,31 +99,38 @@ def main():
98
  # Append the current question to conversation history
99
  st.session_state.conversation_history += f"User: {query}\n"
100
 
101
- # Retrieve relevant PDF context using FAISS index
102
  query_embedding = embed_model.encode([query]).astype('float32')
103
- k = 3 # number of top chunks to retrieve
104
  distances, indices = index.search(query_embedding, k)
105
 
106
  pdf_context = ""
107
  for idx in indices[0]:
108
  pdf_context += chunks[idx] + "\n"
109
 
110
- # Get curated Q/A context if the query matches any curated questions
111
  curated_context = get_curated_context(query, curated_qa_pairs, embed_model)
112
 
113
- # Combine conversation history, curated context, and PDF context for the QA pipeline
114
- combined_context = st.session_state.conversation_history + "\n" + curated_context + "\n" + pdf_context
115
 
116
- # Optionally show the retrieved contexts in expanders
117
- with st.expander("Show PDF Retrieved Context"):
118
- st.write(pdf_context)
119
  if curated_context:
120
- with st.expander("Show Curated Q/A Context"):
121
- st.write(curated_context)
 
 
 
 
 
 
 
 
 
 
 
122
 
123
  st.subheader("Answer:")
124
  try:
125
- result = qa_pipeline(question=query, context=combined_context)
126
  answer = result["answer"]
127
  st.write(answer)
128
  st.session_state.conversation_history += f"AI: {answer}\n"
 
36
  },
37
  {
38
  "question": "What are the comparator treatments for Adagrasib in the PICO exercises?",
39
+ "answer": "In PICOs 1 and 2, the comparator treatments are Sotorasib monotherapy and platinum-based doublet chemotherapy. In PICO 3, the comparator treatment is Docetaxel monotherapy. In PICO 4, the comparator treatment is immune checkpoint inhibitor monotherapy."
40
  },
41
  {
42
  "question": "What are the effectiveness measures used in the PICO framework?",
 
44
  },
45
  {
46
  "question": "What adverse events were monitored in the PICO studies?",
47
+ "answer": "The adverse events monitored in the PICO studies include a range of treatment-related side effects. The provided context does not detail them fully; more detailed study data would be needed for a complete list."
48
  },
49
  {
50
  "question": "How does Adagrasib compare to immune checkpoint inhibitors?",
 
55
  def get_curated_context(query, curated_qa, embed_model):
56
  """
57
  Retrieve the most relevant curated Q/A pair based on the user's query.
58
+ Returns a formatted string if the best match's L2 distance is below a threshold; otherwise returns an empty string.
59
  """
60
  curated_questions = [qa["question"] for qa in curated_qa]
61
  query_embedding = embed_model.encode([query]).astype('float32')
 
70
  k = 1
71
  distances, indices = curated_index.search(query_embedding, k)
72
 
73
+ # Define a threshold for relevance (tune this value as needed)
74
  threshold = 1.0
75
  if distances[0][0] < threshold:
76
  idx = indices[0][0]
77
  qa_pair = curated_qa[idx]
78
+ # Return a formatted string with the curated question and answer.
79
  return f"Curated Q/A Pair:\nQuestion: {qa_pair['question']}\nAnswer: {qa_pair['answer']}\n"
80
  else:
81
  return ""
82
 
83
  def main():
84
+ st.title("PDF Question-Answering App")
85
 
86
+ # Initialize conversation history if not already set
87
  if 'conversation_history' not in st.session_state:
88
  st.session_state.conversation_history = ""
89
 
 
99
  # Append the current question to conversation history
100
  st.session_state.conversation_history += f"User: {query}\n"
101
 
102
+ # Retrieve relevant PDF context using the FAISS index
103
  query_embedding = embed_model.encode([query]).astype('float32')
104
+ k = 3 # Number of top chunks to retrieve
105
  distances, indices = index.search(query_embedding, k)
106
 
107
  pdf_context = ""
108
  for idx in indices[0]:
109
  pdf_context += chunks[idx] + "\n"
110
 
111
+ # Get curated Q/A context if available
112
  curated_context = get_curated_context(query, curated_qa_pairs, embed_model)
113
 
114
+ base_context = st.session_state.conversation_history + "\n"
 
115
 
 
 
 
116
  if curated_context:
117
+ st.write("A curated Q/A pair was found and will be used for the answer by default.")
118
+ # Provide an option to override with full PDF context
119
+ use_full_data = st.checkbox("Check Full Data", value=False)
120
+ if use_full_data:
121
+ context_to_use = base_context + pdf_context
122
+ else:
123
+ context_to_use = base_context + curated_context
124
+ else:
125
+ context_to_use = base_context + pdf_context
126
+
127
+ # Provide an expander to show the full PDF context.
128
+ with st.expander("Show Full PDF Context"):
129
+ st.write(pdf_context)
130
 
131
  st.subheader("Answer:")
132
  try:
133
+ result = qa_pipeline(question=query, context=context_to_use)
134
  answer = result["answer"]
135
  st.write(answer)
136
  st.session_state.conversation_history += f"AI: {answer}\n"