Tamil Eniyan committed
Commit · 0282eea
Parent(s): 226641d
Add application file
app.py CHANGED
@@ -36,7 +36,7 @@ curated_qa_pairs = [
     },
     {
         "question": "What are the comparator treatments for Adagrasib in the PICO exercises?",
-        "answer": "In PICOs 1 and 2, the comparator treatments are Sotorasib monotherapy and
+        "answer": "In PICOs 1 and 2, the comparator treatments are Sotorasib monotherapy and platinum-based doublet chemotherapy. In PICO 3, the comparator treatment is Docetaxel monotherapy. In PICO 4, the comparator treatment is immune checkpoint inhibitor monotherapy."
     },
     {
         "question": "What are the effectiveness measures used in the PICO framework?",
@@ -44,7 +44,7 @@ curated_qa_pairs = [
     },
     {
         "question": "What adverse events were monitored in the PICO studies?",
-        "answer": "The adverse events monitored in the PICO studies include a range of treatment-related side effects
+        "answer": "The adverse events monitored in the PICO studies include a range of treatment-related side effects. The provided context does not detail them fully; more detailed study data would be needed for a complete list."
     },
     {
         "question": "How does Adagrasib compare to immune checkpoint inhibitors?",
@@ -55,7 +55,7 @@ curated_qa_pairs = [
 def get_curated_context(query, curated_qa, embed_model):
     """
     Retrieve the most relevant curated Q/A pair based on the user's query.
-    Returns a formatted string if the similarity (
+    Returns a formatted string if the similarity (using L2 distance) is below a threshold.
     """
     curated_questions = [qa["question"] for qa in curated_qa]
     query_embedding = embed_model.encode([query]).astype('float32')
@@ -70,19 +70,20 @@ def get_curated_context(query, curated_qa, embed_model):
     k = 1
     distances, indices = curated_index.search(query_embedding, k)

-    # Define a threshold for relevance (tune as needed)
+    # Define a threshold for relevance (tune this value as needed)
     threshold = 1.0
     if distances[0][0] < threshold:
         idx = indices[0][0]
         qa_pair = curated_qa[idx]
+        # Return a formatted string with the curated question and answer.
         return f"Curated Q/A Pair:\nQuestion: {qa_pair['question']}\nAnswer: {qa_pair['answer']}\n"
     else:
         return ""

 def main():
-    st.title("PDF Question-Answering App
+    st.title("PDF Question-Answering App")

-    # Initialize conversation history if not already
+    # Initialize conversation history if not already set
     if 'conversation_history' not in st.session_state:
         st.session_state.conversation_history = ""

@@ -98,31 +99,38 @@ def main():
         # Append the current question to conversation history
         st.session_state.conversation_history += f"User: {query}\n"

-        # Retrieve relevant PDF context using FAISS index
+        # Retrieve relevant PDF context using the FAISS index
         query_embedding = embed_model.encode([query]).astype('float32')
-        k = 3 #
+        k = 3 # Number of top chunks to retrieve
        distances, indices = index.search(query_embedding, k)

         pdf_context = ""
         for idx in indices[0]:
             pdf_context += chunks[idx] + "\n"

-        # Get curated Q/A context if
+        # Get curated Q/A context if available
         curated_context = get_curated_context(query, curated_qa_pairs, embed_model)

-
-        combined_context = st.session_state.conversation_history + "\n" + curated_context + "\n" + pdf_context
+        base_context = st.session_state.conversation_history + "\n"

-        # Optionally show the retrieved contexts in expanders
-        with st.expander("Show PDF Retrieved Context"):
-            st.write(pdf_context)
         if curated_context:
-
-
+            st.write("A curated Q/A pair was found and will be used for the answer by default.")
+            # Provide an option to override with full PDF context
+            use_full_data = st.checkbox("Check Full Data", value=False)
+            if use_full_data:
+                context_to_use = base_context + pdf_context
+            else:
+                context_to_use = base_context + curated_context
+        else:
+            context_to_use = base_context + pdf_context
+
+        # Optionally, you can also provide an expander to show the full PDF context.
+        with st.expander("Show Full PDF Context"):
+            st.write(pdf_context)

         st.subheader("Answer:")
         try:
-            result = qa_pipeline(question=query, context=
+            result = qa_pipeline(question=query, context=context_to_use)
             answer = result["answer"]
             st.write(answer)
             st.session_state.conversation_history += f"AI: {answer}\n"
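Note (not part of the commit): the hunks above use several objects that app.py defines outside the changed region, namely embed_model, index, chunks, curated_index, and qa_pipeline. Below is a minimal sketch of one plausible way that setup is wired with sentence-transformers, FAISS, and the transformers question-answering pipeline; the model names and the build_index helper are illustrative assumptions, not taken from the file.

# Illustrative sketch only: model names and the build_index helper are assumptions,
# not part of app.py as shown in this diff.
import faiss
from sentence_transformers import SentenceTransformer
from transformers import pipeline

embed_model = SentenceTransformer("all-MiniLM-L6-v2")  # assumed embedding model
qa_pipeline = pipeline(
    "question-answering",
    model="distilbert-base-cased-distilled-squad",  # assumed extractive QA model
)

def build_index(texts):
    """Embed a list of texts and store the vectors in a FAISS L2 index."""
    embeddings = embed_model.encode(texts).astype("float32")
    index = faiss.IndexFlatL2(embeddings.shape[1])  # index over the embedding dimension
    index.add(embeddings)
    return index

# chunks would come from app.py's PDF-extraction step, which this diff does not show.
chunks = ["example PDF chunk one", "example PDF chunk two"]
index = build_index(chunks)

# get_curated_context presumably builds curated_index the same way:
# curated_index = build_index([qa["question"] for qa in curated_qa])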
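One detail worth keeping in mind when tuning the threshold in get_curated_context: a faiss.IndexFlatL2 search reports squared Euclidean distances, so threshold = 1.0 is compared against a squared distance. If the embeddings were unit-normalized, the squared distance relates to cosine similarity as d^2 = 2 - 2*cos, so a value below 1.0 would roughly mean cosine similarity above 0.5; with unnormalized sentence embeddings the scale differs, which is why the code says to tune the value. A small illustration under those assumptions:

# Illustration only: what the curated-context threshold is compared against.
# faiss.IndexFlatL2 reports squared Euclidean distances between query and stored vectors.
import numpy as np

def passes_threshold(query_emb, question_emb, threshold=1.0):
    """Mimic the distances[0][0] < threshold check from get_curated_context."""
    d2 = float(np.sum((np.asarray(query_emb) - np.asarray(question_emb)) ** 2))
    return d2 < threshold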