Tamil Eniyan
committed on
Commit
·
6170969
1
Parent(s):
c521252
Add application file
Browse files
- app.py +12 -27
- curated_qa_pairs.json +22 -0
app.py
CHANGED
@@ -2,12 +2,14 @@ import streamlit as st
|
|
2 |
import faiss
|
3 |
import numpy as np
|
4 |
import pickle
|
|
|
5 |
from sentence_transformers import SentenceTransformer
|
6 |
from transformers import pipeline
|
7 |
|
8 |
-
# File names for saved PDF-based data
|
9 |
INDEX_FILE = "faiss_index.index"
|
10 |
CHUNKS_FILE = "chunks.pkl"
|
|
|
11 |
|
12 |
# Models
|
13 |
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
|
@@ -28,29 +30,11 @@ def load_embedding_model():
|
|
28 |
def load_qa_pipeline():
|
29 |
return pipeline("question-answering", model=QA_MODEL_NAME, tokenizer=QA_MODEL_NAME)
|
30 |
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
},
|
37 |
-
{
|
38 |
-
"question": "What are the comparator treatments for Adagrasib in the PICO exercises?",
|
39 |
-
"answer": "In PICOs 1 and 2, the comparator treatments are Sotorasib monotherapy and platinum-based doublet chemotherapy. In PICO 3, the comparator treatment is Docetaxel monotherapy. In PICO 4, the comparator treatment is immune checkpoint inhibitor monotherapy."
|
40 |
-
},
|
41 |
-
{
|
42 |
-
"question": "What are the effectiveness measures used in the PICO framework?",
|
43 |
-
"answer": "Effectiveness measures in the PICO framework typically include outcomes such as overall response rate, progression-free survival, and overall survival. In these PICOs, Adagrasib is used as monotherapy in PICOs 1 and 2, and as monotherapy or in combination with platinum-based chemotherapy in PICO 3, while in PICO 4 it may be used as monotherapy or in combination therapy."
|
44 |
-
},
|
45 |
-
{
|
46 |
-
"question": "What adverse events were monitored in the PICO studies?",
|
47 |
-
"answer": "The adverse events monitored in the PICO studies include a range of treatment-related side effects. The provided context does not detail them fully; more detailed study data would be needed for a complete list."
|
48 |
-
},
|
49 |
-
{
|
50 |
-
"question": "How does Adagrasib compare to immune checkpoint inhibitors?",
|
51 |
-
"answer": "The analysis suggests that comparisons between Adagrasib and immune checkpoint inhibitors (or Docetaxel) should account for patients' prior treatment lines. Direct comparisons may not be relevant in certain subgroups, especially in patients with only one prior line of systemic therapy."
|
52 |
-
}
|
53 |
-
]
|
54 |
|
55 |
def get_curated_pair(query, curated_qa, embed_model, threshold=1.0):
|
56 |
"""
|
@@ -84,10 +68,11 @@ def main():
|
|
84 |
if 'conversation_history' not in st.session_state:
|
85 |
st.session_state.conversation_history = ""
|
86 |
|
87 |
-
# Load PDF index, chunks, and
|
88 |
index, chunks = load_index_and_chunks()
|
89 |
embed_model = load_embedding_model()
|
90 |
qa_pipeline = load_qa_pipeline()
|
|
|
91 |
|
92 |
st.write("Enter your question about the PDF document:")
|
93 |
query = st.text_input("Question:")
|
@@ -112,8 +97,8 @@ def main():
|
|
112 |
|
113 |
if curated_pair:
|
114 |
st.write("A curated Q/A pair was found and will be used for the answer by default.")
|
115 |
-
# Option to override with full PDF context
|
116 |
-
use_full_data = st.checkbox("
|
117 |
if not use_full_data:
|
118 |
# Directly display the curated answer without running the QA pipeline
|
119 |
answer = curated_pair["answer"]
|
|
|
2 |
import faiss
|
3 |
import numpy as np
|
4 |
import pickle
|
5 |
+
import json
|
6 |
from sentence_transformers import SentenceTransformer
|
7 |
from transformers import pipeline
|
8 |
|
9 |
+
# File names for saved PDF-based data and curated Q/A pairs
|
10 |
INDEX_FILE = "faiss_index.index"
|
11 |
CHUNKS_FILE = "chunks.pkl"
|
12 |
+
CURATED_QA_FILE = "curated_qa_pairs.json"
|
13 |
|
14 |
# Models
|
15 |
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
|
|
|
30 |
def load_qa_pipeline():
|
31 |
return pipeline("question-answering", model=QA_MODEL_NAME, tokenizer=QA_MODEL_NAME)
|
32 |
|
33 |
+
@st.cache_resource
|
34 |
+
def load_curated_qa_pairs(json_file=CURATED_QA_FILE):
|
35 |
+
with open(json_file, "r", encoding="utf-8") as f:
|
36 |
+
curated_qa_pairs = json.load(f)
|
37 |
+
return curated_qa_pairs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
def get_curated_pair(query, curated_qa, embed_model, threshold=1.0):
|
40 |
"""
|
|
|
68 |
if 'conversation_history' not in st.session_state:
|
69 |
st.session_state.conversation_history = ""
|
70 |
|
71 |
+
# Load PDF index, chunks, models, and curated Q/A pairs
|
72 |
index, chunks = load_index_and_chunks()
|
73 |
embed_model = load_embedding_model()
|
74 |
qa_pipeline = load_qa_pipeline()
|
75 |
+
curated_qa_pairs = load_curated_qa_pairs()
|
76 |
|
77 |
st.write("Enter your question about the PDF document:")
|
78 |
query = st.text_input("Question:")
|
|
|
97 |
|
98 |
if curated_pair:
|
99 |
st.write("A curated Q/A pair was found and will be used for the answer by default.")
|
100 |
+
# Option to override with full PDF context, now labeled as "High Reasoning"
|
101 |
+
use_full_data = st.checkbox("High Reasoning", value=False)
|
102 |
if not use_full_data:
|
103 |
# Directly display the curated answer without running the QA pipeline
|
104 |
answer = curated_pair["answer"]
|
curated_qa_pairs.json
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"question": "What is Adagrasib (Krazati®) used for?",
|
4 |
+
"answer": "Adagrasib (Krazati®) is used for the treatment of adult patients with advanced non-small cell lung cancer (NSCLC) with KRAS G12C mutation and disease progression after at least one prior systemic therapy."
|
5 |
+
},
|
6 |
+
{
|
7 |
+
"question": "What are the comparator treatments for Adagrasib in the PICO exercises?",
|
8 |
+
"answer": "In PICOs 1 and 2, the comparator treatments are Sotorasib monotherapy and platinum-based doublet chemotherapy. In PICO 3, the comparator treatment is Docetaxel monotherapy. In PICO 4, the comparator treatment is immune checkpoint inhibitor monotherapy."
|
9 |
+
},
|
10 |
+
{
|
11 |
+
"question": "What are the effectiveness measures used in the PICO framework?",
|
12 |
+
"answer": "Effectiveness measures in the PICO framework typically include outcomes such as overall response rate, progression-free survival, and overall survival. In these PICOs, Adagrasib is used as monotherapy in PICOs 1 and 2, and as monotherapy or in combination with platinum-based chemotherapy in PICO 3, while in PICO 4 it may be used as monotherapy or in combination therapy."
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"question": "What adverse events were monitored in the PICO studies?",
|
16 |
+
"answer": "The adverse events monitored in the PICO studies include a range of treatment-related side effects. The provided context does not detail them fully; more detailed study data would be needed for a complete list."
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"question": "How does Adagrasib compare to immune checkpoint inhibitors?",
|
20 |
+
"answer": "The analysis suggests that comparisons between Adagrasib and immune checkpoint inhibitors (or Docetaxel) should account for patients' prior treatment lines. Direct comparisons may not be relevant in certain subgroups, especially in patients with only one prior line of systemic therapy."
|
21 |
+
}
|
22 |
+
]
|