Tamil Eniyan committed
Commit · 0282eea
Parent(s): 226641d
Add application file
app.py CHANGED
@@ -36,7 +36,7 @@ curated_qa_pairs = [
     },
     {
         "question": "What are the comparator treatments for Adagrasib in the PICO exercises?",
-        "answer": "In PICOs 1 and 2, the comparator treatments are Sotorasib monotherapy and
+        "answer": "In PICOs 1 and 2, the comparator treatments are Sotorasib monotherapy and platinum-based doublet chemotherapy. In PICO 3, the comparator treatment is Docetaxel monotherapy. In PICO 4, the comparator treatment is immune checkpoint inhibitor monotherapy."
     },
     {
         "question": "What are the effectiveness measures used in the PICO framework?",
@@ -44,7 +44,7 @@ curated_qa_pairs = [
     },
     {
         "question": "What adverse events were monitored in the PICO studies?",
-        "answer": "The adverse events monitored in the PICO studies include a range of treatment-related side effects
+        "answer": "The adverse events monitored in the PICO studies include a range of treatment-related side effects. The provided context does not detail them fully; more detailed study data would be needed for a complete list."
     },
     {
         "question": "How does Adagrasib compare to immune checkpoint inhibitors?",
@@ -55,7 +55,7 @@ curated_qa_pairs = [
 def get_curated_context(query, curated_qa, embed_model):
     """
     Retrieve the most relevant curated Q/A pair based on the user's query.
-    Returns a formatted string if the similarity (
+    Returns a formatted string if the similarity (using L2 distance) is below a threshold.
     """
     curated_questions = [qa["question"] for qa in curated_qa]
     query_embedding = embed_model.encode([query]).astype('float32')
@@ -70,19 +70,20 @@ def get_curated_context(query, curated_qa, embed_model):
     k = 1
     distances, indices = curated_index.search(query_embedding, k)

-    # Define a threshold for relevance (tune as needed)
+    # Define a threshold for relevance (tune this value as needed)
     threshold = 1.0
     if distances[0][0] < threshold:
         idx = indices[0][0]
         qa_pair = curated_qa[idx]
+        # Return a formatted string with the curated question and answer.
         return f"Curated Q/A Pair:\nQuestion: {qa_pair['question']}\nAnswer: {qa_pair['answer']}\n"
     else:
         return ""

 def main():
-    st.title("PDF Question-Answering App
+    st.title("PDF Question-Answering App")

-    # Initialize conversation history if not already
+    # Initialize conversation history if not already set
     if 'conversation_history' not in st.session_state:
         st.session_state.conversation_history = ""

@@ -98,31 +99,38 @@ def main():
         # Append the current question to conversation history
         st.session_state.conversation_history += f"User: {query}\n"

-        # Retrieve relevant PDF context using FAISS index
+        # Retrieve relevant PDF context using the FAISS index
         query_embedding = embed_model.encode([query]).astype('float32')
-        k = 3 #
+        k = 3 # Number of top chunks to retrieve
        distances, indices = index.search(query_embedding, k)

         pdf_context = ""
         for idx in indices[0]:
             pdf_context += chunks[idx] + "\n"

-        # Get curated Q/A context if
+        # Get curated Q/A context if available
         curated_context = get_curated_context(query, curated_qa_pairs, embed_model)

-
-        combined_context = st.session_state.conversation_history + "\n" + curated_context + "\n" + pdf_context
+        base_context = st.session_state.conversation_history + "\n"

-        # Optionally show the retrieved contexts in expanders
-        with st.expander("Show PDF Retrieved Context"):
-            st.write(pdf_context)
         if curated_context:
-
-
+            st.write("A curated Q/A pair was found and will be used for the answer by default.")
+            # Provide an option to override with full PDF context
+            use_full_data = st.checkbox("Check Full Data", value=False)
+            if use_full_data:
+                context_to_use = base_context + pdf_context
+            else:
+                context_to_use = base_context + curated_context
+        else:
+            context_to_use = base_context + pdf_context
+
+        # Optionally, you can also provide an expander to show the full PDF context.
+        with st.expander("Show Full PDF Context"):
+            st.write(pdf_context)

         st.subheader("Answer:")
         try:
-            result = qa_pipeline(question=query, context=
+            result = qa_pipeline(question=query, context=context_to_use)
             answer = result["answer"]
             st.write(answer)
             st.session_state.conversation_history += f"AI: {answer}\n"
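Note (not part of the commit): the hunks above use several objects that app.py defines outside the changed region, namely embed_model, index, chunks, curated_index, and qa_pipeline. Below is a minimal sketch of one plausible way that setup is wired with sentence-transformers, FAISS, and the transformers question-answering pipeline; the model names and the build_index helper are illustrative assumptions, not taken from the file.

# Illustrative sketch only: model names and the build_index helper are assumptions,
# not part of app.py as shown in this diff.
import faiss
from sentence_transformers import SentenceTransformer
from transformers import pipeline

embed_model = SentenceTransformer("all-MiniLM-L6-v2")  # assumed embedding model
qa_pipeline = pipeline(
    "question-answering",
    model="distilbert-base-cased-distilled-squad",  # assumed extractive QA model
)

def build_index(texts):
    """Embed a list of texts and store the vectors in a FAISS L2 index."""
    embeddings = embed_model.encode(texts).astype("float32")
    index = faiss.IndexFlatL2(embeddings.shape[1])  # index over the embedding dimension
    index.add(embeddings)
    return index

# chunks would come from app.py's PDF-extraction step, which this diff does not show.
chunks = ["example PDF chunk one", "example PDF chunk two"]
index = build_index(chunks)

# get_curated_context presumably builds curated_index the same way:
# curated_index = build_index([qa["question"] for qa in curated_qa])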
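One detail worth keeping in mind when tuning the threshold in get_curated_context: a faiss.IndexFlatL2 search reports squared Euclidean distances, so threshold = 1.0 is compared against a squared distance. If the embeddings were unit-normalized, the squared distance relates to cosine similarity as d^2 = 2 - 2*cos, so a value below 1.0 would roughly mean cosine similarity above 0.5; with unnormalized sentence embeddings the scale differs, which is why the code says to tune the value. A small illustration under those assumptions:

# Illustration only: what the curated-context threshold is compared against.
# faiss.IndexFlatL2 reports squared Euclidean distances between query and stored vectors.
import numpy as np

def passes_threshold(query_emb, question_emb, threshold=1.0):
    """Mimic the distances[0][0] < threshold check from get_curated_context."""
    d2 = float(np.sum((np.asarray(query_emb) - np.asarray(question_emb)) ** 2))
    return d2 < threshold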