Tamil Eniyan committed
Commit · d8ffd44
1 Parent(s): 6170969
Add application file
app.py CHANGED
@@ -4,16 +4,23 @@ import numpy as np
 import pickle
 import json
 from sentence_transformers import SentenceTransformer
-from transformers import pipeline
+from transformers import pipeline, RagTokenizer, RagRetriever, RagSequenceForGeneration
+import torch
+
+# ========================
+# File Names & Model Names
+# ========================
 
-# File names for saved PDF-based data and curated Q/A pairs
 INDEX_FILE = "faiss_index.index"
 CHUNKS_FILE = "chunks.pkl"
 CURATED_QA_FILE = "curated_qa_pairs.json"
 
-# Models
 EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
-QA_MODEL_NAME = "deepset/roberta-large-squad2" #
+QA_MODEL_NAME = "deepset/roberta-large-squad2" # For the standard QA pipeline
+
+# ========================
+# Loading Functions (cached)
+# ========================
 
 @st.cache_resource
 def load_index_and_chunks():
@@ -36,6 +43,10 @@ def load_curated_qa_pairs(json_file=CURATED_QA_FILE):
         curated_qa_pairs = json.load(f)
     return curated_qa_pairs
 
+# ========================================
+# Standard: Retrieve Curated Q/A Pair Function
+# ========================================
+
 def get_curated_pair(query, curated_qa, embed_model, threshold=1.0):
     """
     Retrieve the most relevant curated Q/A pair based on the user's query.
@@ -61,14 +72,81 @@ def get_curated_pair(query, curated_qa, embed_model, threshold=1.0):
     else:
         return None
 
+# ============================================================
+# Custom RAG Retriever: Uses your FAISS index & PDF passages
+# ============================================================
+
+class CustomRagRetriever(RagRetriever):
+    """
+    A custom retriever that uses your FAISS index and passages.
+    It encodes the query with the provided embedding model,
+    searches your FAISS index, and returns the top retrieved documents.
+    """
+    def __init__(self, faiss_index, passages, embed_model, tokenizer, n_docs=5):
+        self.faiss_index = faiss_index # FAISS index of PDF embeddings
+        self.passages = passages # List of passage texts from your PDF
+        self.embed_model = embed_model # Embedding model to encode queries
+        self.n_docs = n_docs # Number of top documents to retrieve
+        # Provide a dummy dataset to satisfy the base class.
+        dummy_dataset = {"title": ["dummy"], "text": ["dummy"]}
+        super().__init__(dummy_dataset, tokenizer=tokenizer, index_name="custom")
+
+    def retrieve(self, query, n_docs=None):
+        if n_docs is None:
+            n_docs = self.n_docs
+        # Encode the query using your embedding model
+        query_embedding = self.embed_model.encode([query]).astype('float32')
+        distances, indices = self.faiss_index.search(query_embedding, n_docs)
+        # Retrieve the passages using the indices
+        retrieved_docs = [self.passages[i] for i in indices[0]]
+        return {
+            "doc_ids": indices,
+            "doc_scores": distances,
+            "retrieved_docs": retrieved_docs,
+        }
+
+# ============================================================
+# Load RAG Model with Custom Retriever (cached for performance)
+# ============================================================
+
+@st.cache_resource
+def load_rag_model(faiss_index, passages, embed_model):
+    # Load the RAG tokenizer and base model
+    tokenizer = RagTokenizer.from_pretrained("facebook/rag-token-nq")
+    rag_model = RagSequenceForGeneration.from_pretrained("facebook/rag-token-nq")
+
+    # Create the custom retriever using your FAISS index, passages, and embedding model
+    custom_retriever = CustomRagRetriever(
+        faiss_index=faiss_index,
+        passages=passages,
+        embed_model=embed_model,
+        tokenizer=tokenizer,
+        n_docs=5
+    )
+
+    # Set the custom retriever in the RAG model
+    rag_model.set_retriever(custom_retriever)
+    return tokenizer, rag_model
+
+def generate_rag_answer(query, tokenizer, rag_model):
+    inputs = tokenizer(query, return_tensors="pt")
+    with torch.no_grad():
+        generated_ids = rag_model.generate(**inputs)
+    answer = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    return answer
+
+# ========================================
+# Main Streamlit App
+# ========================================
+
 def main():
     st.title("Takalama")
 
-    # Initialize conversation history if not
+    # Initialize conversation history if not present
     if 'conversation_history' not in st.session_state:
         st.session_state.conversation_history = ""
 
-    # Load
+    # Load necessary data and models
     index, chunks = load_index_and_chunks()
     embed_model = load_embedding_model()
     qa_pipeline = load_qa_pipeline()
@@ -78,39 +156,46 @@ def main():
     query = st.text_input("Question:")
 
     if query:
-        # Append
+        # Append question to conversation history
        st.session_state.conversation_history += f"User: {query}\n"
 
         # Retrieve relevant PDF context using the FAISS index
         query_embedding = embed_model.encode([query]).astype('float32')
         k = 3 # Number of top chunks to retrieve
         distances, indices = index.search(query_embedding, k)
-
         pdf_context = ""
         for idx in indices[0]:
             pdf_context += chunks[idx] + "\n"
 
         base_context = st.session_state.conversation_history + "\n"
 
+        # --- Option 1: Use RAG Model with Custom Retriever ---
+        if st.button("Use RAG Model with Custom Retriever"):
+            tokenizer_rag, rag_model = load_rag_model(index, chunks, embed_model)
+            rag_answer = generate_rag_answer(query, tokenizer_rag, rag_model)
+            st.write("**RAG Model Answer:**")
+            st.write(rag_answer)
+            st.session_state.conversation_history += f"AI (RAG): {rag_answer}\n"
+            return # Exit after using the RAG answer
+
+        # --- Option 2: Use Standard QA Pipeline with Curated Q/A Pairs ---
         # Check for a curated Q/A pair
         curated_pair = get_curated_pair(query, curated_qa_pairs, embed_model)
-
         if curated_pair:
             st.write("A curated Q/A pair was found and will be used for the answer by default.")
-            # Option to override with full PDF context
+            # Option to override with full PDF context ("High Reasoning")
             use_full_data = st.checkbox("High Reasoning", value=False)
             if not use_full_data:
-                # Directly display the curated answer without running the QA pipeline
                 answer = curated_pair["answer"]
                 st.write(answer)
                 st.session_state.conversation_history += f"AI: {answer}\n"
-                return # Exit
+                return # Exit after displaying the curated answer
             else:
                 context_to_use = base_context + pdf_context
         else:
             context_to_use = base_context + pdf_context
 
-        #
+        # Optionally, show the full PDF context in an expander
         with st.expander("Show Full PDF Context"):
             st.write(pdf_context)
 
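The app expects faiss_index.index and chunks.pkl to already exist on disk; the script that builds them is not part of this commit. For orientation, a minimal offline indexing sketch, assuming naive fixed-size character chunking and a faiss.IndexFlatL2 index; the build_index helper, chunk size, and index type are illustrative assumptions, while the file names and the all-MiniLM-L6-v2 model come from the diff:

import pickle

import faiss
from sentence_transformers import SentenceTransformer

def build_index(text, chunk_size=500):
    # Naive fixed-size character chunking (assumption; the real chunking
    # strategy is not visible in this commit).
    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
    model = SentenceTransformer("all-MiniLM-L6-v2")
    embeddings = model.encode(chunks).astype("float32")
    # Flat L2 index (assumption), consistent with the raw distances the app
    # later compares against a threshold.
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    faiss.write_index(index, "faiss_index.index")
    with open("chunks.pkl", "wb") as f:
        pickle.dump(chunks, f)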
+
|
79 |
+
class CustomRagRetriever(RagRetriever):
|
80 |
+
"""
|
81 |
+
A custom retriever that uses your FAISS index and passages.
|
82 |
+
It encodes the query with the provided embedding model,
|
83 |
+
searches your FAISS index, and returns the top retrieved documents.
|
84 |
+
"""
|
85 |
+
def __init__(self, faiss_index, passages, embed_model, tokenizer, n_docs=5):
|
86 |
+
self.faiss_index = faiss_index # FAISS index of PDF embeddings
|
87 |
+
self.passages = passages # List of passage texts from your PDF
|
88 |
+
self.embed_model = embed_model # Embedding model to encode queries
|
89 |
+
self.n_docs = n_docs # Number of top documents to retrieve
|
90 |
+
# Provide a dummy dataset to satisfy the base class.
|
91 |
+
dummy_dataset = {"title": ["dummy"], "text": ["dummy"]}
|
92 |
+
super().__init__(dummy_dataset, tokenizer=tokenizer, index_name="custom")
|
93 |
+
|
94 |
+
def retrieve(self, query, n_docs=None):
|
95 |
+
if n_docs is None:
|
96 |
+
n_docs = self.n_docs
|
97 |
+
# Encode the query using your embedding model
|
98 |
+
query_embedding = self.embed_model.encode([query]).astype('float32')
|
99 |
+
distances, indices = self.faiss_index.search(query_embedding, n_docs)
|
100 |
+
# Retrieve the passages using the indices
|
101 |
+
retrieved_docs = [self.passages[i] for i in indices[0]]
|
102 |
+
return {
|
103 |
+
"doc_ids": indices,
|
104 |
+
"doc_scores": distances,
|
105 |
+
"retrieved_docs": retrieved_docs,
|
106 |
+
}
|
107 |
+
|
108 |
+
# ============================================================
|
109 |
+
# Load RAG Model with Custom Retriever (cached for performance)
|
110 |
+
# ============================================================
|
111 |
+
|
112 |
+
@st.cache_resource
|
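CustomRagRetriever exists to swap the app's own FAISS index into the transformers RAG stack in place of the stock retriever. For comparison, the documented baseline usage of RAG with the library's bundled retriever; this snippet follows the transformers docs, not code from this commit:

import torch
from transformers import RagRetriever, RagSequenceForGeneration, RagTokenizer

tokenizer = RagTokenizer.from_pretrained("facebook/rag-token-nq")
# Stock retriever backed by a dummy wiki_dpr index; the commit's
# CustomRagRetriever replaces this with the app's FAISS index and chunks.
retriever = RagRetriever.from_pretrained(
    "facebook/rag-token-nq", index_name="exact", use_dummy_dataset=True
)
model = RagSequenceForGeneration.from_pretrained(
    "facebook/rag-token-nq", retriever=retriever
)

inputs = tokenizer("who holds the record in 100m freestyle", return_tensors="pt")
with torch.no_grad():
    generated = model.generate(input_ids=inputs["input_ids"])
print(tokenizer.batch_decode(generated, skip_special_tokens=True)[0])

Two caveats worth noting: the transformers docs pair RagSequenceForGeneration with the facebook/rag-sequence-nq checkpoint (rag-token-nq is normally loaded with RagTokenForGeneration), and the stock RagRetriever constructor expects a config plus question-encoder and generator tokenizers, so the dummy-dataset super().__init__ call in CustomRagRetriever may need adjusting against the installed transformers version.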
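The last hunk ends just before the code that consumes context_to_use; that tail of main() is unchanged by this commit and so does not appear in the diff. For reference, a minimal sketch of the extractive QA call it presumably makes, using the standard transformers question-answering pipeline API with the QA_MODEL_NAME from the diff (the example question and context strings are placeholders):

from transformers import pipeline

qa_pipeline = pipeline("question-answering", model="deepset/roberta-large-squad2")

# Extractive QA: the model selects an answer span inside the supplied
# context rather than generating free text.
result = qa_pipeline(
    question="What is this document about?",
    context="...conversation history plus retrieved PDF chunks...",
)
print(result["answer"], result["score"])

This also explains the "High Reasoning" checkbox: left unchecked, a matched curated answer is returned verbatim and the pipeline never runs; checked, the curated match is ignored and the pipeline answers over the conversation history plus the retrieved PDF context.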