Update rag.py (#2)
Update rag.py (04920e9fbcf79cc0085ee080ad648d41d024dd7b)
Co-authored-by: Prerna Aneja <[email protected]>
rag.py CHANGED
@@ -1,163 +1,119 @@
--- a/rag.py

-# import time
-import threading
-import pandas as pd
 import faiss
 import numpy as np
-# import numpy as np
 import pickle
 from sentence_transformers import SentenceTransformer
 from transformers import AutoModelForCausalLM, AutoTokenizer
-
 
 class FinancialChatbot:
-    def __init__(self, data_path, qwen_model_name):  # signature truncated in the page extraction; parameters inferred from the method body
-
-        self.…  # assignment truncated in the page extraction
-        self.index_map = {}
-        self.faiss_index = None
-        # def get_device_map() -> str:
-        #     return 'cuda' if torch.cuda.is_available() else ''
-
-        # device = get_device_map()
-        self.qwen_model = AutoModelForCausalLM.from_pretrained(qwen_model_name, torch_dtype="auto", device_map="cpu", trust_remote_code=True)
-        self.qwen_tokenizer = AutoTokenizer.from_pretrained(qwen_model_name, trust_remote_code=True)
-        self.load_or_create_index()
-
-    def load_or_create_index(self):
-        try:
-            self.faiss_index = faiss.read_index("financial_faiss.index")
-            with open("index_map.pkl", "rb") as f:
-                self.index_map = pickle.load(f)
-            print("Index loaded successfully!")
-        except:
-            print("Creating new FAISS index...")
-            df = pd.read_excel(self.data_path)
-            sentences = []
-            for index, row in df.iterrows():
-                for col in df.columns[1:]:
-                    text = f"{row[df.columns[0]]} - year {col} is: {row[col]}"
-                    sentences.append(text)
-                    self.index_map[len(sentences) - 1] = text
-            embeddings = self.sbert_model.encode(sentences, convert_to_numpy=True)
-            dim = embeddings.shape[1]
-            self.faiss_index = faiss.IndexFlatL2(dim)
-            self.faiss_index.add(embeddings)
-            faiss.write_index(self.faiss_index, "financial_faiss.index")
-            with open("index_map.pkl", "wb") as f:
-                pickle.dump(self.index_map, f)
-            print("Indexing completed!")
-
-    # def query_faiss(self, query, top_k=5):
-    #     query_embedding = self.sbert_model.encode([query], convert_to_numpy=True)
-    #     distances, indices = self.faiss_index.search(query_embedding, top_k)
-    #     return [self.index_map[idx] for idx in indices[0] if idx in self.index_map]
-
-    def query_faiss(self, query, top_k=5):
-        """Retrieve top-k documents from FAISS and return confidence scores."""
-        …  # method body not recoverable from the page extraction
-        return results, confidences
-
     def moderate_query(self, query):
-
-        return not any(word in query.lower() for word in BLOCKED_WORDS)
-
-    def generate_answer(self, context, question):
-        prompt = f"""
-        You are a financial assistant. If the user greets you (e.g., "Hello," "Hi," "Good morning"), respond politely without requiring context.
-        …  # remainder of the prompt and input construction not recoverable from the page extraction
 
         inputs = self.qwen_tokenizer.encode(input_text, return_tensors="pt")
         outputs = self.qwen_model.generate(inputs, max_length=100)
         return self.qwen_tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-    # def get_answer(self, query, timeout=150):
-    #     result = ["", 0.0]  # Placeholder for answer and confidence
-
-    #     def task():
-    #         if self.moderate_query(query):
-    #             retrieved_docs = self.query_faiss(query)
-    #             context = " ".join(retrieved_docs)
-    #             answer = self.generate_answer(context, query)
-    #             last_index = answer.rfind("Answer")
-    #             if answer[last_index+9:11] == "--":
-    #                 result[:] = ["No relevant information found", 0.0]
-    #             else:
-    #                 result[:] = [answer[last_index:], 0.9]
-    #         else:
-    #             result[:] = ["I'm unable to process your request due to inappropriate language.", 0.0]
-
-    #     thread = threading.Thread(target=task)
-    #     thread.start()
-    #     thread.join(timeout)
-    #     if thread.is_alive():
-    #         return "Execution exceeded time limit. Stopping function.", 0.0
-    #     return tuple(result)
-
-    def get_answer(self, query, timeout=150):
-        """Retrieve the best-matched answer along with confidence score, with execution timeout."""
-
-        result = ["Execution exceeded time limit. Stopping function.", 0.0]  # Default timeout response
 
         def task():
-
-            if self.moderate_query(query):  # line truncated in the page extraction; condition inferred from the commented-out version above
-                …  # retrieval and context construction not recoverable from the page extraction
-                if not retrieved_docs:  # If no relevant docs found
-                    result[:] = ["No relevant information found", 0.0]
-                    return
 
-                avg_confidence = round(sum(confidences) / len(confidences), 2)  # Avg confidence
-
-                answer = self.generate_answer(context, query)
-                last_index = answer.rfind("Answer")
-
-                if answer[last_index + 9:11] == "--":
-                    result[:] = ["No relevant information found", 0.0]
-                else:
-                    result[:] = [answer[last_index:], avg_confidence]
-
-            else:
                 result[:] = ["I'm unable to process your request due to inappropriate language.", 0.0]
 
-        #
         thread = threading.Thread(target=task)
         thread.start()
-        thread.join(timeout)
-
-        # If thread is still running after timeout, return timeout message
         if thread.is_alive():
             return "Execution exceeded time limit. Stopping function.", 0.0
-
-        return tuple(result)
-
-
-# if __name__ == "__main__":
-#     chatbot = FinancialChatbot("C:\\Users\\Dell\\Downloads\\CAI_RAG\\DATA\\Nestle_Financtial_report_till2023.xlsx")
-#     query = "What is the Employees Cost in Dec'20?"
-#     print(chatbot.get_answer(query))
+++ b/rag.py

 import faiss
 import numpy as np
 import pickle
+import threading
+import time
+import torch
+import pandas as pd
+
 from sentence_transformers import SentenceTransformer
 from transformers import AutoModelForCausalLM, AutoTokenizer
+from rank_bm25 import BM25Okapi
 
 class FinancialChatbot:
+    def __init__(self):
+        # Load financial dataset
+        self.df = pd.read_excel("Nestle_Financtial_report_till2023.xlsx")
 
+        # Load embedding model
+        self.sbert_model = SentenceTransformer("all-MiniLM-L6-v2")
 
+        # Load FAISS index
+        self.faiss_index = faiss.read_index("financial_faiss.index")
+        with open("index_map.pkl", "rb") as f:
+            self.index_map = pickle.load(f)
 
+        # BM25 indexing
+        self.documents = [" ".join(row) for row in self.df.astype(str).values]
+        self.tokenized_docs = [doc.split() for doc in self.documents]
+        self.bm25 = BM25Okapi(self.tokenized_docs)
 
+        # Load Qwen model
+        self.qwen_model_name = "Qwen/Qwen2.5-1.5b"
+        self.qwen_model = AutoModelForCausalLM.from_pretrained(self.qwen_model_name, torch_dtype="auto", device_map="auto", trust_remote_code=True)
+        self.qwen_tokenizer = AutoTokenizer.from_pretrained(self.qwen_model_name, trust_remote_code=True)
+
+        # Guardrail: blocked words
+        self.BLOCKED_WORDS = ["hack", "bypass", "illegal", "scam", "terrorism", "attack", "suicide", "bomb"]
 
     def moderate_query(self, query):
+        """Check if the query contains blocked words."""
+        return not any(word in query.lower() for word in self.BLOCKED_WORDS)
 
+    def query_faiss(self, query, top_k=5):
+        """Retrieve top-k relevant documents using FAISS."""
+        query_embedding = self.sbert_model.encode([query], convert_to_numpy=True)
+        distances, indices = self.faiss_index.search(query_embedding, top_k)
+
+        results = []
+        confidences = []
+        for idx, dist in zip(indices[0], distances[0]):
+            if idx in self.index_map:
+                results.append(self.index_map[idx])
+                confidences.append(1 / (1 + dist))  # Convert distance to confidence
+
+        return results, confidences
 
+    def query_bm25(self, query, top_k=5):
+        """Retrieve top-k relevant documents using BM25."""
+        tokenized_query = query.split()
+        scores = self.bm25.get_scores(tokenized_query)
+        top_indices = np.argsort(scores)[-top_k:][::-1]
+
+        results = [self.documents[i] for i in top_indices]
+        confidences = [scores[i] / max(scores) for i in top_indices]  # Normalize confidence
+
+        return results, confidences
 
+    def generate_answer(self, context, question):
+        """Generate an answer using the Qwen model."""
+        input_text = f"Context: {context}\nQuestion: {question}\nAnswer:"
         inputs = self.qwen_tokenizer.encode(input_text, return_tensors="pt")
         outputs = self.qwen_model.generate(inputs, max_length=100)
         return self.qwen_tokenizer.decode(outputs[0], skip_special_tokens=True)
 
+    def get_answer(self, query, timeout=200):
+        """Fetch an answer using multi-step retrieval and the Qwen model, with timeout handling."""
+        result = ["No relevant information found", 0.0]  # Default response
+
         def task():
+            # Handle greetings
+            if query.lower() in ["hi", "hello", "hey"]:
+                result[:] = ["Hi, how can I help you?", 1.0]
+                return
 
+            # Guardrail check
+            if not self.moderate_query(query):
                 result[:] = ["I'm unable to process your request due to inappropriate language.", 0.0]
+                return
+
+            # Multi-step retrieval (BM25 + FAISS)
+            bm25_results, bm25_confidences = self.query_bm25(query, top_k=3)
+            faiss_results, faiss_confidences = self.query_faiss(query, top_k=3)
+
+            retrieved_docs = bm25_results + faiss_results
+            confidences = bm25_confidences + faiss_confidences
+
+            if not retrieved_docs:
+                return  # Default response already set
+
+            # Construct context
+            context = " ".join(retrieved_docs)
+            answer = self.generate_answer(context, query)
+            last_index = answer.rfind("Answer")
+
+            # Confidence calculation
+            final_confidence = max(confidences) if confidences else 0.0
+            if answer[last_index+9:11] == "--":
+                result[:] = ["No relevant information found", 0.0]
+            else:
+                result[:] = [answer[last_index:], final_confidence]
 
+        # Run task with timeout
         thread = threading.Thread(target=task)
         thread.start()
+        thread.join(timeout)
         if thread.is_alive():
             return "Execution exceeded time limit. Stopping function.", 0.0
+
+        return tuple(result)
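For reference, a minimal usage sketch of the updated class, appended to rag.py. This driver is hypothetical (the commit drops the old commented-out __main__ block); it assumes Nestle_Financtial_report_till2023.xlsx, financial_faiss.index, and index_map.pkl sit in the working directory, since the new __init__ loads all three by relative path.

# Hypothetical driver, mirroring the commented-out __main__ from the old rag.py;
# not part of this commit. Assumes the Excel report, FAISS index, and index map
# pickle are present in the working directory, as the new __init__ expects.
if __name__ == "__main__":
    chatbot = FinancialChatbot()
    # get_answer returns an (answer, confidence) tuple, or a timeout message after `timeout` seconds
    answer, confidence = chatbot.get_answer("What is the Employees Cost in Dec'20?")
    print(f"{answer} (confidence: {confidence})")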