gavinzli committed on
Commit e83b975 · 1 Parent(s): c529966

Enhance model integration and error handling in retriever module

Files changed (5)
  1. chain/__init__.py +27 -1
  2. main.py +5 -5
  3. models/llm/__init__.py +71 -0
  4. retriever/__init__.py +28 -22
  5. token.pickle +0 -0
chain/__init__.py CHANGED
@@ -4,6 +4,7 @@ import json
 from datetime import datetime
 from venv import logger
 
+import torch
 from pymongo import errors
 from langchain_core.runnables.history import RunnableWithMessageHistory
 from langchain_core.messages import BaseMessage, message_to_dict
@@ -11,10 +12,35 @@ from langchain.chains.combine_documents import create_stuff_documents_chain
 from langchain.chains.retrieval import create_retrieval_chain
 from langchain.prompts.chat import ChatPromptTemplate, MessagesPlaceholder
 from langchain_mongodb import MongoDBChatMessageHistory
+from langchain_huggingface import HuggingFacePipeline
 
-from models.llm import GPTModel
+from models.llm import GPTModel, Phi4MiniONNXLLM, HuggingfaceModel
 
 llm = GPTModel()
+REPO_ID = "microsoft/Phi-4-mini-instruct-onnx"
+SUBFOLDER = "cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4"
+phi4_llm = Phi4MiniONNXLLM(REPO_ID, SUBFOLDER)
+
+MODEL_NAME = "openai-community/gpt2"
+MODEL_NAME = "microsoft/phi-1_5"
+hf_llm = HuggingfaceModel(MODEL_NAME)
+
+phi4_llm = HuggingFacePipeline.from_model_id(
+    model_id="microsoft/Phi-4",
+    task="text-generation",
+    pipeline_kwargs={
+        "max_new_tokens": 128,
+        "temperature": 0.3,
+        "top_k": 50,
+        "do_sample": True
+    },
+    model_kwargs={
+        "torch_dtype": "auto",
+        "device_map": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
+        "max_memory": {0: "10GB"},
+        "use_cache": False
+    }
+)
 
 SYS_PROMPT = """You are a knowledgeable financial professional. You can provide well elaborated and credible answers to user queries in economic and finance by referring to retrieved contexts.
 You should answer user queries strictly following the instructions below, and do not provide anything irrelevant. \n
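
The diff shows this module's model setup but not the chain construction itself. For orientation, a minimal sketch of how these imports are typically wired together in LangChain; the prompt layout and the `vector_retriever` name are assumptions, not code from this commit:

# Hypothetical wiring sketch; vector_retriever is assumed to be a
# DocRetriever instance and is not defined in this diff.
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
from langchain.prompts.chat import ChatPromptTemplate, MessagesPlaceholder

prompt = ChatPromptTemplate.from_messages([
    ("system", SYS_PROMPT + "\n\nContext: {context}"),  # {context} receives the stuffed documents
    MessagesPlaceholder("chat_history"),                # filled in by RunnableWithMessageHistory
    ("human", "{input}"),
])
docs_chain = create_stuff_documents_chain(llm, prompt)  # llm is the GPTModel built above
rag_chain = create_retrieval_chain(vector_retriever, docs_chain)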
main.py CHANGED
@@ -1,19 +1,19 @@
 """Module to run the mail collection process."""
 from dotenv import load_dotenv
 
-from controllers import mail
+# from controllers import mail
 from chain import RAGChain
 from retriever import DocRetriever
 
 load_dotenv()
 
 if __name__ == "__main__":
-    mail.collect()
-    mail.get_documents()
+    # mail.collect()
+    # mail.get_documents()
     req = {
         "query": "What is the latest news on the stock market?",
     }
     chain = RAGChain(DocRetriever(req=req))
     result = chain.invoke({"input": req['query']},
-                          config={"configurable": {"session_id": "abc"}})
-    print(result)
+                          config={"configurable": {"session_id": "123"}})
+    print(result.get("answer"))
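
Chat history is keyed by session_id and persisted via MongoDBChatMessageHistory (see chain/__init__.py), so reusing the same id carries the first answer into the next turn. A short hypothetical follow-up call:

# Hypothetical second turn: the same session_id replays the stored history.
followup = chain.invoke(
    {"input": "How does that compare to last week?"},
    config={"configurable": {"session_id": "123"}},
)
print(followup.get("answer"))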
models/llm/__init__.py CHANGED
@@ -1,5 +1,10 @@
 """Module for OpenAI model and embeddings."""
+import os
+import onnxruntime as ort
 from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
+from langchain_huggingface import HuggingFacePipeline
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from huggingface_hub import hf_hub_download
 
 class GPTModel(AzureChatOpenAI):
     """
@@ -31,3 +36,69 @@ class GPTEmbeddings(AzureOpenAIEmbeddings):
     Methods:
         Inherits all methods from AzureOpenAIEmbeddings.
     """
+
+class Phi4MiniONNXLLM:
+    """
+    A class for interfacing with a pre-trained ONNX model for inference.
+
+    Attributes:
+        session (onnxruntime.InferenceSession): The ONNX runtime inference session for the model.
+        input_name (str): The name of the input node in the ONNX model.
+        output_name (str): The name of the output node in the ONNX model.
+
+    Methods:
+        __init__(model_path):
+            Initializes the Phi4MiniONNXLLM instance by loading the ONNX model from specified path.
+
+        __call__(input_ids):
+            Performs inference on the given input data and returns the model's output.
+    """
+    def __init__(self, repo_id, subfolder, onnx_file="model.onnx", weights_file="model.onnx.data"):
+        model_path = hf_hub_download(repo_id=repo_id, filename=f"{subfolder}/{onnx_file}")
+        weights_path = hf_hub_download(repo_id=repo_id, filename=f"{subfolder}/{weights_file}")
+        self.session = ort.InferenceSession(model_path)
+        # Verify both files exist
+        print(f"Model path: {model_path}, Exists: {os.path.exists(model_path)}")
+        print(f"Weights path: {weights_path}, Exists: {os.path.exists(weights_path)}")
+        self.input_name = self.session.get_inputs()[0].name
+        self.output_name = self.session.get_outputs()[0].name
+
+    def __call__(self, input_ids):
+        # Assuming input_ids is a tensor or numpy array
+        outputs = self.session.run([self.output_name], {self.input_name: input_ids})
+        return outputs[0]
+
+class HuggingfaceModel(HuggingFacePipeline):
+    """
+    HuggingfaceModel is a wrapper class for the Hugging Face text-generation pipeline.
+
+    Attributes:
+        name (str): The name or path of the pre-trained model to load from Hugging Face.
+        max_tokens (int): The maximum number of new tokens to generate in the text output.
+            Defaults to 200.
+
+    Methods:
+        __init__(name, max_tokens=200):
+            Initializes the HuggingfaceModel with the specified model name and maximum token limit.
+    """
+    def __init__(self, name, max_tokens=200):
+        super().__init__(pipeline=pipeline(
+            "text-generation",
+            model=AutoModelForCausalLM.from_pretrained(name),
+            tokenizer=AutoTokenizer.from_pretrained(name),
+            max_new_tokens=max_tokens))
+
+# model_name = "microsoft/phi-1_5"
+# tokenizer = AutoTokenizer.from_pretrained(model_name)
+# model = AutoModelForCausalLM.from_pretrained(model_name)
+# pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=200)
+
+# phi4_llm = HuggingFacePipeline(pipeline=pipe)
+
+# tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2", pad_token_id=50256)
+# model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")
+# pipe = pipeline(
+#     "text-generation", model=model, tokenizer=tokenizer,
+#     max_new_tokens=10, truncation=True,  # Truncate input sequences
+# )
+# phi4_llm = HuggingFacePipeline(pipeline=pipe)
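
Note that Phi4MiniONNXLLM.__call__ runs a single forward pass and returns raw logits; there is no generation loop. A minimal sketch of driving it, assuming the tokenizer from microsoft/Phi-4-mini-instruct and that the exported graph accepts input_ids alone (some ONNX exports also require attention_mask or past key/value inputs, in which case session.run will report the missing names):

# Sketch only: the tokenizer repo and single-input graph are assumptions.
import numpy as np
from transformers import AutoTokenizer

onnx_llm = Phi4MiniONNXLLM(
    "microsoft/Phi-4-mini-instruct-onnx",
    "cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4",
)
tok = AutoTokenizer.from_pretrained("microsoft/Phi-4-mini-instruct")
ids = tok("What moved markets today?", return_tensors="np")["input_ids"].astype(np.int64)
logits = onnx_llm(ids)                   # shape ~ (batch, seq_len, vocab_size)
next_id = int(np.argmax(logits[0, -1]))  # greedy pick of the next token
print(tok.decode([next_id]))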
retriever/__init__.py CHANGED
@@ -1,4 +1,5 @@
 """Module for retrievers that fetch documents from various sources."""
+from venv import logger
 from langchain_core.retrievers import BaseRetriever
 from langchain_core.vectorstores import VectorStoreRetriever
 from langchain_core.documents import Document
@@ -22,9 +23,9 @@ class DocRetriever(BaseRetriever):
         list: A list of Document objects with relevant metadata.
     """
     retriever: VectorStoreRetriever = None
-    k: int = 10
+    k: int = 5
 
-    def __init__(self, req, k: int = 10) -> None:
+    def __init__(self, req, k: int = 2) -> None:
         super().__init__()
         # _filter={}
         # if req.site != []:
@@ -32,30 +33,35 @@ class DocRetriever(BaseRetriever):
         # if req.id != []:
         #     _filter.update({"id": {"$in": req.id}})
         self.retriever = vectorstore.as_retriever(
-            search_type='similarity_score_threshold',
+            search_type='similarity',
             search_kwargs={
                 "k": k,
                 # "filter": _filter,
-                "score_threshold": .1
+                # "score_threshold": .1
             }
         )
 
     def _get_relevant_documents(self, query: str, *, run_manager) -> list:
-        retrieved_docs = self.retriever.invoke(query)
-        doc_lst = []
-        for doc in retrieved_docs:
-            # date = str(doc.metadata['publishDate'])
-            doc_lst.append(Document(
-                page_content = doc.page_content,
-                metadata = {
-                    "content": doc.page_content,
-                    # "id": doc.metadata['id'],
-                    # "title": doc.metadata['title'],
-                    # "site": doc.metadata['site'],
-                    # "link": doc.metadata['link'],
-                    # "publishDate": doc.metadata['publishDate'].strftime('%Y-%m-%d'),
-                    # 'web': False,
-                    # "source": "Finfast"
-                }
-            ))
-        return doc_lst
+        try:
+            retrieved_docs = self.retriever.invoke(query)
+            doc_lst = []
+            for doc in retrieved_docs:
+                # date = str(doc.metadata['publishDate'])
+                doc_lst.append(Document(
+                    page_content = doc.page_content,
+                    metadata = {
+                        "content": doc.page_content,
+                        # "id": doc.metadata['id'],
+                        # "title": doc.metadata['title'],
+                        # "site": doc.metadata['site'],
+                        # "link": doc.metadata['link'],
+                        # "publishDate": doc.metadata['publishDate'].strftime('%Y-%m-%d'),
+                        # 'web': False,
+                        # "source": "Finfast"
+                    }
+                ))
+            # print(doc_lst)
+            return doc_lst
+        except RuntimeError as e:
+            logger.error("Error retrieving documents: %s", e)
+            return []
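
With the try/except in place, a failed vector search now degrades to an empty list instead of raising. A quick hypothetical smoke test, assuming the vectorstore behind DocRetriever is reachable:

# Hypothetical check: BaseRetriever exposes the runnable interface, so
# .invoke() dispatches to _get_relevant_documents under the hood.
retriever = DocRetriever(req={"query": "stock market"}, k=2)
docs = retriever.invoke("What is the latest news on the stock market?")
print(len(docs))  # 0 if a RuntimeError was caught and logged
for doc in docs:
    print(doc.page_content[:80])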
token.pickle CHANGED
Binary files a/token.pickle and b/token.pickle differ