Refactor model initialization and update query handling in main.py; add new playground script for text generation
Files changed:
- chain/__init__.py (+7 -7)
- main.py (+3 -2)
- models/llm/__init__.py (+15 -5)
- playground/phi-4-mini-instruct.py (+23 -0)
chain/__init__.py
CHANGED
@@ -16,16 +16,16 @@ from langchain_huggingface import HuggingFacePipeline
 
 from models.llm import GPTModel, Phi4MiniONNXLLM, HuggingfaceModel
 
-llm = GPTModel()
-REPO_ID = "microsoft/Phi-4-mini-instruct-onnx"
-SUBFOLDER = "cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4"
-llm = Phi4MiniONNXLLM(REPO_ID, SUBFOLDER)
+# llm = GPTModel()
+# REPO_ID = "microsoft/Phi-4-mini-instruct-onnx"
+# SUBFOLDER = "cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4"
+# llm = Phi4MiniONNXLLM(REPO_ID, SUBFOLDER)
 
-MODEL_NAME = "openai-community/gpt2"
+# MODEL_NAME = "openai-community/gpt2"
 MODEL_NAME = "microsoft/phi-1_5"
-llm = HuggingfaceModel(MODEL_NAME)
+# llm = HuggingfaceModel(MODEL_NAME)
 
-llm = HuggingFacePipeline.from_model_id(
+hf_llm = HuggingFacePipeline.from_model_id(
     model_id="microsoft/Phi-4",
     task="text-generation",
     pipeline_kwargs={
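Note on the refactor: the alternative backends (GPT-2, the ONNX Phi-4-mini build, the plain HuggingfaceModel wrapper) stay in the file as commented-out lines and are swapped by hand. If that switching should ever become less manual, the same choices can be expressed as a small factory. The sketch below is hypothetical, not code from this commit; build_llm is an invented name and the max_new_tokens value is assumed, since the diff truncates the pipeline_kwargs block.

from langchain_huggingface import HuggingFacePipeline

from models.llm import GPTModel, Phi4MiniONNXLLM, HuggingfaceModel

def build_llm(backend="hf-pipeline"):
    # Collects the backends that chain/__init__.py toggles via comments.
    if backend == "gpt2":
        return GPTModel()
    if backend == "phi4-onnx":
        return Phi4MiniONNXLLM(
            "microsoft/Phi-4-mini-instruct-onnx",
            "cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4",
        )
    if backend == "phi-1_5":
        return HuggingfaceModel("microsoft/phi-1_5")
    # Default: the HuggingFacePipeline path this commit switches to.
    # max_new_tokens is an assumed value; the diff cuts off here.
    return HuggingFacePipeline.from_model_id(
        model_id="microsoft/Phi-4",
        task="text-generation",
        pipeline_kwargs={"max_new_tokens": 256},
    )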
main.py
CHANGED
@@ -11,9 +11,10 @@ if __name__ == "__main__":
     # mail.collect()
     # mail.get_documents()
     req = {
-        "query": "
+        "query": "Just give me an update?",
     }
     chain = RAGChain(DocRetriever(req=req))
     result = chain.invoke({"input": req['query']},
-                          config={"configurable": {"session_id": "
+                          config={"configurable": {"session_id": "20250301"}})
+    print(result)
     print(result.get("answer"))
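The config={"configurable": {"session_id": "20250301"}} argument follows LangChain's RunnableWithMessageHistory convention, which suggests RAGChain threads chat history through a history-aware runnable keyed by session id. A minimal sketch of that pattern, assuming nothing about this repo's RAGChain internals (RunnablePassthrough stands in for the real chain):

from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain_core.runnables import RunnablePassthrough
from langchain_core.runnables.history import RunnableWithMessageHistory

histories = {}

def get_history(session_id):
    # One in-memory history per session id, created on first use.
    if session_id not in histories:
        histories[session_id] = InMemoryChatMessageHistory()
    return histories[session_id]

with_history = RunnableWithMessageHistory(
    RunnablePassthrough(),  # stand-in for the actual RAG chain
    get_history,
    input_messages_key="input",
    history_messages_key="chat_history",
)

result = with_history.invoke(
    {"input": "Just give me an update?"},
    config={"configurable": {"session_id": "20250301"}},
)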
models/llm/__init__.py
CHANGED
@@ -54,6 +54,7 @@ class Phi4MiniONNXLLM:
         Performs inference on the given input data and returns the model's output.
     """
     def __init__(self, repo_id, subfolder, onnx_file="model.onnx", weights_file="model.onnx.data"):
+        self.repo_id = repo_id
         model_path = hf_hub_download(repo_id=repo_id, filename=f"{subfolder}/{onnx_file}")
         weights_path = hf_hub_download(repo_id=repo_id, filename=f"{subfolder}/{weights_file}")
         self.session = ort.InferenceSession(model_path)
@@ -63,10 +64,17 @@ class Phi4MiniONNXLLM:
         self.input_name = self.session.get_inputs()[0].name
         self.output_name = self.session.get_outputs()[0].name
 
-    def __call__(self, input_ids):
+    def __call__(self, input_text):
         # Assuming input_ids is a tensor or numpy array
-        outputs = self.session.run([self.output_name], {self.input_name: input_ids})
-        return outputs
+        tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-4-mini-instruct-onnx")
+        inputs = tokenizer(input_text, return_tensors="pt")
+        input_feed = {
+            self.input_name: inputs["input_ids"].numpy(),
+            "attention_mask": inputs["attention_mask"].numpy(),
+            # Add past_key_values if applicable
+        }
+        outputs = self.session.run([self.output_name], input_feed)
+        return outputs
 
 class HuggingfaceModel(HuggingFacePipeline):
     """
@@ -81,12 +89,14 @@ class HuggingfaceModel(HuggingFacePipeline):
         __init__(name, max_tokens=200):
             Initializes the HuggingfaceModel with the specified model name and maximum token limit.
     """
-    def __init__(self, name, max_tokens=200):
+    def __init__(self, name, max_tokens=500):
         super().__init__(pipeline=pipeline(
             "text-generation",
             model=AutoModelForCausalLM.from_pretrained(name),
             tokenizer=AutoTokenizer.from_pretrained(name),
-            max_new_tokens=max_tokens))
+            max_new_tokens=max_tokens
+            )
+        )
 
 # model_name = "microsoft/phi-1_5"
 # tokenizer = AutoTokenizer.from_pretrained(model_name)
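Worth noting: the new __call__ performs a single forward pass, so it returns onnxruntime's raw output list (logits), not generated text. Under the assumption that the exported graph accepts only input_ids and attention_mask (the int4 mobile export may also require past_key_values, as the diff's own comment acknowledges), one next token can be decoded greedily like this:

import numpy as np
from transformers import AutoTokenizer

from models.llm import Phi4MiniONNXLLM

llm = Phi4MiniONNXLLM(
    "microsoft/Phi-4-mini-instruct-onnx",
    "cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4",
)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-4-mini-instruct-onnx")

# __call__ returns session.run's output list; the first entry is the
# logits array with shape (batch, sequence_length, vocab_size).
(logits,) = llm("Hello, how are you?")
next_token_id = int(np.argmax(logits[0, -1]))
print(tokenizer.decode([next_token_id]))

Full generation would loop this step, appending next_token_id and re-feeding the sequence (or a KV cache) on each iteration.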
playground/phi-4-mini-instruct.py
ADDED
@@ -0,0 +1,23 @@
+from langchain_huggingface import HuggingFacePipeline
+
+# Define the model ID
+model_id = "gpt2"
+model_id = "microsoft/Phi-4-mini-instruct"
+model_id = "Qwen/Qwen2.5-7B-Instruct"
+model_id = "microsoft/Phi-3-small-8k-instruct"
+
+# Create a pipeline for text generation
+llm = HuggingFacePipeline.from_model_id(
+    model_id=model_id,
+    task="text-generation",
+    device=-1,
+    # trust_remote_code=True,
+    pipeline_kwargs={
+        "max_new_tokens": 256,
+        "top_k": 50
+    },
+)
+
+# Use the model to generate text
+response = llm.invoke("Hello, how are you?")
+print(response)
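Three of the four candidate model_ids are instruct-tuned, so raw completion on a bare greeting can wander. A variant that continues the script above, reusing its model_id and llm but wrapping the prompt in the tokenizer's chat template (this assumes the chosen checkpoint ships a template; Phi-3-small additionally needs the trust_remote_code=True line uncommented):

from transformers import AutoTokenizer

# Format the user turn with the model's own chat template.
tokenizer = AutoTokenizer.from_pretrained(model_id)
prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Hello, how are you?"}],
    tokenize=False,
    add_generation_prompt=True,
)
print(llm.invoke(prompt))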