gavinzli committed · Commit a4e857f · 1 Parent(s): e83b975

Refactor model initialization and update query handling in main.py; add new playground script for text generation

chain/__init__.py CHANGED
@@ -16,16 +16,16 @@ from langchain_huggingface import HuggingFacePipeline
 
 from models.llm import GPTModel, Phi4MiniONNXLLM, HuggingfaceModel
 
-llm = GPTModel()
-REPO_ID = "microsoft/Phi-4-mini-instruct-onnx"
-SUBFOLDER = "cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4"
-phi4_llm = Phi4MiniONNXLLM(REPO_ID, SUBFOLDER)
+# llm = GPTModel()
+# REPO_ID = "microsoft/Phi-4-mini-instruct-onnx"
+# SUBFOLDER = "cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4"
+# llm = Phi4MiniONNXLLM(REPO_ID, SUBFOLDER)
 
-MODEL_NAME = "openai-community/gpt2"
+# MODEL_NAME = "openai-community/gpt2"
 MODEL_NAME = "microsoft/phi-1_5"
-hf_llm = HuggingfaceModel(MODEL_NAME)
+# llm = HuggingfaceModel(MODEL_NAME)
 
-phi4_llm = HuggingFacePipeline.from_model_id(
+hf_llm = HuggingFacePipeline.from_model_id(
     model_id="microsoft/Phi-4",
     task="text-generation",
     pipeline_kwargs={
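
Note: the hunk above is truncated at `pipeline_kwargs={`, so the actual keyword values are not visible in this diff. A minimal sketch of what the completed call typically looks like; the `max_new_tokens` value below is an assumed placeholder, not taken from the commit:

from langchain_huggingface import HuggingFacePipeline

# Sketch only: the kwargs past the truncation point are assumptions.
hf_llm = HuggingFacePipeline.from_model_id(
    model_id="microsoft/Phi-4",
    task="text-generation",
    pipeline_kwargs={
        "max_new_tokens": 256,  # placeholder; real value not shown in the hunk
    },
)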
main.py CHANGED
@@ -11,9 +11,10 @@ if __name__ == "__main__":
     # mail.collect()
     # mail.get_documents()
     req = {
-        "query": "What is the latest news on the stock market?",
+        "query": "Just give me an update?",
     }
     chain = RAGChain(DocRetriever(req=req))
     result = chain.invoke({"input": req['query']},
-                          config={"configurable": {"session_id": "123"}})
+                          config={"configurable": {"session_id": "20250301"}})
+    print(result)
     print(result.get("answer"))
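
For context, the `session_id` under `config["configurable"]` is the key LangChain's history wrapper uses to select a per-session message store. RAGChain's internals are not shown in this commit, so the following is a hypothetical sketch of that pattern, not the project's actual code:

from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

_sessions = {}

def get_history(session_id: str) -> InMemoryChatMessageHistory:
    # One message history per session_id, created on first use.
    return _sessions.setdefault(session_id, InMemoryChatMessageHistory())

# Wrapping a runnable this way is what makes
# config={"configurable": {"session_id": "20250301"}} meaningful:
# chain = RunnableWithMessageHistory(
#     base_chain, get_history,
#     input_messages_key="input", history_messages_key="chat_history")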
models/llm/__init__.py CHANGED
@@ -54,6 +54,7 @@ class Phi4MiniONNXLLM:
         Performs inference on the given input data and returns the model's output.
     """
     def __init__(self, repo_id, subfolder, onnx_file="model.onnx", weights_file="model.onnx.data"):
+        self.repo_id = repo_id
         model_path = hf_hub_download(repo_id=repo_id, filename=f"{subfolder}/{onnx_file}")
         weights_path = hf_hub_download(repo_id=repo_id, filename=f"{subfolder}/{weights_file}")
         self.session = ort.InferenceSession(model_path)
@@ -63,10 +64,17 @@
         self.input_name = self.session.get_inputs()[0].name
         self.output_name = self.session.get_outputs()[0].name
 
-    def __call__(self, input_ids):
+    def __call__(self, input_text):
         # Assuming input_ids is a tensor or numpy array
-        outputs = self.session.run([self.output_name], {self.input_name: input_ids})
-        return outputs[0]
+        tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-4-mini-instruct-onnx")
+        inputs = tokenizer(input_text, return_tensors="pt")
+        input_feed = {
+            self.input_name: inputs["input_ids"].numpy(),
+            "attention_mask": inputs["attention_mask"].numpy(),
+            # Add past_key_values if applicable
+        }
+        outputs = self.session.run([self.output_name], input_feed)
+        return outputs
 
 class HuggingfaceModel(HuggingFacePipeline):
     """
@@ -81,12 +89,14 @@ class HuggingfaceModel(HuggingFacePipeline):
     __init__(name, max_tokens=200):
         Initializes the HuggingfaceModel with the specified model name and maximum token limit.
     """
-    def __init__(self, name, max_tokens=200):
+    def __init__(self, name, max_tokens=500):
         super().__init__(pipeline=pipeline(
             "text-generation",
             model=AutoModelForCausalLM.from_pretrained(name),
             tokenizer=AutoTokenizer.from_pretrained(name),
-            max_new_tokens=max_tokens))
+            max_new_tokens=max_tokens
+            )
+        )
 
 # model_name = "microsoft/phi-1_5"
 # tokenizer = AutoTokenizer.from_pretrained(model_name)
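
A caveat on the new `__call__`: a single `session.run` forward pass returns raw logits, not text, so callers still need a decode loop to produce tokens. A rough greedy-decoding sketch, under the assumption that the ONNX graph accepts `input_ids`/`attention_mask` and emits logits of shape (batch, seq_len, vocab); this is illustrative, not part of the commit:

import numpy as np

def greedy_generate(llm, tokenizer, prompt, max_new_tokens=32):
    # llm is a Phi4MiniONNXLLM instance; we reuse its session and
    # input/output names from the class above (assumption: logits output).
    input_ids = tokenizer(prompt, return_tensors="np")["input_ids"]
    for _ in range(max_new_tokens):
        attention_mask = np.ones_like(input_ids)
        logits = llm.session.run(
            [llm.output_name],
            {llm.input_name: input_ids, "attention_mask": attention_mask},
        )[0]
        # Greedy pick of the most likely next token, then append it.
        next_id = logits[:, -1, :].argmax(axis=-1, keepdims=True)
        input_ids = np.concatenate([input_ids, next_id], axis=-1)
    return tokenizer.decode(input_ids[0], skip_special_tokens=True)

Without a KV cache ("past_key_values" in the diff's comment) this re-runs the full prefix each step, which is correct but slow; that is presumably why the commit leaves the cache as a TODO.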
playground/phi-4-mini-instruct.py ADDED
@@ -0,0 +1,23 @@
+from langchain_huggingface import HuggingFacePipeline
+
+# Define the model ID
+model_id = "gpt2"
+model_id = "microsoft/Phi-4-mini-instruct"
+model_id = "Qwen/Qwen2.5-7B-Instruct"
+model_id = "microsoft/Phi-3-small-8k-instruct"
+
+# Create a pipeline for text generation
+llm = HuggingFacePipeline.from_model_id(
+    model_id=model_id,
+    task="text-generation",
+    device=-1,
+    # trust_remote_code=True,
+    pipeline_kwargs={
+        "max_new_tokens": 256,
+        "top_k": 50
+    },
+)
+
+# Use the model to generate text
+response = llm.invoke("Hello, how are you?")
+print(response)
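
One observation on the new script: the stacked `model_id` assignments mean only the last one ("microsoft/Phi-3-small-8k-instruct") takes effect; the earlier ones read as leftover toggles. Also, instruct-tuned models generally respond better when the prompt goes through their chat template. A small sketch using `ChatHuggingFace` from the same package, reusing the `llm` built above (an optional variant, not something the commit does):

from langchain_huggingface import ChatHuggingFace

# Wraps the pipeline so prompts are formatted with the model's chat
# template before generation (assumes `llm` from the script above).
chat_model = ChatHuggingFace(llm=llm)
print(chat_model.invoke("Hello, how are you?").content)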