Athspi committed
Commit 346197d · verified · 1 Parent(s): 5c7d9ff

Update app.py

Files changed (1):
  app.py: +35 -20
app.py CHANGED
@@ -1,46 +1,61 @@
 import gradio as gr
+import numpy as np
 from transformers import AutoTokenizer
 import onnxruntime as ort
-import numpy as np
 
-# Local model directory
-model_dir = "cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4"
+# Load the tokenizer from the Hugging Face hub.
+# This loads files like `tokenizer.json`, `vocab.json`, etc. from the repository root.
+model_repo = "microsoft/Phi-4-mini-instruct-onnx"
+tokenizer = AutoTokenizer.from_pretrained(model_repo)
 
-# Load tokenizer and ONNX model
-tokenizer = AutoTokenizer.from_pretrained(model_dir)
-session = ort.InferenceSession(f"{model_dir}/model.onnx", providers=["CPUExecutionProvider"])
+# Specify the relative path to the ONNX model files stored in the repository subfolder.
+# These LFS files must be available locally, or the environment must be able to fetch them.
+onnx_model_path = "cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx"
+
+# Create an ONNX Runtime session.
+session = ort.InferenceSession(onnx_model_path, providers=["CPUExecutionProvider"])
 
-# Inference function
 def generate_response(prompt):
+    # Prepare the prompt with a simple instruction format.
     full_prompt = f"<|user|>\n{prompt}\n<|assistant|>\n"
+
+    # Tokenize the input.
+    # The tokenizer returns NumPy arrays (using return_tensors="np").
     inputs = tokenizer(full_prompt, return_tensors="np")
-
-    # ONNX model expects input_ids and attention_mask
+
+    # ONNX Runtime requires inputs of type int64.
     ort_inputs = {
         "input_ids": inputs["input_ids"].astype(np.int64),
         "attention_mask": inputs["attention_mask"].astype(np.int64)
     }
-
-    # Run model
+
+    # Run the model inference.
     outputs = session.run(None, ort_inputs)
+
+    # Assuming the model returns logits or generated IDs in the first element.
+    # Here we assume the model output contains generated token IDs.
     generated_ids = outputs[0]
-
-    # Decode output
+
+    # Decode the generated token IDs into text.
     response = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
-
-    # Clean response
+
+    # Optionally, strip the prompt if the model echoes the input tokens back.
     if "<|assistant|>" in response:
         response = response.split("<|assistant|>")[-1].strip()
+
     return response
 
-# Gradio interface
-demo = gr.Interface(
+# Create a Gradio interface to interact with the model.
+interface = gr.Interface(
     fn=generate_response,
     inputs=gr.Textbox(label="Your Prompt", placeholder="Type your question here...", lines=4),
     outputs=gr.Textbox(label="AI Response"),
     title="Phi-4-Mini ONNX Chatbot",
-    description="Runs locally with ONNX for fast inference (int4 optimized)."
+    description=(
+        "Chat interface powered by microsoft/Phi-4-mini-instruct-onnx. "
+        "The ONNX model is loaded from the int4-optimized subfolder (cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4)."
+    )
 )
 
-# Launch the app
-demo.launch()
+# Launch the Gradio app.
+interface.launch()
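
Two practical caveats for anyone running this revision. First, `ort.InferenceSession` opens the ONNX file from a relative path, so the LFS files from the `cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4` subfolder of microsoft/Phi-4-mini-instruct-onnx must already be on disk. Second, as the committed comments themselves hedge, a single `session.run` call on a causal decoder typically returns next-token logits rather than a finished sequence, so real generation needs a decode loop (or the onnxruntime-genai runtime these exports are packaged for). The sketch below illustrates both points under stated assumptions: it assumes a graph whose only inputs are `input_ids`/`attention_mask` and whose first output is logits of shape (batch, seq_len, vocab); the official genai exports additionally expect past-key-value inputs. `greedy_generate` is a hypothetical helper, not part of the committed app.

# Sketch (not part of the commit): fetch the int4 ONNX files and decode greedily.
# Assumes the graph's only inputs are input_ids/attention_mask and that its
# first output is next-token logits; the genai-ready exports also expect
# past-key-value inputs, in which case onnxruntime-genai is the easier route.
import os
import numpy as np
import onnxruntime as ort
from huggingface_hub import snapshot_download
from transformers import AutoTokenizer

repo_id = "microsoft/Phi-4-mini-instruct-onnx"
subfolder = "cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4"

# Download just the needed subfolder (LFS weights included) from the Hub.
local_root = snapshot_download(repo_id, allow_patterns=[f"{subfolder}/*"])
session = ort.InferenceSession(
    os.path.join(local_root, subfolder, "model.onnx"),
    providers=["CPUExecutionProvider"],
)
tokenizer = AutoTokenizer.from_pretrained(repo_id)

def greedy_generate(prompt, max_new_tokens=128):  # hypothetical helper
    input_ids = tokenizer(
        f"<|user|>\n{prompt}\n<|assistant|>\n", return_tensors="np"
    )["input_ids"].astype(np.int64)
    for _ in range(max_new_tokens):
        logits = session.run(None, {
            "input_ids": input_ids,
            "attention_mask": np.ones_like(input_ids),
        })[0]
        next_id = int(np.argmax(logits[0, -1]))  # greedy pick at the last position
        if next_id == tokenizer.eos_token_id:
            break
        input_ids = np.concatenate(
            [input_ids, np.array([[next_id]], dtype=np.int64)], axis=1
        )
    return tokenizer.decode(input_ids[0], skip_special_tokens=True)

Note that this loop re-runs the full sequence every step (quadratic cost without a KV cache); onnxruntime-genai handles caching, sampling, and stop conditions for these exports, which is why the upstream documentation steers users toward it.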