Yoxas committed
Commit 0853b21 · verified · 1 Parent(s): 384b004

Update app.py

Files changed (1)
  1. app.py +13 -11
app.py CHANGED
@@ -1,15 +1,18 @@
 import gradio as gr
 from transformers import pipeline
 import spaces
+import sentencepiece
 
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
 try:
-    pipe = pipeline("text-generation", model="microsoft/Phi-3-mini-4k-instruct", framework="pt", trust_remote_code=True)
+    tokenizer = AutoTokenizer.from_pretrained("Yoxas/autotrain-phi3-statistical", trust_remote_code=True)
+    model = AutoModelForCausalLM.from_pretrained("Yoxas/autotrain-phi3-statistical", trust_remote_code=True)
 except Exception as e:
     print(f"Error loading model: {e}")
-    pipe = None
+    tokenizer = None
+    model = None
 
 @spaces.GPU(duration=120)
 def respond(
@@ -20,6 +23,9 @@ def respond(
     temperature,
     top_p,
 ):
+    if tokenizer is None or model is None:
+        return "Error: Model not loaded properly."
+
     messages = [{"role": "system", "content": system_message}]
 
     for val in history:
@@ -30,17 +36,13 @@ def respond(
 
     messages.append({"role": "user", "content": message})
 
+    inputs = tokenizer(messages, return_tensors="pt", padding=True, truncation=True)
+
     response = ""
 
-    for message in pipe(
-        messages,
-        max_length=max_tokens,
-        do_sample=True,
-        temperature=temperature,
-        top_p=top_p,
-        return_dict_in_generate=True,
-    ):
-        token = message["generated_text"]
+    for i in range(max_tokens):
+        outputs = model.generate(inputs.input_ids, attention_mask=inputs.attention_mask, max_length=inputs.input_ids.shape[-1] + 1, temperature=temperature, top_p=top_p)
+        token = tokenizer.decode(outputs[0, -1:], skip_special_tokens=True)
 
         response += token
         yield response
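As committed, this revision has three issues worth flagging: `AutoTokenizer` and `AutoModelForCausalLM` are called in the `try:` block but never imported (only `pipeline` is); `tokenizer(messages, ...)` receives a list of chat dicts rather than strings, which raises an error instead of encoding the conversation; and the loop passes the original `inputs` to `model.generate` on every iteration with `max_length` one past the prompt, so it re-samples the first token `max_tokens` times instead of extending the sequence. Below is a minimal sketch of a corrected `respond`, assuming the `Yoxas/autotrain-phi3-statistical` checkpoint ships a chat template; names and the tuple-style `history` follow the commit.

# Sketch only: corrected loading and streaming for this commit's app.py.
import gradio as gr
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer  # missing in the commit

try:
    tokenizer = AutoTokenizer.from_pretrained("Yoxas/autotrain-phi3-statistical", trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained("Yoxas/autotrain-phi3-statistical", trust_remote_code=True)
except Exception as e:
    print(f"Error loading model: {e}")
    tokenizer = None
    model = None

@spaces.GPU(duration=120)
def respond(message, history, system_message, max_tokens, temperature, top_p):
    # yield, not return: respond is a generator, so a bare return value is lost.
    if tokenizer is None or model is None:
        yield "Error: Model not loaded properly."
        return

    messages = [{"role": "system", "content": system_message}]
    for user_turn, bot_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if bot_turn:
            messages.append({"role": "assistant", "content": bot_turn})
    messages.append({"role": "user", "content": message})

    # Chat messages must go through the model's chat template;
    # tokenizer(messages, ...) on a list of dicts does not do this.
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    response = ""
    for _ in range(max_tokens):
        # Feed the extended sequence back in each step; the committed loop
        # reuses the original prompt, so every step re-samples the same token.
        outputs = model.generate(
            input_ids,
            max_new_tokens=1,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            pad_token_id=tokenizer.eos_token_id,
        )
        next_ids = outputs[0, -1:]
        if next_ids.item() == tokenizer.eos_token_id:
            break
        input_ids = outputs  # carry the new token into the next iteration
        response += tokenizer.decode(next_ids, skip_special_tokens=True)
        yield response

The one-token-per-`generate` loop above only stays close to the committed structure; a more idiomatic way to stream from a Space is a single `generate` call on a background thread with `transformers.TextIteratorStreamer`. Note also that `import sentencepiece` does nothing at runtime here: the tokenizer pulls the package in indirectly, so it only needs to be installed (e.g. listed in requirements.txt).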