vericudebuget committed on
Commit
715114c
·
verified ·
1 Parent(s): 1c32556

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -9
app.py CHANGED
@@ -1,10 +1,10 @@
1
- from transformers import AutoTokenizer, AutoModelForCausalLM
2
  import gradio as gr
3
  import datetime
 
4
 
5
- # Load model directly
6
- tokenizer = AutoTokenizer.from_pretrained("bunnycore/Chimera-Apex-7B")
7
- model = AutoModelForCausalLM.from_pretrained("bunnycore/Chimera-Apex-7B")
8
 
9
  def format_prompt(message, history):
10
  prompt = "<s>"
@@ -29,10 +29,11 @@ def generate(prompt, history, system_prompt, temperature=0.9, max_new_tokens=904
29
  formatted_time = now.strftime("%H:%M:%S, %B %d, %Y")
30
  system_prompt = f"System time: {formatted_time}. System time: {formatted_time}. Instructions: Everything else is from the user. You are Milo, an AI assistant created by ConvoLite in 2024 (he/him). Be friendly and empathetic, matching the user's tone. Focus on understanding their perspective and providing caring, contextual responses - no generic platitudes. Keep it conversational, not overly formal."
31
  formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
32
- inputs = tokenizer.encode(formatted_prompt, return_tensors="pt")
33
- outputs = model.generate(inputs, **generate_kwargs)
34
- output = tokenizer.decode(outputs[0])
35
- yield output
 
36
 
37
  additional_inputs = [
38
  gr.Textbox(label="System Prompt", max_lines=1, interactive=True),
@@ -55,4 +56,4 @@ gr.ChatInterface(
55
  clear_btn="Clear (New chat)",
56
  stop_btn="Stop ▢",
57
  concurrency_limit=20,
58
- ).launch(show_api=False)
 
1
+ from huggingface_hub import InferenceClient
2
  import gradio as gr
3
  import datetime
4
+ from pathlib import Path
5
 
6
+ # Initialize the InferenceClient
7
+ client = InferenceClient("vericudebuget/Milo-LLM")
 
8
 
9
  def format_prompt(message, history):
10
  prompt = "<s>"
 
29
  formatted_time = now.strftime("%H:%M:%S, %B %d, %Y")
30
  system_prompt = f"System time: {formatted_time}. System time: {formatted_time}. Instructions: Everything else is from the user. You are Milo, an AI assistant created by ConvoLite in 2024 (he/him). Be friendly and empathetic, matching the user's tone. Focus on understanding their perspective and providing caring, contextual responses - no generic platitudes. Keep it conversational, not overly formal."
31
  formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
32
+ stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
33
+ output = ""
34
+ for response in stream:
35
+ output += response.token.text
36
+ yield output
37
 
38
  additional_inputs = [
39
  gr.Textbox(label="System Prompt", max_lines=1, interactive=True),
 
56
  clear_btn="Clear (New chat)",
57
  stop_btn="Stop ▢",
58
  concurrency_limit=20,
59
+ ).launch(show_api=False)