vericudebuget committed (verified)
Commit 1c32556 · 1 Parent(s): bbeb8f2

Update app.py

Files changed (1):
  1. app.py +9 -10
app.py CHANGED
@@ -1,10 +1,10 @@
-from huggingface_hub import InferenceClient
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import gradio as gr
 import datetime
-from pathlib import Path
 
-# Initialize the InferenceClient
-client = InferenceClient("vericudebuget/Milo-LLM")
+# Load model directly
+tokenizer = AutoTokenizer.from_pretrained("bunnycore/Chimera-Apex-7B")
+model = AutoModelForCausalLM.from_pretrained("bunnycore/Chimera-Apex-7B")
 
 def format_prompt(message, history):
     prompt = "<s>"
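
A note on the new load path in the hunk above: from_pretrained with no extra arguments materializes full float32 weights, roughly 28 GB for a 7B model, which will likely exhaust memory on a typical Space. Below is a minimal sketch of a lighter load, assuming a CUDA host with accelerate installed; the dtype and device choices are assumptions, not part of this commit.

# Sketch only, not part of the commit. Assumes torch and accelerate are
# installed and a GPU is available; adjust dtype/device_map to the hardware.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bunnycore/Chimera-Apex-7B")
model = AutoModelForCausalLM.from_pretrained(
    "bunnycore/Chimera-Apex-7B",
    torch_dtype=torch.float16,  # halves memory versus the float32 default
    device_map="auto",          # place weights on available GPU(s), spill to CPU
    low_cpu_mem_usage=True,     # stream weights in rather than double-allocating
)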
@@ -29,11 +29,10 @@ def generate(prompt, history, system_prompt, temperature=0.9, max_new_tokens=904
     formatted_time = now.strftime("%H:%M:%S, %B %d, %Y")
     system_prompt = f"System time: {formatted_time}. System time: {formatted_time}. Instructions: Everything else is from the user. You are Milo, an AI assistant created by ConvoLite in 2024 (he/him). Be friendly and empathetic, matching the user's tone. Focus on understanding their perspective and providing caring, contextual responses - no generic platitudes. Keep it conversational, not overly formal."
     formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
-    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
-    output = ""
-    for response in stream:
-        output += response.token.text
-        yield output
+    inputs = tokenizer.encode(formatted_prompt, return_tensors="pt")
+    outputs = model.generate(inputs, **generate_kwargs)
+    output = tokenizer.decode(outputs[0])
+    yield output
 
 additional_inputs = [
     gr.Textbox(label="System Prompt", max_lines=1, interactive=True),
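
Two behavioral notes on the replacement above: model.generate blocks until the full completion is done, so the token-by-token streaming the InferenceClient call provided is gone, and tokenizer.decode(outputs[0]) returns the prompt plus special tokens along with the new text (the removed call passed return_full_text=False). A sketch of how both could be restored with the TextIteratorStreamer from transformers follows; the threading pattern is assumed here, not something this commit contains.

# Sketch only, not part of the commit. Reuses the module-level tokenizer and
# model, plus the generate_kwargs dict of sampling options built in generate().
from threading import Thread
from transformers import TextIteratorStreamer

def generate_streaming(formatted_prompt, generate_kwargs):
    inputs = tokenizer(formatted_prompt, return_tensors="pt")
    # skip_prompt drops the echoed input; skip_special_tokens drops <s> and </s>
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    Thread(target=model.generate,
           kwargs=dict(**inputs, streamer=streamer, **generate_kwargs)).start()
    output = ""
    for new_text in streamer:  # yields decoded text as tokens are produced
        output += new_text
        yield output           # same incremental contract the old stream had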
@@ -56,4 +55,4 @@ gr.ChatInterface(
     clear_btn="Clear (New chat)",
     stop_btn="Stop ▢",
     concurrency_limit=20,
-).launch(show_api=False)
+).launch(show_api=False)
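
One follow-up on generate_kwargs, which is built earlier in generate() (outside these hunks) for InferenceClient.text_generation and now goes to model.generate unchanged: the temperature, top_p, repetition_penalty, and max_new_tokens names carry over, but generate only applies the sampling options when do_sample=True is set, and warns otherwise. A sketch of a compatible dict follows; every value not visible in the hunk headers is an assumption.

# Sketch only. Values not shown in the diff are illustrative assumptions.
generate_kwargs = dict(
    do_sample=True,          # required for temperature/top_p to take effect locally
    temperature=0.9,         # matches the default visible in the second hunk header
    top_p=0.95,              # assumed
    repetition_penalty=1.0,  # assumed
    max_new_tokens=1024,     # assumed; the original default is truncated in the header
    pad_token_id=tokenizer.eos_token_id,  # avoids the missing-pad-token warning
)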