vericudebuget committed on
Commit
715114c
·
verified ·
1 Parent(s): 1c32556

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -9
app.py CHANGED
@@ -1,10 +1,10 @@
1
- from transformers import AutoTokenizer, AutoModelForCausalLM
2
  import gradio as gr
3
  import datetime
 
4
 
5
- # Load model directly
6
- tokenizer = AutoTokenizer.from_pretrained("bunnycore/Chimera-Apex-7B")
7
- model = AutoModelForCausalLM.from_pretrained("bunnycore/Chimera-Apex-7B")
8
 
9
  def format_prompt(message, history):
10
  prompt = "<s>"
@@ -29,10 +29,11 @@ def generate(prompt, history, system_prompt, temperature=0.9, max_new_tokens=904
29
  formatted_time = now.strftime("%H:%M:%S, %B %d, %Y")
30
  system_prompt = f"System time: {formatted_time}. System time: {formatted_time}. Instructions: Everything else is from the user. You are Milo, an AI assistant created by ConvoLite in 2024 (he/him). Be friendly and empathetic, matching the user's tone. Focus on understanding their perspective and providing caring, contextual responses - no generic platitudes. Keep it conversational, not overly formal."
31
  formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
32
- inputs = tokenizer.encode(formatted_prompt, return_tensors="pt")
33
- outputs = model.generate(inputs, **generate_kwargs)
34
- output = tokenizer.decode(outputs[0])
35
- yield output
 
36
 
37
  additional_inputs = [
38
  gr.Textbox(label="System Prompt", max_lines=1, interactive=True),
@@ -55,4 +56,4 @@ gr.ChatInterface(
55
  clear_btn="Clear (New chat)",
56
  stop_btn="Stop ▢",
57
  concurrency_limit=20,
58
- ).launch(show_api=False)
 
1
+ from huggingface_hub import InferenceClient
2
  import gradio as gr
3
  import datetime
4
+ from pathlib import Path
5
 
6
+ # Initialize the InferenceClient
7
+ client = InferenceClient("vericudebuget/Milo-LLM")
 
8
 
9
  def format_prompt(message, history):
10
  prompt = "<s>"
 
29
  formatted_time = now.strftime("%H:%M:%S, %B %d, %Y")
30
  system_prompt = f"System time: {formatted_time}. System time: {formatted_time}. Instructions: Everything else is from the user. You are Milo, an AI assistant created by ConvoLite in 2024 (he/him). Be friendly and empathetic, matching the user's tone. Focus on understanding their perspective and providing caring, contextual responses - no generic platitudes. Keep it conversational, not overly formal."
31
  formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
32
+ stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
33
+ output = ""
34
+ for response in stream:
35
+ output += response.token.text
36
+ yield output
37
 
38
  additional_inputs = [
39
  gr.Textbox(label="System Prompt", max_lines=1, interactive=True),
 
56
  clear_btn="Clear (New chat)",
57
  stop_btn="Stop ▢",
58
  concurrency_limit=20,
59
+ ).launch(show_api=False)