vericudebuget committed (verified)
Commit 1c32556 · 1 Parent(s): bbeb8f2

Update app.py

Files changed (1):
  1. app.py +9 -10
app.py CHANGED
@@ -1,10 +1,10 @@
-from huggingface_hub import InferenceClient
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import gradio as gr
 import datetime
-from pathlib import Path
 
-# Initialize the InferenceClient
-client = InferenceClient("vericudebuget/Milo-LLM")
+# Load model directly
+tokenizer = AutoTokenizer.from_pretrained("bunnycore/Chimera-Apex-7B")
+model = AutoModelForCausalLM.from_pretrained("bunnycore/Chimera-Apex-7B")
 
 def format_prompt(message, history):
     prompt = "<s>"
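
A note on the new load path in the hunk above: from_pretrained with no extra arguments materializes full float32 weights, roughly 28 GB for a 7B model, which will likely exhaust memory on a typical Space. Below is a minimal sketch of a lighter load, assuming a CUDA host with accelerate installed; the dtype and device choices are assumptions, not part of this commit.

# Sketch only, not part of the commit. Assumes torch and accelerate are
# installed and a GPU is available; adjust dtype/device_map to the hardware.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bunnycore/Chimera-Apex-7B")
model = AutoModelForCausalLM.from_pretrained(
    "bunnycore/Chimera-Apex-7B",
    torch_dtype=torch.float16,  # halves memory versus the float32 default
    device_map="auto",          # place weights on available GPU(s), spill to CPU
    low_cpu_mem_usage=True,     # stream weights in rather than double-allocating
)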
@@ -29,11 +29,10 @@ def generate(prompt, history, system_prompt, temperature=0.9, max_new_tokens=904
     formatted_time = now.strftime("%H:%M:%S, %B %d, %Y")
     system_prompt = f"System time: {formatted_time}. System time: {formatted_time}. Instructions: Everything else is from the user. You are Milo, an AI assistant created by ConvoLite in 2024 (he/him). Be friendly and empathetic, matching the user's tone. Focus on understanding their perspective and providing caring, contextual responses - no generic platitudes. Keep it conversational, not overly formal."
     formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
-    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
-    output = ""
-    for response in stream:
-        output += response.token.text
-        yield output
+    inputs = tokenizer.encode(formatted_prompt, return_tensors="pt")
+    outputs = model.generate(inputs, **generate_kwargs)
+    output = tokenizer.decode(outputs[0])
+    yield output
 
 additional_inputs = [
     gr.Textbox(label="System Prompt", max_lines=1, interactive=True),
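
Two behavioral notes on the replacement above: model.generate blocks until the full completion is done, so the token-by-token streaming the InferenceClient call provided is gone, and tokenizer.decode(outputs[0]) returns the prompt plus special tokens along with the new text (the removed call passed return_full_text=False). A sketch of how both could be restored with the TextIteratorStreamer from transformers follows; the threading pattern is assumed here, not something this commit contains.

# Sketch only, not part of the commit. Reuses the module-level tokenizer and
# model, plus the generate_kwargs dict of sampling options built in generate().
from threading import Thread
from transformers import TextIteratorStreamer

def generate_streaming(formatted_prompt, generate_kwargs):
    inputs = tokenizer(formatted_prompt, return_tensors="pt")
    # skip_prompt drops the echoed input; skip_special_tokens drops <s> and </s>
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    Thread(target=model.generate,
           kwargs=dict(**inputs, streamer=streamer, **generate_kwargs)).start()
    output = ""
    for new_text in streamer:  # yields decoded text as tokens are produced
        output += new_text
        yield output           # same incremental contract the old stream had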
@@ -56,4 +55,4 @@ gr.ChatInterface(
     clear_btn="Clear (New chat)",
     stop_btn="Stop ▢",
     concurrency_limit=20,
-).launch(show_api=False)
+).launch(show_api=False)
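
One follow-up on generate_kwargs, which is built earlier in generate() (outside these hunks) for InferenceClient.text_generation and now goes to model.generate unchanged: the temperature, top_p, repetition_penalty, and max_new_tokens names carry over, but generate only applies the sampling options when do_sample=True is set, and warns otherwise. A sketch of a compatible dict follows; every value not visible in the hunk headers is an assumption.

# Sketch only. Values not shown in the diff are illustrative assumptions.
generate_kwargs = dict(
    do_sample=True,          # required for temperature/top_p to take effect locally
    temperature=0.9,         # matches the default visible in the second hunk header
    top_p=0.95,              # assumed
    repetition_penalty=1.0,  # assumed
    max_new_tokens=1024,     # assumed; the original default is truncated in the header
    pad_token_id=tokenizer.eos_token_id,  # avoids the missing-pad-token warning
)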