# multi-llm / app.py
import os
import re

import gradio as gr
from openai import OpenAI
# App title and description
APP_TITLE = "No GPU: Multi-LLM Chat"
APP_DESCRIPTION = "Access and chat with multiple language models without requiring a GPU"
# Load the Hugging Face API token from the environment
ACCESS_TOKEN = os.getenv("HF_TOKEN")

# OpenAI-compatible client pointed at the Hugging Face Inference API
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
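# HF_TOKEN may be unset, in which case ACCESS_TOKEN is None and the client
# only fails on the first request. A minimal early warning (optional sketch):
if not ACCESS_TOKEN:
    print("Warning: HF_TOKEN is not set; API calls will fail with 401 Unauthorized.")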
# Model categories for better organization
MODEL_CATEGORIES = {
"Qwen": [
"Qwen/Qwen2.5-72B-Instruct",
"Qwen/Qwen2.5-3B-Instruct",
"Qwen/Qwen2.5-0.5B-Instruct",
"Qwen/Qwen2.5-Coder-32B-Instruct",
],
"Meta LLaMa": [
"meta-llama/Llama-3.3-70B-Instruct",
"meta-llama/Llama-3.1-70B-Instruct",
"meta-llama/Llama-3.0-70B-Instruct",
"meta-llama/Llama-3.2-3B-Instruct",
"meta-llama/Llama-3.2-1B-Instruct",
"meta-llama/Llama-3.1-8B-Instruct",
],
"Mistral": [
"mistralai/Mistral-Nemo-Instruct-2407",
"mistralai/Mixtral-8x7B-Instruct-v0.1",
"mistralai/Mistral-7B-Instruct-v0.3",
"mistralai/Mistral-7B-Instruct-v0.2",
],
"Microsoft Phi": [
"microsoft/Phi-3.5-mini-instruct",
"microsoft/Phi-3-mini-128k-instruct",
"microsoft/Phi-3-mini-4k-instruct",
],
"Other Models": [
"NousResearch/Hermes-3-Llama-3.1-8B",
"NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
"deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
"HuggingFaceH4/zephyr-7b-beta",
"HuggingFaceTB/SmolLM2-360M-Instruct",
"tiiuae/falcon-7b-instruct",
"01-ai/Yi-1.5-34B-Chat",
]
}
# Flatten the model list
ALL_MODELS = [m for models in MODEL_CATEGORIES.values() for m in models]
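# Example: dict insertion order is preserved, so ALL_MODELS[0] is
# "Qwen/Qwen2.5-72B-Instruct", which becomes the default selection below.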
def get_model_info(model_name):
    """Return a Markdown summary (organization, model, parameter size) for a repo id."""
    parts = model_name.split('/')
    if len(parts) != 2:
        return f"**Model:** {model_name}\n**Format:** Unknown"
    org, model = parts
    size_match = re.search(r'(\d+\.?\d*)B', model)
    size = size_match.group(1) + "B" if size_match else "Unknown"
    return f"**Organization:** {org}\n**Model:** {model}\n**Size:** {size}"
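# Illustrative example:
#   get_model_info("Qwen/Qwen2.5-72B-Instruct")
#   -> "**Organization:** Qwen\n**Model:** Qwen2.5-72B-Instruct\n**Size:** 72B"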
def respond(
message,
history,
system_message,
max_tokens,
temperature,
top_p,
frequency_penalty,
seed,
selected_model
):
    # A UI seed of -1 means "random"; omit the parameter in that case
    if seed == -1:
        seed = None
    # Rebuild the conversation as an OpenAI-style message list
    messages = [{"role": "system", "content": system_message}]
for user_msg, assistant_msg in history:
if user_msg:
messages.append({"role": "user", "content": user_msg})
if assistant_msg:
messages.append({"role": "assistant", "content": assistant_msg})
messages.append({"role": "user", "content": message})
    model_to_use = selected_model or ALL_MODELS[0]
    # Append a placeholder pair that streaming fills in incrementally
    new_history = list(history) + [(message, "")]
    current_response = ""
try:
for chunk in client.chat.completions.create(
model=model_to_use,
max_tokens=max_tokens,
stream=True,
temperature=temperature,
top_p=top_p,
frequency_penalty=frequency_penalty,
seed=seed,
messages=messages,
):
            # Keep-alive chunks may carry no content; append only real text
            delta = chunk.choices[0].delta.content if chunk.choices else None
            if delta:
                current_response += delta
                new_history[-1] = (message, current_response)
                yield new_history
except Exception as e:
err = f"Error: {e}"
new_history[-1] = (message, err)
yield new_history
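# A minimal, non-streaming sketch for smoke-testing a model outside the UI.
# (Hypothetical helper, not wired into the interface; reuses the same client.)
def quick_test(prompt, model=None):
    completion = client.chat.completions.create(
        model=model or ALL_MODELS[0],
        max_tokens=64,
        messages=[{"role": "user", "content": prompt}],
    )
    return completion.choices[0].message.content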
with gr.Blocks(title=APP_TITLE, theme=gr.themes.Soft()) as demo:
gr.Markdown(f"## {APP_TITLE}\n\n{APP_DESCRIPTION}")
with gr.Row():
with gr.Column(scale=2):
# Model selection via Dropdown
selected_model = gr.Dropdown(
choices=ALL_MODELS,
value=ALL_MODELS[0],
label="Select Model"
)
model_info = gr.Markdown(get_model_info(ALL_MODELS[0]))
            # Update the info panel whenever a different model is selected
            selected_model.change(
                fn=get_model_info,
                inputs=[selected_model],
                outputs=[model_info]
            )
# Conversation settings
system_message = gr.Textbox(
value="You are a helpful assistant.",
label="System Prompt",
lines=2
)
max_tokens = gr.Slider(1, 4096, value=512, label="Max New Tokens")
temperature = gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature")
top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-P")
freq_penalty = gr.Slider(-2.0, 2.0, value=0.0, step=0.1, label="Frequency Penalty")
seed = gr.Slider(-1, 65535, value=-1, step=1, label="Seed (-1 random)")
with gr.Column(scale=3):
chatbot = gr.Chatbot()
msg = gr.Textbox(placeholder="Type your message here...", show_label=False)
send_btn = gr.Button("Send")
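    # Both the Send button and pressing Enter trigger the same streaming handler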
send_btn.click(
fn=respond,
inputs=[
msg, chatbot, system_message,
max_tokens, temperature, top_p,
freq_penalty, seed, selected_model
],
outputs=[chatbot],
queue=True
)
msg.submit(
fn=respond,
inputs=[
msg, chatbot, system_message,
max_tokens, temperature, top_p,
freq_penalty, seed, selected_model
],
outputs=[chatbot],
queue=True
)
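    # Note: streaming output relies on Gradio's queue; on older Gradio 3.x you
    # may need to enable it explicitly, e.g. `demo.queue().launch()`.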
demo.launch()