import gradio as gr
from openai import OpenAI
import os
import re
# App title and description
APP_TITLE = "No GPU, Multi-LLM Chat"
APP_DESCRIPTION = "Access and chat with multiple language models without requiring a GPU"

# Load the Hugging Face API token from the environment
ACCESS_TOKEN = os.getenv("HF_TOKEN")
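# Optional fail-fast check (not in the original app): without a token the
# client would only error at request time, with a less obvious message.
if not ACCESS_TOKEN:
    raise RuntimeError("HF_TOKEN environment variable is not set")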
# OpenAI-compatible client pointed at the Hugging Face Inference API
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
# Model categories for better organization
MODEL_CATEGORIES = {
    "Qwen": [
        "Qwen/Qwen2.5-72B-Instruct",
        "Qwen/Qwen2.5-3B-Instruct",
        "Qwen/Qwen2.5-0.5B-Instruct",
        "Qwen/Qwen2.5-Coder-32B-Instruct",
    ],
    "Meta LLaMa": [
        "meta-llama/Llama-3.3-70B-Instruct",
        "meta-llama/Llama-3.1-70B-Instruct",
"meta-llama/Llama-3.0-70B-Instruct", | |
"meta-llama/Llama-3.2-3B-Instruct", | |
"meta-llama/Llama-3.2-1B-Instruct", | |
"meta-llama/Llama-3.1-8B-Instruct", | |
], | |
"Mistral": [ | |
"mistralai/Mistral-Nemo-Instruct-2407", | |
"mistralai/Mixtral-8x7B-Instruct-v0.1", | |
"mistralai/Mistral-7B-Instruct-v0.3", | |
"mistralai/Mistral-7B-Instruct-v0.2", | |
], | |
"Microsoft Phi": [ | |
"microsoft/Phi-3.5-mini-instruct", | |
"microsoft/Phi-3-mini-128k-instruct", | |
"microsoft/Phi-3-mini-4k-instruct", | |
], | |
"Other Models": [ | |
"NousResearch/Hermes-3-Llama-3.1-8B", | |
"NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", | |
"deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", | |
"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", | |
"HuggingFaceH4/zephyr-7b-beta", | |
"HuggingFaceTB/SmolLM2-360M-Instruct", | |
"tiiuae/falcon-7b-instruct", | |
"01-ai/Yi-1.5-34B-Chat", | |
] | |
} | |
# Flatten the model list
ALL_MODELS = [m for models in MODEL_CATEGORIES.values() for m in models]
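# For example, ALL_MODELS[0] is "Qwen/Qwen2.5-72B-Instruct": dicts preserve
# insertion order in Python 3.7+, so the dropdown lists models in the same
# order as the categories above.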
def get_model_info(model_name):
    """Build a short Markdown summary (org, name, parameter count) for a model id."""
    parts = model_name.split('/')
    if len(parts) != 2:
        return f"**Model:** {model_name}\n**Format:** Unknown"
    org, model = parts
    # Pull the parameter count (e.g. "72B", "0.5B") out of the model name
    size_match = re.search(r'(\d+\.?\d*)B', model)
    size = size_match.group(1) + "B" if size_match else "Unknown"
    return f"**Organization:** {org}\n**Model:** {model}\n**Size:** {size}"
def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
    selected_model,
):
    # A seed of -1 means "no fixed seed": pass None so the server randomizes
    if seed == -1:
        seed = None
    # Rebuild the conversation in the OpenAI chat-message format
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    model_to_use = selected_model or ALL_MODELS[0]
    # Append a placeholder turn that the streaming loop fills in below
    new_history = list(history) + [(message, "")]
    current_response = ""
    try:
        # Stream the completion and update the chat window as tokens arrive
        for chunk in client.chat.completions.create(
            model=model_to_use,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            frequency_penalty=frequency_penalty,
            seed=seed,
            messages=messages,
        ):
            if not chunk.choices:
                continue  # skip stream chunks that carry no choices
            delta = chunk.choices[0].delta.content
            if delta:
                current_response += delta
                new_history[-1] = (message, current_response)
                yield new_history
    except Exception as e:
        # Surface API errors in the chat window instead of crashing the app
        new_history[-1] = (message, f"Error: {e}")
        yield new_history
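# Minimal smoke-test sketch (hypothetical, not part of the app): respond() is
# a plain generator, so it can be driven from a Python shell without the UI:
#
#   last = None
#   for last in respond("Hello!", [], "You are a helpful assistant.",
#                       128, 0.7, 0.95, 0.0, -1, ALL_MODELS[0]):
#       pass
#   print(last[-1][1])  # final assistant reply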
with gr.Blocks(title=APP_TITLE, theme=gr.themes.Soft()) as demo:
    gr.Markdown(f"## {APP_TITLE}\n\n{APP_DESCRIPTION}")
    with gr.Row():
        with gr.Column(scale=2):
            # Model selection via dropdown
            selected_model = gr.Dropdown(
                choices=ALL_MODELS,
                value=ALL_MODELS[0],
                label="Select Model",
            )
            model_info = gr.Markdown(get_model_info(ALL_MODELS[0]))

            # Keep the info panel in sync with the selected model
            selected_model.change(
                fn=get_model_info,
                inputs=[selected_model],
                outputs=[model_info],
            )
            # Conversation settings
            system_message = gr.Textbox(
                value="You are a helpful assistant.",
                label="System Prompt",
                lines=2,
            )
            max_tokens = gr.Slider(1, 4096, value=512, label="Max New Tokens")
            temperature = gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature")
            top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-P")
            freq_penalty = gr.Slider(-2.0, 2.0, value=0.0, step=0.1, label="Frequency Penalty")
            seed = gr.Slider(-1, 65535, value=-1, step=1, label="Seed (-1 for random)")
        with gr.Column(scale=3):
            chatbot = gr.Chatbot()
            msg = gr.Textbox(placeholder="Type your message here...", show_label=False)
            send_btn = gr.Button("Send")
    # The Send button and pressing Enter both trigger the same handler
    chat_inputs = [
        msg, chatbot, system_message,
        max_tokens, temperature, top_p,
        freq_penalty, seed, selected_model,
    ]
    send_btn.click(fn=respond, inputs=chat_inputs, outputs=[chatbot], queue=True)
    msg.submit(fn=respond, inputs=chat_inputs, outputs=[chatbot], queue=True)
demo.launch()
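# On Hugging Face Spaces this file runs as app.py and launch() needs no
# arguments; for local use, options such as demo.launch(server_port=7860)
# can be passed instead.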