import gradio as gr
from openai import OpenAI
import os
import re
from datetime import datetime
# App title and description
APP_TITLE = "Multi-LLM Chat, No GPU Required"
APP_DESCRIPTION = "Access and chat with multiple language models without requiring a GPU"
# Load environment variables
ACCESS_TOKEN = os.getenv("HF_TOKEN")
if ACCESS_TOKEN:
    print("Access token loaded.")
else:
    print("Warning: HF_TOKEN is not set; the OpenAI client below will fail to initialize.")
client = OpenAI(
base_url="https://api-inference.huggingface.co/v1/",
api_key=ACCESS_TOKEN,
)
print("OpenAI client initialized.")
def respond(
message,
history: list[tuple[str, str]],
system_message,
max_tokens,
temperature,
top_p,
frequency_penalty,
seed,
custom_model
):
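    """Stream a chat completion for `message`, yielding the updated history list."""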
print(f"Received message: {message}")
print(f"Selected model: {custom_model}")
# Convert seed to None if -1 (meaning random)
if seed == -1:
seed = None
messages = [{"role": "system", "content": system_message}]
# Add conversation history to the context
for val in history:
user_part = val[0]
assistant_part = val[1]
if user_part:
messages.append({"role": "user", "content": user_part})
if assistant_part:
messages.append({"role": "assistant", "content": assistant_part})
# Append the latest user message
messages.append({"role": "user", "content": message})
    # If the user provided a model, use it; otherwise fall back to the default
    model_to_use = custom_model.strip() or "meta-llama/Llama-3.3-70B-Instruct"
# Start with an empty string to build the response as tokens stream in
response = ""
try:
for message_chunk in client.chat.completions.create(
model=model_to_use,
max_tokens=max_tokens,
stream=True,
temperature=temperature,
top_p=top_p,
frequency_penalty=frequency_penalty,
seed=seed,
messages=messages,
):
            if not message_chunk.choices:
                continue  # skip keep-alive chunks that carry no choices
            token_text = message_chunk.choices[0].delta.content
            if token_text is not None:  # role-only deltas carry no content
                response += token_text
                # Yield the full history so the Chatbot component can render it
                yield history + [(message, response)]
    except Exception as e:
        yield history + [(message, f"Error: {e}\n\nPlease check your model selection and parameters, or try again later.")]
print("Completed response generation.")
# Model categories for better organization
MODEL_CATEGORIES = {
"Meta LLaMa": [
"meta-llama/Llama-3.3-70B-Instruct",
"meta-llama/Llama-3.1-70B-Instruct",
"meta-llama/Llama-3.0-70B-Instruct",
"meta-llama/Llama-3.2-3B-Instruct",
"meta-llama/Llama-3.2-1B-Instruct",
"meta-llama/Llama-3.1-8B-Instruct",
],
"Mistral": [
"mistralai/Mistral-Nemo-Instruct-2407",
"mistralai/Mixtral-8x7B-Instruct-v0.1",
"mistralai/Mistral-7B-Instruct-v0.3",
"mistralai/Mistral-7B-Instruct-v0.2",
],
"Qwen": [
"Qwen/Qwen3-235B-A22B",
"Qwen/Qwen3-32B",
"Qwen/Qwen2.5-72B-Instruct",
"Qwen/Qwen2.5-3B-Instruct",
"Qwen/Qwen2.5-0.5B-Instruct",
"Qwen/QwQ-32B",
"Qwen/Qwen2.5-Coder-32B-Instruct",
],
"Microsoft Phi": [
"microsoft/Phi-3.5-mini-instruct",
"microsoft/Phi-3-mini-128k-instruct",
"microsoft/Phi-3-mini-4k-instruct",
],
"Other Models": [
"NousResearch/Hermes-3-Llama-3.1-8B",
"NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
"deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
"HuggingFaceH4/zephyr-7b-beta",
"HuggingFaceTB/SmolLM2-360M-Instruct",
"tiiuae/falcon-7b-instruct",
"01-ai/Yi-1.5-34B-Chat",
]
}
# Flatten the model list (currently unused by the UI, which filters MODEL_CATEGORIES directly)
ALL_MODELS = [model for models in MODEL_CATEGORIES.values() for model in models]
# Helper function to get model info display
def get_model_info(model_name):
"""Extract and format model information for display"""
    parts = model_name.split('/')
    # Guard against free-form input that lacks the "org/model" form
    org = parts[0] if len(parts) > 1 else "Unknown"
    model = parts[-1]
# Extract numbers from model name to determine size
    size_match = re.search(r'(\d+\.?\d*)B', model)
size = size_match.group(1) + "B" if size_match else "Unknown"
return f"**Organization:** {org}\n**Model:** {model}\n**Size:** {size}"
def filter_models(search_term):
"""Filter models based on search term across all categories"""
if not search_term:
return MODEL_CATEGORIES
filtered_categories = {}
for category, models in MODEL_CATEGORIES.items():
filtered_models = [m for m in models if search_term.lower() in m.lower()]
if filtered_models:
filtered_categories[category] = filtered_models
return filtered_categories
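# e.g. filter_models("zephyr") -> {"Other Models": ["HuggingFaceH4/zephyr-7b-beta"]}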
def update_model_display(search_term=""):
"""Update the model selection UI based on search term"""
filtered_categories = filter_models(search_term)
# Create HTML for model display
html = "<div style='max-height: 400px; overflow-y: auto;'>"
for category, models in filtered_categories.items():
html += f"<h3>{category}</h3><div style='display: grid; grid-template-columns: repeat(auto-fill, minmax(250px, 1fr)); gap: 10px;'>"
for model in models:
model_short = model.split('/')[-1]
html += f"""
<div class='model-card' onclick='selectModel("{model}")'
style='border: 1px solid #ddd; border-radius: 8px; padding: 12px; cursor: pointer; transition: all 0.2s;
background: linear-gradient(145deg, #f0f0f0, #ffffff); box-shadow: 0 4px 6px rgba(0,0,0,0.1);'>
<div style='font-weight: bold; margin-bottom: 6px; color: #1a73e8;'>{model_short}</div>
<div style='font-size: 0.8em; color: #666;'>{model.split('/')[0]}</div>
</div>
"""
html += "</div>"
if not filtered_categories:
html += "<p>No models found matching your search.</p>"
html += "</div><script>function selectModel(model) { document.getElementById('custom-model-input').value = model; }</script>"
return html
# Create custom CSS for better styling
custom_css = """
#app-container {
max-width: 1200px;
margin: 0 auto;
padding: 20px;
}
#chat-container {
border-radius: 12px;
box-shadow: 0 8px 16px rgba(0,0,0,0.1);
overflow: hidden;
}
.contain {
background: linear-gradient(135deg, #f5f7fa 0%, #e4e7eb 100%);
}
h1, h2, h3 {
font-family: 'Poppins', sans-serif;
}
h1 {
background: linear-gradient(90deg, #2b6cb0, #4299e1);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
font-weight: 700;
letter-spacing: -0.5px;
margin-bottom: 8px;
}
.parameter-row {
display: flex;
gap: 10px;
margin-bottom: 10px;
}
.model-card:hover {
transform: translateY(-2px);
box-shadow: 0 6px 12px rgba(0,0,0,0.15);
border-color: #4299e1;
}
.tabs {
box-shadow: 0 2px 10px rgba(0,0,0,0.05);
border-radius: 8px;
overflow: hidden;
}
.footer {
text-align: center;
margin-top: 20px;
font-size: 0.8em;
color: #666;
}
/* Status indicator styles */
.status-indicator {
display: inline-block;
width: 10px;
height: 10px;
border-radius: 50%;
margin-right: 6px;
}
.status-active {
background-color: #10B981;
animation: pulse 2s infinite;
}
@keyframes pulse {
0% {
box-shadow: 0 0 0 0 rgba(16, 185, 129, 0.7);
}
70% {
box-shadow: 0 0 0 5px rgba(16, 185, 129, 0);
}
100% {
box-shadow: 0 0 0 0 rgba(16, 185, 129, 0);
}
}
/* Parameter tooltips */
.parameter-container {
position: relative;
}
.parameter-info {
display: none;
position: absolute;
background: white;
border: 1px solid #ddd;
padding: 10px;
border-radius: 6px;
box-shadow: 0 2px 5px rgba(0,0,0,0.2);
z-index: 100;
width: 250px;
top: 100%;
left: 10px;
}
.parameter-container:hover .parameter-info {
display: block;
}
"""
with gr.Blocks(css=custom_css, title=APP_TITLE, theme=gr.themes.Soft()) as demo:
gr.HTML(f"""
<div id="app-container">
<div style="text-align: center; padding: 20px 0;">
<h1 style="font-size: 2.5rem;">{APP_TITLE}</h1>
<p style="font-size: 1.1rem; color: #555;">{APP_DESCRIPTION}</p>
<div style="margin-top: 10px;">
<span class="status-indicator status-active"></span>
<span>Service Active</span>
<span style="margin-left: 15px;">Last Updated: {datetime.now().strftime('%Y-%m-%d')}</span>
</div>
</div>
</div>
""")
with gr.Row():
with gr.Column(scale=3):
# Main chat interface
            with gr.Group(elem_id="chat-container"):  # gr.Box was removed in Gradio 4; Group is the closest equivalent
chatbot = gr.Chatbot(
height=550,
show_copy_button=True,
placeholder="Select a model and begin chatting",
layout="panel"
)
with gr.Row():
with gr.Column(scale=8):
msg = gr.Textbox(
show_label=False,
placeholder="Type your message here...",
container=False,
scale=8
)
with gr.Column(scale=1, min_width=70):
submit_btn = gr.Button("Send", variant="primary", scale=1)
with gr.Accordion("Conversation Settings", open=False):
system_message_box = gr.Textbox(
value="You are a helpful assistant.",
placeholder="System prompt that guides the assistant's behavior",
label="System Prompt",
lines=2
)
with gr.Tabs(elem_classes="tabs"):
with gr.TabItem("Basic Parameters"):
with gr.Row(elem_classes="parameter-row"):
with gr.Column():
max_tokens_slider = gr.Slider(
minimum=1,
maximum=4096,
value=512,
step=1,
label="Max new tokens"
)
with gr.Column():
temperature_slider = gr.Slider(
minimum=0.1,
maximum=4.0,
value=0.7,
step=0.1,
label="Temperature"
)
with gr.TabItem("Advanced Parameters"):
with gr.Row(elem_classes="parameter-row"):
with gr.Column():
top_p_slider = gr.Slider(
minimum=0.1,
maximum=1.0,
value=0.95,
step=0.05,
label="Top-P"
)
with gr.Column():
frequency_penalty_slider = gr.Slider(
minimum=-2.0,
maximum=2.0,
value=0.0,
step=0.1,
label="Frequency Penalty"
)
seed_slider = gr.Slider(
minimum=-1,
maximum=65535,
value=-1,
step=1,
label="Seed (-1 for random)"
)
with gr.Column(scale=2):
# Model selection panel
        with gr.Group():  # gr.Group replaces the removed gr.Box
gr.HTML("<h3 style='margin-top: 0;'>Model Selection</h3>")
# Custom model input (this is what the respond function sees)
custom_model_box = gr.Textbox(
value="meta-llama/Llama-3.3-70B-Instruct",
label="Selected Model",
elem_id="custom-model-input"
)
# Search box
model_search_box = gr.Textbox(
label="Search Models",
placeholder="Type to filter models...",
lines=1
)
# Dynamic model display area
model_display = gr.HTML(update_model_display())
# Model information display
gr.HTML("<h4>Current Model Info</h4>")
model_info_display = gr.Markdown(get_model_info("meta-llama/Llama-3.3-70B-Instruct"))
# Footer
gr.HTML("""
<div class="footer">
<p>Created with Gradio • Powered by Hugging Face Inference API</p>
<p>This interface allows you to chat with various language models without requiring a GPU</p>
</div>
""")
# Set up event handlers
msg.submit(
fn=respond,
inputs=[msg, chatbot, system_message_box, max_tokens_slider, temperature_slider,
top_p_slider, frequency_penalty_slider, seed_slider, custom_model_box],
outputs=[chatbot],
queue=True
)
submit_btn.click(
fn=respond,
inputs=[msg, chatbot, system_message_box, max_tokens_slider, temperature_slider,
top_p_slider, frequency_penalty_slider, seed_slider, custom_model_box],
outputs=[chatbot],
queue=True
)
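    # A common follow-up (not wired here) clears the textbox after each send by
    # chaining on the event, e.g.:
    #   msg.submit(fn=respond, inputs=[...], outputs=[chatbot]).then(lambda: "", None, [msg])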
# Update model display when search changes
model_search_box.change(
fn=lambda x: update_model_display(x),
inputs=model_search_box,
outputs=model_display
)
# Update model info when selection changes
custom_model_box.change(
fn=lambda x: get_model_info(x),
inputs=custom_model_box,
outputs=model_info_display
)
if __name__ == "__main__":
print("Launching the enhanced multi-model chat interface.")
demo.launch()