import gradio as gr
from openai import OpenAI
import os
import re
from datetime import datetime

# App title and description
APP_TITLE = "NO GPU, Multi LLMs Uses"
APP_DESCRIPTION = "Access and chat with multiple language models without requiring a GPU"

# Load environment variables
ACCESS_TOKEN = os.getenv("HF_TOKEN")
if ACCESS_TOKEN:
    print("Access token loaded.")
else:
    print("Warning: HF_TOKEN is not set; requests to the Inference API will fail.")

client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
print("OpenAI client initialized.")
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
    custom_model,
):
    print(f"Received message: {message}")
    print(f"Selected model: {custom_model}")

    # Convert seed to None if -1 (meaning random)
    if seed == -1:
        seed = None

    messages = [{"role": "system", "content": system_message}]

    # Add conversation history to the context
    for user_part, assistant_part in history:
        if user_part:
            messages.append({"role": "user", "content": user_part})
        if assistant_part:
            messages.append({"role": "assistant", "content": assistant_part})

    # Append the latest user message
    messages.append({"role": "user", "content": message})

    # If the user provided a model, use it; otherwise fall back to a default model
    model_to_use = custom_model.strip() or "meta-llama/Llama-3.3-70B-Instruct"

    # Build the reply incrementally as tokens stream in, yielding the updated
    # (user, assistant) history so the Chatbot component can render each step
    response = ""
    try:
        for message_chunk in client.chat.completions.create(
            model=model_to_use,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            frequency_penalty=frequency_penalty,
            seed=seed,
            messages=messages,
        ):
            token_text = message_chunk.choices[0].delta.content
            if token_text is not None:  # some stream chunks carry no content
                response += token_text
            yield history + [(message, response)]
    except Exception as e:
        yield history + [(message, f"Error: {e}\n\nPlease check your model selection and parameters, or try again later.")]
    print("Completed response generation.")
# Model categories for better organization
MODEL_CATEGORIES = {
    "Meta LLaMa": [
        "meta-llama/Llama-3.3-70B-Instruct",
        "meta-llama/Llama-3.1-70B-Instruct",
        "meta-llama/Llama-3.0-70B-Instruct",
        "meta-llama/Llama-3.2-3B-Instruct",
        "meta-llama/Llama-3.2-1B-Instruct",
        "meta-llama/Llama-3.1-8B-Instruct",
    ],
    "Mistral": [
        "mistralai/Mistral-Nemo-Instruct-2407",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "mistralai/Mistral-7B-Instruct-v0.3",
        "mistralai/Mistral-7B-Instruct-v0.2",
    ],
    "Qwen": [
        "Qwen/Qwen3-235B-A22B",
        "Qwen/Qwen3-32B",
        "Qwen/Qwen2.5-72B-Instruct",
        "Qwen/Qwen2.5-3B-Instruct",
        "Qwen/Qwen2.5-0.5B-Instruct",
        "Qwen/QwQ-32B",
        "Qwen/Qwen2.5-Coder-32B-Instruct",
    ],
    "Microsoft Phi": [
        "microsoft/Phi-3.5-mini-instruct",
        "microsoft/Phi-3-mini-128k-instruct",
        "microsoft/Phi-3-mini-4k-instruct",
    ],
    "Other Models": [
        "NousResearch/Hermes-3-Llama-3.1-8B",
        "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        "HuggingFaceH4/zephyr-7b-beta",
        "HuggingFaceTB/SmolLM2-360M-Instruct",
        "tiiuae/falcon-7b-instruct",
        "01-ai/Yi-1.5-34B-Chat",
    ]
}
# Flatten the model list for search functionality
ALL_MODELS = [model for models in MODEL_CATEGORIES.values() for model in models]
# Helper function to get model info display
def get_model_info(model_name):
    """Extract and format model information for display."""
    org, _, model = model_name.partition('/')
    if not model:  # no "org/model" separator; treat the whole name as the model
        org, model = "Unknown", model_name
    # Extract the parameter count (e.g. "70B") from the model name, if present
    size_match = re.search(r'(\d+\.?\d*)B', model)
    size = size_match.group(1) + "B" if size_match else "Unknown"
    return f"**Organization:** {org}\n**Model:** {model}\n**Size:** {size}"
def filter_models(search_term):
    """Filter models based on the search term across all categories."""
    if not search_term:
        return MODEL_CATEGORIES
    filtered_categories = {}
    for category, models in MODEL_CATEGORIES.items():
        filtered_models = [m for m in models if search_term.lower() in m.lower()]
        if filtered_models:
            filtered_categories[category] = filtered_models
    return filtered_categories
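# Example (illustrative):
#   filter_models("coder") -> {"Qwen": ["Qwen/Qwen2.5-Coder-32B-Instruct"]}
#   filter_models("")      -> MODEL_CATEGORIES  (unfiltered)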
def update_model_display(search_term=""):
    """Update the model selection UI based on the search term."""
    filtered_categories = filter_models(search_term)

    # Create HTML for the model display
    html = "<div style='max-height: 400px; overflow-y: auto;'>"
    for category, models in filtered_categories.items():
        html += f"<h3>{category}</h3><div style='display: grid; grid-template-columns: repeat(auto-fill, minmax(250px, 1fr)); gap: 10px;'>"
        for model in models:
            model_short = model.split('/')[-1]
            html += f"""
            <div class='model-card' onclick='selectModel("{model}")'
                 style='border: 1px solid #ddd; border-radius: 8px; padding: 12px; cursor: pointer; transition: all 0.2s;
                        background: linear-gradient(145deg, #f0f0f0, #ffffff); box-shadow: 0 4px 6px rgba(0,0,0,0.1);'>
                <div style='font-weight: bold; margin-bottom: 6px; color: #1a73e8;'>{model_short}</div>
                <div style='font-size: 0.8em; color: #666;'>{model.split('/')[0]}</div>
            </div>
            """
        html += "</div>"
    if not filtered_categories:
        html += "<p>No models found matching your search.</p>"
    # Note: many Gradio versions sanitize <script> tags inside gr.HTML, so this
    # click-to-select handler may not fire; the model textbox can always be edited directly.
    html += "</div><script>function selectModel(model) { document.getElementById('custom-model-input').value = model; }</script>"
    return html
# Create custom CSS for better styling
custom_css = """
#app-container {
    max-width: 1200px;
    margin: 0 auto;
    padding: 20px;
}
#chat-container {
    border-radius: 12px;
    box-shadow: 0 8px 16px rgba(0,0,0,0.1);
    overflow: hidden;
}
.contain {
    background: linear-gradient(135deg, #f5f7fa 0%, #e4e7eb 100%);
}
h1, h2, h3 {
    font-family: 'Poppins', sans-serif;
}
h1 {
    background: linear-gradient(90deg, #2b6cb0, #4299e1);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-weight: 700;
    letter-spacing: -0.5px;
    margin-bottom: 8px;
}
.parameter-row {
    display: flex;
    gap: 10px;
    margin-bottom: 10px;
}
.model-card:hover {
    transform: translateY(-2px);
    box-shadow: 0 6px 12px rgba(0,0,0,0.15);
    border-color: #4299e1;
}
.tabs {
    box-shadow: 0 2px 10px rgba(0,0,0,0.05);
    border-radius: 8px;
    overflow: hidden;
}
.footer {
    text-align: center;
    margin-top: 20px;
    font-size: 0.8em;
    color: #666;
}
/* Status indicator styles */
.status-indicator {
    display: inline-block;
    width: 10px;
    height: 10px;
    border-radius: 50%;
    margin-right: 6px;
}
.status-active {
    background-color: #10B981;
    animation: pulse 2s infinite;
}
@keyframes pulse {
    0% {
        box-shadow: 0 0 0 0 rgba(16, 185, 129, 0.7);
    }
    70% {
        box-shadow: 0 0 0 5px rgba(16, 185, 129, 0);
    }
    100% {
        box-shadow: 0 0 0 0 rgba(16, 185, 129, 0);
    }
}
/* Parameter tooltips */
.parameter-container {
    position: relative;
}
.parameter-info {
    display: none;
    position: absolute;
    background: white;
    border: 1px solid #ddd;
    padding: 10px;
    border-radius: 6px;
    box-shadow: 0 2px 5px rgba(0,0,0,0.2);
    z-index: 100;
    width: 250px;
    top: 100%;
    left: 10px;
}
.parameter-container:hover .parameter-info {
    display: block;
}
"""
with gr.Blocks(css=custom_css, title=APP_TITLE, theme=gr.themes.Soft()) as demo:
    gr.HTML(f"""
    <div id="app-container">
        <div style="text-align: center; padding: 20px 0;">
            <h1 style="font-size: 2.5rem;">{APP_TITLE}</h1>
            <p style="font-size: 1.1rem; color: #555;">{APP_DESCRIPTION}</p>
            <div style="margin-top: 10px;">
                <span class="status-indicator status-active"></span>
                <span>Service Active</span>
                <span style="margin-left: 15px;">Last Updated: {datetime.now().strftime('%Y-%m-%d')}</span>
            </div>
        </div>
    </div>
    """)
    with gr.Row():
        with gr.Column(scale=3):
            # Main chat interface (gr.Box was removed in Gradio 4; gr.Group is the closest equivalent)
            with gr.Group(elem_id="chat-container"):
                chatbot = gr.Chatbot(
                    height=550,
                    show_copy_button=True,
                    placeholder="Select a model and begin chatting",
                    layout="panel"
                )
                with gr.Row():
                    with gr.Column(scale=8):
                        msg = gr.Textbox(
                            show_label=False,
                            placeholder="Type your message here...",
                            container=False
                        )
                    with gr.Column(scale=1, min_width=70):
                        submit_btn = gr.Button("Send", variant="primary")
                with gr.Accordion("Conversation Settings", open=False):
                    system_message_box = gr.Textbox(
                        value="You are a helpful assistant.",
                        placeholder="System prompt that guides the assistant's behavior",
                        label="System Prompt",
                        lines=2
                    )
                with gr.Tabs(elem_classes="tabs"):
                    with gr.TabItem("Basic Parameters"):
                        with gr.Row(elem_classes="parameter-row"):
                            with gr.Column():
                                max_tokens_slider = gr.Slider(
                                    minimum=1,
                                    maximum=4096,
                                    value=512,
                                    step=1,
                                    label="Max new tokens"
                                )
                            with gr.Column():
                                temperature_slider = gr.Slider(
                                    minimum=0.1,
                                    maximum=4.0,
                                    value=0.7,
                                    step=0.1,
                                    label="Temperature"
                                )
                    with gr.TabItem("Advanced Parameters"):
                        with gr.Row(elem_classes="parameter-row"):
                            with gr.Column():
                                top_p_slider = gr.Slider(
                                    minimum=0.1,
                                    maximum=1.0,
                                    value=0.95,
                                    step=0.05,
                                    label="Top-P"
                                )
                            with gr.Column():
                                frequency_penalty_slider = gr.Slider(
                                    minimum=-2.0,
                                    maximum=2.0,
                                    value=0.0,
                                    step=0.1,
                                    label="Frequency Penalty"
                                )
                        seed_slider = gr.Slider(
                            minimum=-1,
                            maximum=65535,
                            value=-1,
                            step=1,
                            label="Seed (-1 for random)"
                        )
        with gr.Column(scale=2):
            # Model selection panel (gr.Group again standing in for the removed gr.Box)
            with gr.Group():
                gr.HTML("<h3 style='margin-top: 0;'>Model Selection</h3>")
                # Custom model input (this is what the respond function reads)
                custom_model_box = gr.Textbox(
                    value="meta-llama/Llama-3.3-70B-Instruct",
                    label="Selected Model",
                    elem_id="custom-model-input"
                )
                # Search box
                model_search_box = gr.Textbox(
                    label="Search Models",
                    placeholder="Type to filter models...",
                    lines=1
                )
                # Dynamic model display area
                model_display = gr.HTML(update_model_display())
                # Model information display
                gr.HTML("<h4>Current Model Info</h4>")
                model_info_display = gr.Markdown(get_model_info("meta-llama/Llama-3.3-70B-Instruct"))

    # Footer
    gr.HTML("""
    <div class="footer">
        <p>Created with Gradio • Powered by Hugging Face Inference API</p>
        <p>This interface lets you chat with various language models without requiring a GPU</p>
    </div>
    """)
    # Set up event handlers: stream the reply into the chatbot, then clear the input box
    msg.submit(
        fn=respond,
        inputs=[msg, chatbot, system_message_box, max_tokens_slider, temperature_slider,
                top_p_slider, frequency_penalty_slider, seed_slider, custom_model_box],
        outputs=[chatbot],
        queue=True
    ).then(lambda: "", None, msg)
    submit_btn.click(
        fn=respond,
        inputs=[msg, chatbot, system_message_box, max_tokens_slider, temperature_slider,
                top_p_slider, frequency_penalty_slider, seed_slider, custom_model_box],
        outputs=[chatbot],
        queue=True
    ).then(lambda: "", None, msg)

    # Update the model display when the search term changes
    model_search_box.change(
        fn=update_model_display,
        inputs=model_search_box,
        outputs=model_display
    )

    # Update the model info when the selection changes
    custom_model_box.change(
        fn=get_model_info,
        inputs=custom_model_box,
        outputs=model_info_display
    )
if __name__ == "__main__":
    print("Launching the enhanced multi-model chat interface.")
    demo.launch()