import gradio as gr
from openai import OpenAI
import os
import re
from datetime import datetime
# App title and description
APP_TITLE = "Multi-LLM Chat, No GPU Required"
APP_DESCRIPTION = "Access and chat with multiple language models without requiring a GPU"
# Load environment variables
ACCESS_TOKEN = os.getenv("HF_TOKEN")
if ACCESS_TOKEN:
    print("Access token loaded.")
else:
    print("Warning: HF_TOKEN is not set; the OpenAI client below will fail to initialize.")
client = OpenAI(
base_url="https://api-inference.huggingface.co/v1/",
api_key=ACCESS_TOKEN,
)
print("OpenAI client initialized.")
def respond(
message,
history: list[tuple[str, str]],
system_message,
max_tokens,
temperature,
top_p,
frequency_penalty,
seed,
custom_model
):
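    """Stream a chat completion for `message`, yielding the updated history list."""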
print(f"Received message: {message}")
print(f"Selected model: {custom_model}")
# Convert seed to None if -1 (meaning random)
if seed == -1:
seed = None
messages = [{"role": "system", "content": system_message}]
# Add conversation history to the context
for val in history:
user_part = val[0]
assistant_part = val[1]
if user_part:
messages.append({"role": "user", "content": user_part})
if assistant_part:
messages.append({"role": "assistant", "content": assistant_part})
# Append the latest user message
messages.append({"role": "user", "content": message})
    # If the user provided a model, use it; otherwise fall back to the default
    model_to_use = custom_model.strip() or "meta-llama/Llama-3.3-70B-Instruct"
# Start with an empty string to build the response as tokens stream in
response = ""
try:
for message_chunk in client.chat.completions.create(
model=model_to_use,
max_tokens=max_tokens,
stream=True,
temperature=temperature,
top_p=top_p,
frequency_penalty=frequency_penalty,
seed=seed,
messages=messages,
):
            if not message_chunk.choices:
                continue  # skip keep-alive chunks that carry no choices
            token_text = message_chunk.choices[0].delta.content
            if token_text is not None:  # role-only deltas carry no content
                response += token_text
                # Yield the full history so the Chatbot component can render it
                yield history + [(message, response)]
    except Exception as e:
        yield history + [(message, f"Error: {e}\n\nPlease check your model selection and parameters, or try again later.")]
print("Completed response generation.")
# Model categories for better organization
MODEL_CATEGORIES = {
"Meta LLaMa": [
"meta-llama/Llama-3.3-70B-Instruct",
"meta-llama/Llama-3.1-70B-Instruct",
"meta-llama/Llama-3.0-70B-Instruct",
"meta-llama/Llama-3.2-3B-Instruct",
"meta-llama/Llama-3.2-1B-Instruct",
"meta-llama/Llama-3.1-8B-Instruct",
],
"Mistral": [
"mistralai/Mistral-Nemo-Instruct-2407",
"mistralai/Mixtral-8x7B-Instruct-v0.1",
"mistralai/Mistral-7B-Instruct-v0.3",
"mistralai/Mistral-7B-Instruct-v0.2",
],
"Qwen": [
"Qwen/Qwen3-235B-A22B",
"Qwen/Qwen3-32B",
"Qwen/Qwen2.5-72B-Instruct",
"Qwen/Qwen2.5-3B-Instruct",
"Qwen/Qwen2.5-0.5B-Instruct",
"Qwen/QwQ-32B",
"Qwen/Qwen2.5-Coder-32B-Instruct",
],
"Microsoft Phi": [
"microsoft/Phi-3.5-mini-instruct",
"microsoft/Phi-3-mini-128k-instruct",
"microsoft/Phi-3-mini-4k-instruct",
],
"Other Models": [
"NousResearch/Hermes-3-Llama-3.1-8B",
"NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
"deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
"HuggingFaceH4/zephyr-7b-beta",
"HuggingFaceTB/SmolLM2-360M-Instruct",
"tiiuae/falcon-7b-instruct",
"01-ai/Yi-1.5-34B-Chat",
]
}
# Flatten the model list (currently unused by the UI, which filters MODEL_CATEGORIES directly)
ALL_MODELS = [model for models in MODEL_CATEGORIES.values() for model in models]
# Helper function to get model info display
def get_model_info(model_name):
"""Extract and format model information for display"""
    parts = model_name.split('/')
    # Guard against free-form input that lacks the "org/model" form
    org = parts[0] if len(parts) > 1 else "Unknown"
    model = parts[-1]
# Extract numbers from model name to determine size
    size_match = re.search(r'(\d+\.?\d*)B', model)
size = size_match.group(1) + "B" if size_match else "Unknown"
return f"**Organization:** {org}\n**Model:** {model}\n**Size:** {size}"
def filter_models(search_term):
"""Filter models based on search term across all categories"""
if not search_term:
return MODEL_CATEGORIES
filtered_categories = {}
for category, models in MODEL_CATEGORIES.items():
filtered_models = [m for m in models if search_term.lower() in m.lower()]
if filtered_models:
filtered_categories[category] = filtered_models
return filtered_categories
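# e.g. filter_models("zephyr") -> {"Other Models": ["HuggingFaceH4/zephyr-7b-beta"]}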
def update_model_display(search_term=""):
"""Update the model selection UI based on search term"""
filtered_categories = filter_models(search_term)
# Create HTML for model display
html = "<div style='max-height: 400px; overflow-y: auto;'>"
for category, models in filtered_categories.items():
html += f"<h3>{category}</h3><div style='display: grid; grid-template-columns: repeat(auto-fill, minmax(250px, 1fr)); gap: 10px;'>"
for model in models:
model_short = model.split('/')[-1]
html += f"""
<div class='model-card' onclick='selectModel("{model}")'
style='border: 1px solid #ddd; border-radius: 8px; padding: 12px; cursor: pointer; transition: all 0.2s;
background: linear-gradient(145deg, #f0f0f0, #ffffff); box-shadow: 0 4px 6px rgba(0,0,0,0.1);'>
<div style='font-weight: bold; margin-bottom: 6px; color: #1a73e8;'>{model_short}</div>
<div style='font-size: 0.8em; color: #666;'>{model.split('/')[0]}</div>
</div>
"""
html += "</div>"
if not filtered_categories:
html += "<p>No models found matching your search.</p>"
html += "</div><script>function selectModel(model) { document.getElementById('custom-model-input').value = model; }</script>"
return html
# Create custom CSS for better styling
custom_css = """
#app-container {
max-width: 1200px;
margin: 0 auto;
padding: 20px;
}
#chat-container {
border-radius: 12px;
box-shadow: 0 8px 16px rgba(0,0,0,0.1);
overflow: hidden;
}
.contain {
background: linear-gradient(135deg, #f5f7fa 0%, #e4e7eb 100%);
}
h1, h2, h3 {
font-family: 'Poppins', sans-serif;
}
h1 {
background: linear-gradient(90deg, #2b6cb0, #4299e1);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
font-weight: 700;
letter-spacing: -0.5px;
margin-bottom: 8px;
}
.parameter-row {
display: flex;
gap: 10px;
margin-bottom: 10px;
}
.model-card:hover {
transform: translateY(-2px);
box-shadow: 0 6px 12px rgba(0,0,0,0.15);
border-color: #4299e1;
}
.tabs {
box-shadow: 0 2px 10px rgba(0,0,0,0.05);
border-radius: 8px;
overflow: hidden;
}
.footer {
text-align: center;
margin-top: 20px;
font-size: 0.8em;
color: #666;
}
/* Status indicator styles */
.status-indicator {
display: inline-block;
width: 10px;
height: 10px;
border-radius: 50%;
margin-right: 6px;
}
.status-active {
background-color: #10B981;
animation: pulse 2s infinite;
}
@keyframes pulse {
0% {
box-shadow: 0 0 0 0 rgba(16, 185, 129, 0.7);
}
70% {
box-shadow: 0 0 0 5px rgba(16, 185, 129, 0);
}
100% {
box-shadow: 0 0 0 0 rgba(16, 185, 129, 0);
}
}
/* Parameter tooltips */
.parameter-container {
position: relative;
}
.parameter-info {
display: none;
position: absolute;
background: white;
border: 1px solid #ddd;
padding: 10px;
border-radius: 6px;
box-shadow: 0 2px 5px rgba(0,0,0,0.2);
z-index: 100;
width: 250px;
top: 100%;
left: 10px;
}
.parameter-container:hover .parameter-info {
display: block;
}
"""
with gr.Blocks(css=custom_css, title=APP_TITLE, theme=gr.themes.Soft()) as demo:
gr.HTML(f"""
<div id="app-container">
<div style="text-align: center; padding: 20px 0;">
<h1 style="font-size: 2.5rem;">{APP_TITLE}</h1>
<p style="font-size: 1.1rem; color: #555;">{APP_DESCRIPTION}</p>
<div style="margin-top: 10px;">
<span class="status-indicator status-active"></span>
<span>Service Active</span>
<span style="margin-left: 15px;">Last Updated: {datetime.now().strftime('%Y-%m-%d')}</span>
</div>
</div>
</div>
""")
with gr.Row():
with gr.Column(scale=3):
# Main chat interface
            with gr.Group(elem_id="chat-container"):  # gr.Box was removed in Gradio 4; Group is the closest equivalent
chatbot = gr.Chatbot(
height=550,
show_copy_button=True,
placeholder="Select a model and begin chatting",
layout="panel"
)
with gr.Row():
with gr.Column(scale=8):
msg = gr.Textbox(
show_label=False,
placeholder="Type your message here...",
container=False,
scale=8
)
with gr.Column(scale=1, min_width=70):
submit_btn = gr.Button("Send", variant="primary", scale=1)
with gr.Accordion("Conversation Settings", open=False):
system_message_box = gr.Textbox(
value="You are a helpful assistant.",
placeholder="System prompt that guides the assistant's behavior",
label="System Prompt",
lines=2
)
with gr.Tabs(elem_classes="tabs"):
with gr.TabItem("Basic Parameters"):
with gr.Row(elem_classes="parameter-row"):
with gr.Column():
max_tokens_slider = gr.Slider(
minimum=1,
maximum=4096,
value=512,
step=1,
label="Max new tokens"
)
with gr.Column():
temperature_slider = gr.Slider(
minimum=0.1,
maximum=4.0,
value=0.7,
step=0.1,
label="Temperature"
)
with gr.TabItem("Advanced Parameters"):
with gr.Row(elem_classes="parameter-row"):
with gr.Column():
top_p_slider = gr.Slider(
minimum=0.1,
maximum=1.0,
value=0.95,
step=0.05,
label="Top-P"
)
with gr.Column():
frequency_penalty_slider = gr.Slider(
minimum=-2.0,
maximum=2.0,
value=0.0,
step=0.1,
label="Frequency Penalty"
)
seed_slider = gr.Slider(
minimum=-1,
maximum=65535,
value=-1,
step=1,
label="Seed (-1 for random)"
)
with gr.Column(scale=2):
# Model selection panel
        with gr.Group():  # gr.Group replaces the removed gr.Box
gr.HTML("<h3 style='margin-top: 0;'>Model Selection</h3>")
# Custom model input (this is what the respond function sees)
custom_model_box = gr.Textbox(
value="meta-llama/Llama-3.3-70B-Instruct",
label="Selected Model",
elem_id="custom-model-input"
)
# Search box
model_search_box = gr.Textbox(
label="Search Models",
placeholder="Type to filter models...",
lines=1
)
# Dynamic model display area
model_display = gr.HTML(update_model_display())
# Model information display
gr.HTML("<h4>Current Model Info</h4>")
model_info_display = gr.Markdown(get_model_info("meta-llama/Llama-3.3-70B-Instruct"))
# Footer
gr.HTML("""
<div class="footer">
<p>Created with Gradio • Powered by Hugging Face Inference API</p>
<p>This interface allows you to chat with various language models without requiring a GPU</p>
</div>
""")
# Set up event handlers
msg.submit(
fn=respond,
inputs=[msg, chatbot, system_message_box, max_tokens_slider, temperature_slider,
top_p_slider, frequency_penalty_slider, seed_slider, custom_model_box],
outputs=[chatbot],
queue=True
)
submit_btn.click(
fn=respond,
inputs=[msg, chatbot, system_message_box, max_tokens_slider, temperature_slider,
top_p_slider, frequency_penalty_slider, seed_slider, custom_model_box],
outputs=[chatbot],
queue=True
)
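    # A common follow-up (not wired here) clears the textbox after each send by
    # chaining on the event, e.g.:
    #   msg.submit(fn=respond, inputs=[...], outputs=[chatbot]).then(lambda: "", None, [msg])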
# Update model display when search changes
model_search_box.change(
fn=lambda x: update_model_display(x),
inputs=model_search_box,
outputs=model_display
)
# Update model info when selection changes
custom_model_box.change(
fn=lambda x: get_model_info(x),
inputs=custom_model_box,
outputs=model_info_display
)
if __name__ == "__main__":
print("Launching the enhanced multi-model chat interface.")
demo.launch()