import gradio as gr
from openai import OpenAI
import os
import re
# App title and description
APP_TITLE = "No GPU, Multi-LLM Chat"
APP_DESCRIPTION = "Access and chat with multiple language models without requiring a GPU"

# Load the Hugging Face API token from the environment
ACCESS_TOKEN = os.getenv("HF_TOKEN")
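# Optional fail-fast check (not in the original app): without a token the
# client would only error at request time, with a less obvious message.
if not ACCESS_TOKEN:
    raise RuntimeError("HF_TOKEN environment variable is not set")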
# OpenAI-compatible client pointed at the Hugging Face Inference API
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
# Model categories for better organization
MODEL_CATEGORIES = {
    "Qwen": [
        "Qwen/Qwen2.5-72B-Instruct",
        "Qwen/Qwen2.5-3B-Instruct",
        "Qwen/Qwen2.5-0.5B-Instruct",
        "Qwen/Qwen2.5-Coder-32B-Instruct",
    ],
    "Meta LLaMa": [
        "meta-llama/Llama-3.3-70B-Instruct",
        "meta-llama/Llama-3.1-70B-Instruct",
"meta-llama/Llama-3.0-70B-Instruct", | |
"meta-llama/Llama-3.2-3B-Instruct", | |
"meta-llama/Llama-3.2-1B-Instruct", | |
"meta-llama/Llama-3.1-8B-Instruct", | |
], | |
"Mistral": [ | |
"mistralai/Mistral-Nemo-Instruct-2407", | |
"mistralai/Mixtral-8x7B-Instruct-v0.1", | |
"mistralai/Mistral-7B-Instruct-v0.3", | |
"mistralai/Mistral-7B-Instruct-v0.2", | |
], | |
"Microsoft Phi": [ | |
"microsoft/Phi-3.5-mini-instruct", | |
"microsoft/Phi-3-mini-128k-instruct", | |
"microsoft/Phi-3-mini-4k-instruct", | |
], | |
"Other Models": [ | |
"NousResearch/Hermes-3-Llama-3.1-8B", | |
"NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", | |
"deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", | |
"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", | |
"HuggingFaceH4/zephyr-7b-beta", | |
"HuggingFaceTB/SmolLM2-360M-Instruct", | |
"tiiuae/falcon-7b-instruct", | |
"01-ai/Yi-1.5-34B-Chat", | |
] | |
} | |
# Flatten the model list
ALL_MODELS = [m for models in MODEL_CATEGORIES.values() for m in models]
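# For example, ALL_MODELS[0] is "Qwen/Qwen2.5-72B-Instruct": dicts preserve
# insertion order in Python 3.7+, so the dropdown lists models in the same
# order as the categories above.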
def get_model_info(model_name):
    """Build a short Markdown summary (org, name, parameter count) for a model id."""
    parts = model_name.split('/')
    if len(parts) != 2:
        return f"**Model:** {model_name}\n**Format:** Unknown"
    org, model = parts
    # Pull the parameter count (e.g. "72B", "0.5B") out of the model name
    size_match = re.search(r'(\d+\.?\d*)B', model)
    size = size_match.group(1) + "B" if size_match else "Unknown"
    return f"**Organization:** {org}\n**Model:** {model}\n**Size:** {size}"
def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
    selected_model,
):
    # A seed of -1 means "no fixed seed": pass None so the server randomizes
    if seed == -1:
        seed = None
    # Rebuild the conversation in the OpenAI chat-message format
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    model_to_use = selected_model or ALL_MODELS[0]
    # Append a placeholder turn that the streaming loop fills in below
    new_history = list(history) + [(message, "")]
    current_response = ""
    try:
        # Stream the completion and update the chat window as tokens arrive
        for chunk in client.chat.completions.create(
            model=model_to_use,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            frequency_penalty=frequency_penalty,
            seed=seed,
            messages=messages,
        ):
            if not chunk.choices:
                continue  # skip stream chunks that carry no choices
            delta = chunk.choices[0].delta.content
            if delta:
                current_response += delta
                new_history[-1] = (message, current_response)
                yield new_history
    except Exception as e:
        # Surface API errors in the chat window instead of crashing the app
        new_history[-1] = (message, f"Error: {e}")
        yield new_history
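# Minimal smoke-test sketch (hypothetical, not part of the app): respond() is
# a plain generator, so it can be driven from a Python shell without the UI:
#
#   last = None
#   for last in respond("Hello!", [], "You are a helpful assistant.",
#                       128, 0.7, 0.95, 0.0, -1, ALL_MODELS[0]):
#       pass
#   print(last[-1][1])  # final assistant reply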
with gr.Blocks(title=APP_TITLE, theme=gr.themes.Soft()) as demo:
    gr.Markdown(f"## {APP_TITLE}\n\n{APP_DESCRIPTION}")
    with gr.Row():
        with gr.Column(scale=2):
            # Model selection via dropdown
            selected_model = gr.Dropdown(
                choices=ALL_MODELS,
                value=ALL_MODELS[0],
                label="Select Model",
            )
            model_info = gr.Markdown(get_model_info(ALL_MODELS[0]))

            # Keep the info panel in sync with the selected model
            selected_model.change(
                fn=get_model_info,
                inputs=[selected_model],
                outputs=[model_info],
            )
            # Conversation settings
            system_message = gr.Textbox(
                value="You are a helpful assistant.",
                label="System Prompt",
                lines=2,
            )
            max_tokens = gr.Slider(1, 4096, value=512, label="Max New Tokens")
            temperature = gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature")
            top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-P")
            freq_penalty = gr.Slider(-2.0, 2.0, value=0.0, step=0.1, label="Frequency Penalty")
            seed = gr.Slider(-1, 65535, value=-1, step=1, label="Seed (-1 for random)")
        with gr.Column(scale=3):
            chatbot = gr.Chatbot()
            msg = gr.Textbox(placeholder="Type your message here...", show_label=False)
            send_btn = gr.Button("Send")
    # The Send button and pressing Enter both trigger the same handler
    chat_inputs = [
        msg, chatbot, system_message,
        max_tokens, temperature, top_p,
        freq_penalty, seed, selected_model,
    ]
    send_btn.click(fn=respond, inputs=chat_inputs, outputs=[chatbot], queue=True)
    msg.submit(fn=respond, inputs=chat_inputs, outputs=[chatbot], queue=True)
demo.launch()
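# On Hugging Face Spaces this file runs as app.py and launch() needs no
# arguments; for local use, options such as demo.launch(server_port=7860)
# can be passed instead.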