import gradio as gr
from openai import OpenAI
import os
import re
from datetime import datetime

# App title and description
APP_TITLE = "NO GPU, Multi LLMs Uses"
APP_DESCRIPTION = "Access and chat with multiple language models without requiring a GPU"

# Load environment variables
ACCESS_TOKEN = os.getenv("HF_TOKEN")

client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)

# Model categories for better organization
MODEL_CATEGORIES = {
    "Qwen": [
        "Qwen/Qwen2.5-72B-Instruct",
        "Qwen/Qwen2.5-3B-Instruct",
        "Qwen/Qwen2.5-0.5B-Instruct",
        "Qwen/Qwen2.5-Coder-32B-Instruct",
    ],
    "Meta LLaMa": [
        "meta-llama/Llama-3.3-70B-Instruct",
        "meta-llama/Llama-3.1-70B-Instruct",
        "meta-llama/Llama-3.0-70B-Instruct",
        "meta-llama/Llama-3.2-3B-Instruct",
        "meta-llama/Llama-3.2-1B-Instruct",
        "meta-llama/Llama-3.1-8B-Instruct",
    ],
    "Mistral": [
        "mistralai/Mistral-Nemo-Instruct-2407",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "mistralai/Mistral-7B-Instruct-v0.3",
        "mistralai/Mistral-7B-Instruct-v0.2",
    ],
    "Microsoft Phi": [
        "microsoft/Phi-3.5-mini-instruct",
        "microsoft/Phi-3-mini-128k-instruct",
        "microsoft/Phi-3-mini-4k-instruct",
    ],
    "Other Models": [
        "NousResearch/Hermes-3-Llama-3.1-8B",
        "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        "HuggingFaceH4/zephyr-7b-beta",
        "HuggingFaceTB/SmolLM2-360M-Instruct",
        "tiiuae/falcon-7b-instruct",
        "01-ai/Yi-1.5-34B-Chat",
    ],
}

# Flatten the model list
ALL_MODELS = [m for models in MODEL_CATEGORIES.values() for m in models]


def get_model_info(model_name):
    """Return a short Markdown summary (organization, model name, parameter size)."""
    parts = model_name.split("/")
    if len(parts) != 2:
        return f"**Model:** {model_name}\n**Format:** Unknown"
    org, model = parts
    size_match = re.search(r"(\d+\.?\d*)B", model)
    size = size_match.group(1) + "B" if size_match else "Unknown"
    return f"**Organization:** {org}\n**Model:** {model}\n**Size:** {size}"


def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
    selected_model,
):
    # Prepare messages
    if seed == -1:
        seed = None

    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    model_to_use = selected_model or ALL_MODELS[0]

    new_history = list(history) + [(message, "")]
    current_response = ""

    try:
        # Stream the completion and update the last chat turn as tokens arrive
        for chunk in client.chat.completions.create(
            model=model_to_use,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            frequency_penalty=frequency_penalty,
            seed=seed,
            messages=messages,
        ):
            delta = chunk.choices[0].delta.content
            if delta:
                current_response += delta
                new_history[-1] = (message, current_response)
                yield new_history
    except Exception as e:
        err = f"Error: {e}"
        new_history[-1] = (message, err)
        yield new_history


with gr.Blocks(title=APP_TITLE, theme=gr.themes.Soft()) as demo:
    gr.Markdown(f"## {APP_TITLE}\n\n{APP_DESCRIPTION}")

    with gr.Row():
        with gr.Column(scale=2):
            # Model selection via Dropdown
            selected_model = gr.Dropdown(
                choices=ALL_MODELS,
                value=ALL_MODELS[0],
                label="Select Model",
            )
            model_info = gr.Markdown(get_model_info(ALL_MODELS[0]))

            def update_info(model_name):
                return get_model_info(model_name)

            selected_model.change(
                fn=update_info,
                inputs=[selected_model],
                outputs=[model_info],
            )

            # Conversation settings
            system_message = gr.Textbox(
                value="You are a helpful assistant.",
                label="System Prompt",
                lines=2,
            )
            max_tokens = gr.Slider(1, 4096, value=512, label="Max New Tokens")
            temperature = gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature")
            top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-P")
            freq_penalty = gr.Slider(-2.0, 2.0, value=0.0, step=0.1, label="Frequency Penalty")
            seed = gr.Slider(-1, 65535, value=-1, step=1, label="Seed (-1 random)")

        with gr.Column(scale=3):
            chatbot = gr.Chatbot()
            msg = gr.Textbox(placeholder="Type your message here...", show_label=False)
            send_btn = gr.Button("Send")

            send_btn.click(
                fn=respond,
                inputs=[
                    msg, chatbot, system_message, max_tokens,
                    temperature, top_p, freq_penalty, seed, selected_model,
                ],
                outputs=[chatbot],
                queue=True,
            )
            msg.submit(
                fn=respond,
                inputs=[
                    msg, chatbot, system_message, max_tokens,
                    temperature, top_p, freq_penalty, seed, selected_model,
                ],
                outputs=[chatbot],
                queue=True,
            )

demo.launch()
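
# A minimal sketch of how to run this app locally, assuming the file is saved
# as app.py (the filename is an assumption, not stated in the source). The
# Hugging Face token is read via os.getenv("HF_TOKEN") above, so it must be
# set in the environment before launch:
#
#   export HF_TOKEN=<your Hugging Face access token>
#   python app.py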