Spaces:

keeperballon
/

multi-llm

Running

File size: 14,617 Bytes

153a45e

import gradio as gr
from openai import OpenAI
import os
from datetime import datetime

# App title and description (rendered in the page header and browser tab)
APP_TITLE = "NO GPU, Multi LLMs Uses"
APP_DESCRIPTION = "Access and chat with multiple language models without requiring a GPU"

# Read the Hugging Face token from the environment. os.getenv returns None
# when HF_TOKEN is unset, so report what actually happened instead of always
# claiming that a token was loaded.
ACCESS_TOKEN = os.getenv("HF_TOKEN")
if ACCESS_TOKEN:
    print("Access token loaded.")
else:
    print("Warning: HF_TOKEN environment variable is not set.")

# OpenAI-compatible client pointed at the Hugging Face inference endpoint;
# the HF token is sent as the API key.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
print("OpenAI client initialized.")


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
    custom_model
):
    """Stream a chat completion for ``message``, yielding the reply as it grows.

    Args:
        message: The latest user message.
        history: Earlier turns as (user, assistant) pairs; empty parts are
            skipped. ``None`` is treated as an empty history.
        system_message: Content of the leading system message.
        max_tokens: Passed through as ``max_tokens``.
        temperature: Passed through as ``temperature``.
        top_p: Passed through as ``top_p``.
        frequency_penalty: Passed through as ``frequency_penalty``.
        seed: Passed through as ``seed``; ``-1`` is sent as ``None``.
        custom_model: Model id to query. ``None`` or a blank string falls
            back to ``meta-llama/Llama-3.3-70B-Instruct``.

    Yields:
        The accumulated response text after each received token. If the
        request fails, a final string containing an error message is yielded
        (appended to any partial response already produced).
    """
    print(f"Received message: {message}")
    print(f"Selected model: {custom_model}")

    # Convert seed to None if -1 (meaning random)
    if seed == -1:
        seed = None

    messages = [{"role": "system", "content": system_message}]

    # Add conversation history to the context
    for turn in history or []:
        user_part, assistant_part = turn[0], turn[1]
        if user_part:
            messages.append({"role": "user", "content": user_part})
        if assistant_part:
            messages.append({"role": "assistant", "content": assistant_part})

    # Append the latest user message
    messages.append({"role": "user", "content": message})

    # Use the requested model; None or whitespace-only input falls back to
    # the default instead of raising before the error handling below.
    model_to_use = (custom_model or "").strip() or "meta-llama/Llama-3.3-70B-Instruct"

    # Start with an empty string to build the response as tokens stream in
    response = ""

    try:
        stream = client.chat.completions.create(
            model=model_to_use,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            frequency_penalty=frequency_penalty,
            seed=seed,
            messages=messages,
        )
        for message_chunk in stream:
            # A chunk without choices carries no text; indexing [0] on it
            # would abort the whole stream with an IndexError.
            if not message_chunk.choices:
                continue
            token_text = message_chunk.choices[0].delta.content
            if token_text is not None:  # Handle None type in response
                response += token_text
                yield response
    except Exception as e:
        error_text = f"Error: {str(e)}\n\nPlease check your model selection and parameters, or try again later."
        # Keep whatever was already streamed rather than replacing it.
        yield f"{response}\n\n{error_text}" if response else error_text

    print("Completed response generation.")


# Model catalogue shown in the selection panel, grouped by family.
# Each entry is a Hugging Face repo id of the form "<organization>/<model>".
MODEL_CATEGORIES = {
    "Meta LLaMa": [
        "meta-llama/Llama-3.3-70B-Instruct",
        "meta-llama/Llama-3.1-70B-Instruct",
        # The Llama 3 (3.0) release is published under the "Meta-Llama-3-*"
        # naming; "Llama-3.0-70B-Instruct" is not a repo id.
        "meta-llama/Meta-Llama-3-70B-Instruct",
        "meta-llama/Llama-3.2-3B-Instruct",
        "meta-llama/Llama-3.2-1B-Instruct",
        "meta-llama/Llama-3.1-8B-Instruct",
    ],
    "Mistral": [
        "mistralai/Mistral-Nemo-Instruct-2407",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "mistralai/Mistral-7B-Instruct-v0.3",
        "mistralai/Mistral-7B-Instruct-v0.2",
    ],
    "Qwen": [
        "Qwen/Qwen3-235B-A22B",
        "Qwen/Qwen3-32B",
        "Qwen/Qwen2.5-72B-Instruct",
        "Qwen/Qwen2.5-3B-Instruct",
        "Qwen/Qwen2.5-0.5B-Instruct",
        "Qwen/QwQ-32B",
        "Qwen/Qwen2.5-Coder-32B-Instruct",
    ],
    "Microsoft Phi": [
        "microsoft/Phi-3.5-mini-instruct",
        "microsoft/Phi-3-mini-128k-instruct",
        "microsoft/Phi-3-mini-4k-instruct",
    ],
    "Other Models": [
        "NousResearch/Hermes-3-Llama-3.1-8B",
        "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        "HuggingFaceH4/zephyr-7b-beta",
        "HuggingFaceTB/SmolLM2-360M-Instruct",
        "tiiuae/falcon-7b-instruct",
        "01-ai/Yi-1.5-34B-Chat",
    ],
}

# Flat list of every model id, in category order, for search functionality.
ALL_MODELS = [
    model_id
    for category_models in MODEL_CATEGORIES.values()
    for model_id in category_models
]


# Helper function to get model info display
def get_model_info(model_name):
    """Format a model id as a short Markdown summary.

    Args:
        model_name: A model id, normally ``"<organization>/<model>"``. Values
            without a slash (for example a partially typed name), empty
            strings and ``None`` are accepted; the organization is then
            reported as ``"Unknown"``.

    Returns:
        A Markdown string with the organization, the model name and the
        parameter-count token found in the model name (for example ``70B``,
        ``0.5B`` or ``360M``), or ``"Unknown"`` when the name has none.
    """
    import re

    name = (model_name or "").strip()
    parts = name.split('/')
    if len(parts) >= 2:
        org, model = parts[0], parts[1]
    else:
        # No "/" present: this used to raise IndexError on parts[1].
        org, model = "Unknown", name or "Unknown"

    # A number directly followed by B or M (either case) that is not the
    # start of a longer word, so "7b-beta" matches but "4bit" does not.
    size_match = re.search(r'(\d+(?:\.\d+)?)([bBmM])(?![A-Za-z])', model)
    if size_match:
        size = size_match.group(1) + size_match.group(2).upper()
    else:
        size = "Unknown"

    return f"**Organization:** {org}\n**Model:** {model}\n**Size:** {size}"


def filter_models(search_term):
    """Return the categories whose models contain ``search_term``.

    Matching is a case-insensitive substring test against the full model id.
    A falsy search term returns ``MODEL_CATEGORIES`` itself; otherwise a new
    dict is built that keeps only the matching models and drops categories
    left without any match.
    """
    if not search_term:
        return MODEL_CATEGORIES

    needle = search_term.lower()
    hits_by_category = (
        (category, [name for name in models if needle in name.lower()])
        for category, models in MODEL_CATEGORIES.items()
    )
    return {category: hits for category, hits in hits_by_category if hits}


def update_model_display(search_term=""):
    """Render the (optionally filtered) model catalogue as clickable HTML cards.

    Args:
        search_term: Filter passed to ``filter_models``; empty shows all models.

    Returns:
        An HTML string with one heading and card grid per category, or a
        "no models found" message when nothing matches. Clicking a card
        writes its model id into the text field inside the element with id
        ``custom-model-input``.
    """
    filtered_categories = filter_models(search_term)

    # Inline click handler. A <script> block delivered inside injected HTML is
    # not executed by the browser, so the handler lives on the card itself.
    # It targets the actual <textarea>/<input> inside the Gradio component
    # wrapper and fires an "input" event so the front end registers the new
    # value. Only double quotes are used: the attribute is single-quoted.
    select_on_click = (
        'var box = document.querySelector('
        '"#custom-model-input textarea, #custom-model-input input"); '
        'if (box) { box.value = this.dataset.model; '
        'box.dispatchEvent(new Event("input", { bubbles: true })); }'
    )

    # Collect fragments and join once instead of repeated string concatenation.
    fragments = ["<div style='max-height: 400px; overflow-y: auto;'>"]

    for category, models in filtered_categories.items():
        fragments.append(
            f"<h3>{category}</h3><div style='display: grid; grid-template-columns: repeat(auto-fill, minmax(250px, 1fr)); gap: 10px;'>"
        )

        for model in models:
            name_parts = model.split('/')
            fragments.append(f"""
            <div class='model-card' data-model='{model}' onclick='{select_on_click}'
                 style='border: 1px solid #ddd; border-radius: 8px; padding: 12px; cursor: pointer; transition: all 0.2s;
                        background: linear-gradient(145deg, #f0f0f0, #ffffff); box-shadow: 0 4px 6px rgba(0,0,0,0.1);'>
                <div style='font-weight: bold; margin-bottom: 6px; color: #1a73e8;'>{name_parts[-1]}</div>
                <div style='font-size: 0.8em; color: #666;'>{name_parts[0]}</div>
            </div>
            """)
        fragments.append("</div>")

    if not filtered_categories:
        fragments.append("<p>No models found matching your search.</p>")

    fragments.append("</div>")
    return "".join(fragments)


# Custom stylesheet for the app, passed to gr.Blocks(css=...) below.
# It styles elements referenced elsewhere in this file by id or class:
# #app-container (page header), #chat-container (chat panel), .parameter-row
# and .tabs (settings), .model-card (model grid cards), .footer, and the
# pulsing .status-indicator dot.
# NOTE(review): .parameter-container / .parameter-info are not referenced by
# any markup visible in this file — confirm whether they are still needed.
custom_css = """
#app-container {
    max-width: 1200px;
    margin: 0 auto;
    padding: 20px;
}

#chat-container {
    border-radius: 12px;
    box-shadow: 0 8px 16px rgba(0,0,0,0.1);
    overflow: hidden;
}

.contain {
    background: linear-gradient(135deg, #f5f7fa 0%, #e4e7eb 100%);
}

h1, h2, h3 {
    font-family: 'Poppins', sans-serif;
}

h1 {
    background: linear-gradient(90deg, #2b6cb0, #4299e1);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-weight: 700;
    letter-spacing: -0.5px;
    margin-bottom: 8px;
}

.parameter-row {
    display: flex;
    gap: 10px;
    margin-bottom: 10px;
}

.model-card:hover {
    transform: translateY(-2px);
    box-shadow: 0 6px 12px rgba(0,0,0,0.15);
    border-color: #4299e1;
}

.tabs {
    box-shadow: 0 2px 10px rgba(0,0,0,0.05);
    border-radius: 8px;
    overflow: hidden;
}

.footer {
    text-align: center;
    margin-top: 20px;
    font-size: 0.8em;
    color: #666;
}

/* Status indicator styles */
.status-indicator {
    display: inline-block;
    width: 10px;
    height: 10px;
    border-radius: 50%;
    margin-right: 6px;
}

.status-active {
    background-color: #10B981;
    animation: pulse 2s infinite;
}

@keyframes pulse {
    0% {
        box-shadow: 0 0 0 0 rgba(16, 185, 129, 0.7);
    }
    70% {
        box-shadow: 0 0 0 5px rgba(16, 185, 129, 0);
    }
    100% {
        box-shadow: 0 0 0 0 rgba(16, 185, 129, 0);
    }
}

/* Parameter tooltips */
.parameter-container {
    position: relative;
}

.parameter-info {
    display: none;
    position: absolute;
    background: white;
    border: 1px solid #ddd;
    padding: 10px;
    border-radius: 6px;
    box-shadow: 0 2px 5px rgba(0,0,0,0.2);
    z-index: 100;
    width: 250px;
    top: 100%;
    left: 10px;
}

.parameter-container:hover .parameter-info {
    display: block;
}
"""

# Build the UI: a header, then a two-column row (chat and generation settings
# on the left, model selection on the right), then a footer. Components
# created here are referenced by the event handlers registered further down
# in this same `with` block.
with gr.Blocks(css=custom_css, title=APP_TITLE, theme=gr.themes.Soft()) as demo:
    # Page header. The f-string is evaluated once when the UI is built, so the
    # "Last Updated" date is the date this code ran, not a file or model
    # modification date.
    gr.HTML(f"""
    <div id="app-container">
        <div style="text-align: center; padding: 20px 0;">
            <h1 style="font-size: 2.5rem;">{APP_TITLE}</h1>
            <p style="font-size: 1.1rem; color: #555;">{APP_DESCRIPTION}</p>
            <div style="margin-top: 10px;">
                <span class="status-indicator status-active"></span>
                <span>Service Active</span>
                <span style="margin-left: 15px;">Last Updated: {datetime.now().strftime('%Y-%m-%d')}</span>
            </div>
        </div>
    </div>
    """)
    
    with gr.Row():
        with gr.Column(scale=3):
            # Main chat interface
            # NOTE(review): gr.Box is not available in Gradio 4+ (gr.Group is
            # the usual replacement) — confirm the pinned Gradio version.
            with gr.Box(elem_id="chat-container"):
                chatbot = gr.Chatbot(
                    height=550, 
                    show_copy_button=True, 
                    placeholder="Select a model and begin chatting", 
                    layout="panel"
                )
                
                # Message input and send button side by side
                with gr.Row():
                    with gr.Column(scale=8):
                        msg = gr.Textbox(
                            show_label=False,
                            placeholder="Type your message here...",
                            container=False,
                            scale=8
                        )
                    with gr.Column(scale=1, min_width=70):
                        submit_btn = gr.Button("Send", variant="primary", scale=1)
            
            # Generation settings; each control below is passed to respond()
            # as an input by the event handlers.
            with gr.Accordion("Conversation Settings", open=False):
                system_message_box = gr.Textbox(
                    value="You are a helpful assistant.", 
                    placeholder="System prompt that guides the assistant's behavior", 
                    label="System Prompt",
                    lines=2
                )
                
                with gr.Tabs(elem_classes="tabs"):
                    with gr.TabItem("Basic Parameters"):
                        with gr.Row(elem_classes="parameter-row"):
                            with gr.Column():
                                max_tokens_slider = gr.Slider(
                                    minimum=1,
                                    maximum=4096,
                                    value=512,
                                    step=1,
                                    label="Max new tokens"
                                )
                            with gr.Column():
                                temperature_slider = gr.Slider(
                                    minimum=0.1,
                                    maximum=4.0,
                                    value=0.7,
                                    step=0.1,
                                    label="Temperature"
                                )
                    
                    with gr.TabItem("Advanced Parameters"):
                        with gr.Row(elem_classes="parameter-row"):
                            with gr.Column():
                                top_p_slider = gr.Slider(
                                    minimum=0.1,
                                    maximum=1.0,
                                    value=0.95,
                                    step=0.05,
                                    label="Top-P"
                                )
                            with gr.Column():
                                frequency_penalty_slider = gr.Slider(
                                    minimum=-2.0,
                                    maximum=2.0,
                                    value=0.0,
                                    step=0.1,
                                    label="Frequency Penalty"
                                )
                        
                        # -1 is the "random" sentinel; respond() converts it to None.
                        seed_slider = gr.Slider(
                            minimum=-1,
                            maximum=65535,
                            value=-1,
                            step=1,
                            label="Seed (-1 for random)"
                        )
        
        with gr.Column(scale=2):
            # Model selection panel
            with gr.Box():
                gr.HTML("<h3 style='margin-top: 0;'>Model Selection</h3>")
                
                # Custom model input (this is what the respond function sees)
                # Its elem_id is the target the model cards' click script looks up.
                custom_model_box = gr.Textbox(
                    value="meta-llama/Llama-3.3-70B-Instruct",
                    label="Selected Model",
                    elem_id="custom-model-input"
                )
                
                # Search box
                model_search_box = gr.Textbox(
                    label="Search Models",
                    placeholder="Type to filter models...",
                    lines=1
                )
                
                # Dynamic model display area (initially the unfiltered catalogue)
                model_display = gr.HTML(update_model_display())
                
                # Model information display, initialised for the default model
                gr.HTML("<h4>Current Model Info</h4>")
                model_info_display = gr.Markdown(get_model_info("meta-llama/Llama-3.3-70B-Instruct"))
    
    # Footer
    gr.HTML("""
    <div class="footer">
        <p>Created with Gradio • Powered by Hugging Face Inference API</p>
        <p>This interface allows you to chat with various language models without requiring a GPU</p>
    </div>
    """)
    
    # Set up event handlers
    def _chat_turn(message, history, system_message, max_tokens, temperature,
                   top_p, frequency_penalty, seed, custom_model):
        """Adapt ``respond`` to the (message box, chatbot) outputs.

        ``respond`` yields the assistant reply as a growing string, while the
        Chatbot component needs the whole conversation as a list of
        [user, assistant] pairs. Each partial reply is therefore appended to
        the existing history as the newest pair. Every yield is a
        ``(message box value, chatbot value)`` tuple; the message box is
        cleared once the message has been accepted.
        """
        past_turns = list(history or [])
        if not message or not message.strip():
            # Nothing to send: leave both components unchanged.
            yield message, past_turns
            return

        # Show the user's message right away, before the first token arrives.
        yield "", past_turns + [[message, None]]
        for partial_reply in respond(
            message, past_turns, system_message, max_tokens, temperature,
            top_p, frequency_penalty, seed, custom_model
        ):
            yield "", past_turns + [[message, partial_reply]]

    # Pressing Enter in the textbox and clicking "Send" run the same handler.
    chat_inputs = [msg, chatbot, system_message_box, max_tokens_slider, temperature_slider,
                   top_p_slider, frequency_penalty_slider, seed_slider, custom_model_box]
    chat_outputs = [msg, chatbot]

    msg.submit(
        fn=_chat_turn,
        inputs=chat_inputs,
        outputs=chat_outputs,
        queue=True
    )

    submit_btn.click(
        fn=_chat_turn,
        inputs=chat_inputs,
        outputs=chat_outputs,
        queue=True
    )

    # Update model display when search changes
    model_search_box.change(
        fn=update_model_display,
        inputs=model_search_box,
        outputs=model_display
    )

    # Update model info when selection changes
    custom_model_box.change(
        fn=get_model_info,
        inputs=custom_model_box,
        outputs=model_info_display
    )

# Start the Gradio app only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    print("Launching the enhanced multi-model chat interface.")
    demo.launch()