import os
import re
from datetime import datetime

import gradio as gr
from openai import OpenAI

# App title and description
APP_TITLE = "No GPU, Multi-LLM Chat"
APP_DESCRIPTION = "Access and chat with multiple language models without requiring a GPU"

# Load environment variables
ACCESS_TOKEN = os.getenv("HF_TOKEN")
if ACCESS_TOKEN:
    print("Access token loaded.")
else:
    print("Warning: HF_TOKEN is not set; requests to the Inference API will fail.")

client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
print("OpenAI client initialized.")


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
    custom_model
):
    """Stream a reply from the selected model, yielding the updated transcript."""
    print(f"Received message: {message}")
    print(f"Selected model: {custom_model}")

    # Convert seed to None if -1 (meaning random)
    if seed == -1:
        seed = None

    messages = [{"role": "system", "content": system_message}]

    # Add conversation history to the context
    for user_part, assistant_part in history:
        if user_part:
            messages.append({"role": "user", "content": user_part})
        if assistant_part:
            messages.append({"role": "assistant", "content": assistant_part})

    # Append the latest user message
    messages.append({"role": "user", "content": message})

    # If the user provided a model, use it; otherwise fall back to the default
    model_to_use = custom_model.strip() or "meta-llama/Llama-3.3-70B-Instruct"
    
    # Build the reply incrementally; yield the full transcript each time, since
    # the gr.Chatbot output expects a list of (user, assistant) tuples rather
    # than a bare string
    response = ""

    try:
        for message_chunk in client.chat.completions.create(
            model=model_to_use,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            frequency_penalty=frequency_penalty,
            seed=seed,
            messages=messages,
        ):
            token_text = message_chunk.choices[0].delta.content
            if token_text is not None:  # Some stream chunks carry no content
                response += token_text
                yield history + [(message, response)]
    except Exception as e:
        yield history + [(message, f"Error: {str(e)}\n\nPlease check your model selection and parameters, or try again later.")]

    print("Completed response generation.")


# Model categories for better organization
MODEL_CATEGORIES = {
    "Meta LLaMa": [
        "meta-llama/Llama-3.3-70B-Instruct",
        "meta-llama/Llama-3.1-70B-Instruct",
        "meta-llama/Llama-3.0-70B-Instruct",
        "meta-llama/Llama-3.2-3B-Instruct",
        "meta-llama/Llama-3.2-1B-Instruct",
        "meta-llama/Llama-3.1-8B-Instruct",
    ],
    "Mistral": [
        "mistralai/Mistral-Nemo-Instruct-2407",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "mistralai/Mistral-7B-Instruct-v0.3",
        "mistralai/Mistral-7B-Instruct-v0.2",
    ],
    "Qwen": [
        "Qwen/Qwen3-235B-A22B",
        "Qwen/Qwen3-32B",
        "Qwen/Qwen2.5-72B-Instruct",
        "Qwen/Qwen2.5-3B-Instruct",
        "Qwen/Qwen2.5-0.5B-Instruct",
        "Qwen/QwQ-32B",
        "Qwen/Qwen2.5-Coder-32B-Instruct",
    ],
    "Microsoft Phi": [
        "microsoft/Phi-3.5-mini-instruct",
        "microsoft/Phi-3-mini-128k-instruct",
        "microsoft/Phi-3-mini-4k-instruct",
    ],
    "Other Models": [
        "NousResearch/Hermes-3-Llama-3.1-8B",
        "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        "HuggingFaceH4/zephyr-7b-beta",
        "HuggingFaceTB/SmolLM2-360M-Instruct",
        "tiiuae/falcon-7b-instruct",
        "01-ai/Yi-1.5-34B-Chat",
    ]
}
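
# Adding a model is a one-line change, e.g. (hypothetical id):
# MODEL_CATEGORIES["Mistral"].append("mistralai/Example-Instruct-v0.4")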

# Flatten the model list (kept for convenience; the search below walks
# MODEL_CATEGORIES directly)
ALL_MODELS = [model for models in MODEL_CATEGORIES.values() for model in models]


# Helper function to get model info display
def get_model_info(model_name):
    """Extract and format model information for display"""
    if "/" not in model_name:
        return f"**Model:** {model_name}\n**Size:** Unknown"

    org, model = model_name.split("/", 1)

    # Parameter counts such as "70B" or "0.5B" in the name hint at model size
    size_match = re.search(r'(\d+\.?\d*)B', model)
    size = size_match.group(1) + "B" if size_match else "Unknown"

    return f"**Organization:** {org}\n**Model:** {model}\n**Size:** {size}"


def filter_models(search_term):
    """Filter models based on search term across all categories"""
    if not search_term:
        return MODEL_CATEGORIES
    
    filtered_categories = {}
    for category, models in MODEL_CATEGORIES.items():
        filtered_models = [m for m in models if search_term.lower() in m.lower()]
        if filtered_models:
            filtered_categories[category] = filtered_models
    
    return filtered_categories
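
# For example, filter_models("coder") keeps only the categories with a match:
#   {"Qwen": ["Qwen/Qwen2.5-Coder-32B-Instruct"]}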


def update_model_display(search_term=""):
    """Update the model selection UI based on search term"""
    filtered_categories = filter_models(search_term)
    
    # Create HTML for model display
    html = "<div style='max-height: 400px; overflow-y: auto;'>"
    
    for category, models in filtered_categories.items():
        html += f"<h3>{category}</h3><div style='display: grid; grid-template-columns: repeat(auto-fill, minmax(250px, 1fr)); gap: 10px;'>"
        
        for model in models:
            model_short = model.split('/')[-1]
            html += f"""
            <div class='model-card' onclick='selectModel("{model}")' 
                 style='border: 1px solid #ddd; border-radius: 8px; padding: 12px; cursor: pointer; transition: all 0.2s;
                        background: linear-gradient(145deg, #f0f0f0, #ffffff); box-shadow: 0 4px 6px rgba(0,0,0,0.1);'>
                <div style='font-weight: bold; margin-bottom: 6px; color: #1a73e8;'>{model_short}</div>
                <div style='font-size: 0.8em; color: #666;'>{model.split('/')[0]}</div>
            </div>
            """
        html += "</div>"
    
    if not filtered_categories:
        html += "<p>No models found matching your search.</p>"
    
    html += "</div><script>function selectModel(model) { document.getElementById('custom-model-input').value = model; }</script>"
    return html


# Create custom CSS for better styling
custom_css = """
#app-container {
    max-width: 1200px;
    margin: 0 auto;
    padding: 20px;
}

#chat-container {
    border-radius: 12px;
    box-shadow: 0 8px 16px rgba(0,0,0,0.1);
    overflow: hidden;
}

.contain {
    background: linear-gradient(135deg, #f5f7fa 0%, #e4e7eb 100%);
}

h1, h2, h3 {
    font-family: 'Poppins', sans-serif;
}

h1 {
    background: linear-gradient(90deg, #2b6cb0, #4299e1);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-weight: 700;
    letter-spacing: -0.5px;
    margin-bottom: 8px;
}

.parameter-row {
    display: flex;
    gap: 10px;
    margin-bottom: 10px;
}

.model-card:hover {
    transform: translateY(-2px);
    box-shadow: 0 6px 12px rgba(0,0,0,0.15);
    border-color: #4299e1;
}

.tabs {
    box-shadow: 0 2px 10px rgba(0,0,0,0.05);
    border-radius: 8px;
    overflow: hidden;
}

.footer {
    text-align: center;
    margin-top: 20px;
    font-size: 0.8em;
    color: #666;
}

/* Status indicator styles */
.status-indicator {
    display: inline-block;
    width: 10px;
    height: 10px;
    border-radius: 50%;
    margin-right: 6px;
}

.status-active {
    background-color: #10B981;
    animation: pulse 2s infinite;
}

@keyframes pulse {
    0% {
        box-shadow: 0 0 0 0 rgba(16, 185, 129, 0.7);
    }
    70% {
        box-shadow: 0 0 0 5px rgba(16, 185, 129, 0);
    }
    100% {
        box-shadow: 0 0 0 0 rgba(16, 185, 129, 0);
    }
}

/* Parameter tooltips */
.parameter-container {
    position: relative;
}

.parameter-info {
    display: none;
    position: absolute;
    background: white;
    border: 1px solid #ddd;
    padding: 10px;
    border-radius: 6px;
    box-shadow: 0 2px 5px rgba(0,0,0,0.2);
    z-index: 100;
    width: 250px;
    top: 100%;
    left: 10px;
}

.parameter-container:hover .parameter-info {
    display: block;
}
"""

with gr.Blocks(css=custom_css, title=APP_TITLE, theme=gr.themes.Soft()) as demo:
    gr.HTML(f"""
    <div id="app-container">
        <div style="text-align: center; padding: 20px 0;">
            <h1 style="font-size: 2.5rem;">{APP_TITLE}</h1>
            <p style="font-size: 1.1rem; color: #555;">{APP_DESCRIPTION}</p>
            <div style="margin-top: 10px;">
                <span class="status-indicator status-active"></span>
                <span>Service Active</span>
                <span style="margin-left: 15px;">Last Updated: {datetime.now().strftime('%Y-%m-%d')}</span>
            </div>
        </div>
    </div>
    """)
    
    with gr.Row():
        with gr.Column(scale=3):
            # Main chat interface
            # gr.Box was removed in Gradio 4; gr.Group is the closest container
            with gr.Group(elem_id="chat-container"):
                chatbot = gr.Chatbot(
                    height=550, 
                    show_copy_button=True, 
                    placeholder="Select a model and begin chatting", 
                    layout="panel"
                )
                
                with gr.Row():
                    with gr.Column(scale=8):
                        msg = gr.Textbox(
                            show_label=False,
                            placeholder="Type your message here...",
                            container=False,
                            scale=8
                        )
                    with gr.Column(scale=1, min_width=70):
                        submit_btn = gr.Button("Send", variant="primary", scale=1)
            
            with gr.Accordion("Conversation Settings", open=False):
                system_message_box = gr.Textbox(
                    value="You are a helpful assistant.", 
                    placeholder="System prompt that guides the assistant's behavior", 
                    label="System Prompt",
                    lines=2
                )
                
                with gr.Tabs(elem_classes="tabs"):
                    with gr.TabItem("Basic Parameters"):
                        with gr.Row(elem_classes="parameter-row"):
                            with gr.Column():
                                max_tokens_slider = gr.Slider(
                                    minimum=1,
                                    maximum=4096,
                                    value=512,
                                    step=1,
                                    label="Max new tokens"
                                )
                            with gr.Column():
                                temperature_slider = gr.Slider(
                                    minimum=0.1,
                                    maximum=4.0,
                                    value=0.7,
                                    step=0.1,
                                    label="Temperature"
                                )
                    
                    with gr.TabItem("Advanced Parameters"):
                        with gr.Row(elem_classes="parameter-row"):
                            with gr.Column():
                                top_p_slider = gr.Slider(
                                    minimum=0.1,
                                    maximum=1.0,
                                    value=0.95,
                                    step=0.05,
                                    label="Top-P"
                                )
                            with gr.Column():
                                frequency_penalty_slider = gr.Slider(
                                    minimum=-2.0,
                                    maximum=2.0,
                                    value=0.0,
                                    step=0.1,
                                    label="Frequency Penalty"
                                )
                        
                        seed_slider = gr.Slider(
                            minimum=-1,
                            maximum=65535,
                            value=-1,
                            step=1,
                            label="Seed (-1 for random)"
                        )
        
        with gr.Column(scale=2):
            # Model selection panel
            with gr.Group():  # gr.Box was removed in Gradio 4
                gr.HTML("<h3 style='margin-top: 0;'>Model Selection</h3>")
                
                # Custom model input (this is what the respond function sees)
                custom_model_box = gr.Textbox(
                    value="meta-llama/Llama-3.3-70B-Instruct",
                    label="Selected Model",
                    elem_id="custom-model-input"
                )
                
                # Search box
                model_search_box = gr.Textbox(
                    label="Search Models",
                    placeholder="Type to filter models...",
                    lines=1
                )
                
                # Dynamic model display area
                model_display = gr.HTML(update_model_display())
                
                # Model information display
                gr.HTML("<h4>Current Model Info</h4>")
                model_info_display = gr.Markdown(get_model_info("meta-llama/Llama-3.3-70B-Instruct"))
    
    # Footer
    gr.HTML("""
    <div class="footer">
        <p>Created with Gradio • Powered by Hugging Face Inference API</p>
        <p>This interface allows you to chat with various language models without requiring a GPU</p>
    </div>
    """)
    
    # Set up event handlers; clear the input box once a message is sent
    msg.submit(
        fn=respond,
        inputs=[msg, chatbot, system_message_box, max_tokens_slider, temperature_slider, 
                top_p_slider, frequency_penalty_slider, seed_slider, custom_model_box],
        outputs=[chatbot],
        queue=True
    ).then(
        fn=lambda: "",
        inputs=None,
        outputs=msg
    )
    
    submit_btn.click(
        fn=respond,
        inputs=[msg, chatbot, system_message_box, max_tokens_slider, temperature_slider, 
                top_p_slider, frequency_penalty_slider, seed_slider, custom_model_box],
        outputs=[chatbot],
        queue=True
    ).then(
        fn=lambda: "",
        inputs=None,
        outputs=msg
    )
    
    # Update model display when search changes
    model_search_box.change(
        fn=update_model_display,
        inputs=model_search_box,
        outputs=model_display
    )
    
    # Update model info when selection changes
    custom_model_box.change(
        fn=get_model_info,
        inputs=custom_model_box,
        outputs=model_info_display
    )

if __name__ == "__main__":
    print("Launching the enhanced multi-model chat interface.")
    demo.launch()