keeperballon committed · verified
Commit 153a45e · 1 parent: 93a0aa3

Create app.py

Files changed (1): app.py (+451, -0)
app.py ADDED
@@ -0,0 +1,451 @@
import os
import re
from datetime import datetime

import gradio as gr
from openai import OpenAI

# App title and description
APP_TITLE = "NO GPU, Multi LLMs Uses"
APP_DESCRIPTION = "Access and chat with multiple language models without requiring a GPU"

# Load the Hugging Face API token from the environment
ACCESS_TOKEN = os.getenv("HF_TOKEN")
print("Access token loaded.")

# The Hugging Face Inference API exposes an OpenAI-compatible endpoint,
# so the standard OpenAI client can be pointed at it directly.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
print("OpenAI client initialized.")
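
# A small guard (an illustrative addition, not in the original commit): every
# request below needs a valid token, so warn loudly at startup if it is missing
# rather than failing on the first chat turn with an opaque 401.
if ACCESS_TOKEN is None:
    print("Warning: HF_TOKEN is not set; API calls will fail to authenticate.")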


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
    custom_model,
):
    print(f"Received message: {message}")
    print(f"Selected model: {custom_model}")

    # Convert seed to None if -1 (meaning random)
    if seed == -1:
        seed = None

    messages = [{"role": "system", "content": system_message}]

    # Add conversation history to the context
    for user_part, assistant_part in history:
        if user_part:
            messages.append({"role": "user", "content": user_part})
        if assistant_part:
            messages.append({"role": "assistant", "content": assistant_part})

    # Append the latest user message
    messages.append({"role": "user", "content": message})

    # If the user provided a model, use it; otherwise fall back to a default model
    model_to_use = custom_model.strip() or "meta-llama/Llama-3.3-70B-Instruct"

    # Start with an empty string and build the response as tokens stream in
    response = ""

    try:
        for message_chunk in client.chat.completions.create(
            model=model_to_use,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            frequency_penalty=frequency_penalty,
            seed=seed,
            messages=messages,
        ):
            token_text = message_chunk.choices[0].delta.content
            if token_text is not None:  # some stream chunks carry no content delta
                response += token_text
                yield response
    except Exception as e:
        yield f"Error: {str(e)}\n\nPlease check your model selection and parameters, or try again later."

    print("Completed response generation.")
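
# Usage sketch (illustrative, not part of the original commit): respond() is a
# generator, so the stream can be consumed incrementally, e.g.
#
#     for partial in respond("Hello", [], "You are a helpful assistant.",
#                            256, 0.7, 0.95, 0.0, -1, ""):
#         print(partial)
#
# Each yielded value is the full text accumulated so far, which lets the UI
# redraw the message in place on every update.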

# Model categories for better organization
MODEL_CATEGORIES = {
    "Meta LLaMa": [
        "meta-llama/Llama-3.3-70B-Instruct",
        "meta-llama/Llama-3.1-70B-Instruct",
        "meta-llama/Llama-3.0-70B-Instruct",
        "meta-llama/Llama-3.2-3B-Instruct",
        "meta-llama/Llama-3.2-1B-Instruct",
        "meta-llama/Llama-3.1-8B-Instruct",
    ],
    "Mistral": [
        "mistralai/Mistral-Nemo-Instruct-2407",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "mistralai/Mistral-7B-Instruct-v0.3",
        "mistralai/Mistral-7B-Instruct-v0.2",
    ],
    "Qwen": [
        "Qwen/Qwen3-235B-A22B",
        "Qwen/Qwen3-32B",
        "Qwen/Qwen2.5-72B-Instruct",
        "Qwen/Qwen2.5-3B-Instruct",
        "Qwen/Qwen2.5-0.5B-Instruct",
        "Qwen/QwQ-32B",
        "Qwen/Qwen2.5-Coder-32B-Instruct",
    ],
    "Microsoft Phi": [
        "microsoft/Phi-3.5-mini-instruct",
        "microsoft/Phi-3-mini-128k-instruct",
        "microsoft/Phi-3-mini-4k-instruct",
    ],
    "Other Models": [
        "NousResearch/Hermes-3-Llama-3.1-8B",
        "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        "HuggingFaceH4/zephyr-7b-beta",
        "HuggingFaceTB/SmolLM2-360M-Instruct",
        "tiiuae/falcon-7b-instruct",
        "01-ai/Yi-1.5-34B-Chat",
    ],
}

# Flatten the model list for search functionality
ALL_MODELS = [model for models in MODEL_CATEGORIES.values() for model in models]
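
# Sanity check (illustrative addition): the flattened list should contain each
# repo id exactly once; a duplicate across categories would surface here.
assert len(ALL_MODELS) == len(set(ALL_MODELS)), "duplicate model ids across categories"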


# Helper function to get model info display
def get_model_info(model_name):
    """Extract and format model information for display."""
    org, model = model_name.split('/')

    # Extract the parameter-count token (e.g. "70B") from the model name
    size_match = re.search(r'(\d+\.?\d*)B', model)
    size = size_match.group(1) + "B" if size_match else "Unknown"

    return f"**Organization:** {org}\n**Model:** {model}\n**Size:** {size}"


def filter_models(search_term):
    """Filter models based on the search term across all categories."""
    if not search_term:
        return MODEL_CATEGORIES

    filtered_categories = {}
    for category, models in MODEL_CATEGORIES.items():
        filtered_models = [m for m in models if search_term.lower() in m.lower()]
        if filtered_models:
            filtered_categories[category] = filtered_models

    return filtered_categories
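
# Example (illustrative): matching is a case-insensitive substring test on the
# full repo id, so a search for "coder" narrows the catalog to one entry:
#
#     filter_models("coder")
#     # -> {"Qwen": ["Qwen/Qwen2.5-Coder-32B-Instruct"]}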


def update_model_display(search_term=""):
    """Render the (optionally filtered) model catalog as clickable HTML cards."""
    filtered_categories = filter_models(search_term)

    # Create HTML for model display
    html = "<div style='max-height: 400px; overflow-y: auto;'>"

    for category, models in filtered_categories.items():
        html += f"<h3>{category}</h3><div style='display: grid; grid-template-columns: repeat(auto-fill, minmax(250px, 1fr)); gap: 10px;'>"

        for model in models:
            model_short = model.split('/')[-1]
            html += f"""
            <div class='model-card' onclick='selectModel("{model}")'
                 style='border: 1px solid #ddd; border-radius: 8px; padding: 12px; cursor: pointer; transition: all 0.2s;
                        background: linear-gradient(145deg, #f0f0f0, #ffffff); box-shadow: 0 4px 6px rgba(0,0,0,0.1);'>
                <div style='font-weight: bold; margin-bottom: 6px; color: #1a73e8;'>{model_short}</div>
                <div style='font-size: 0.8em; color: #666;'>{model.split('/')[0]}</div>
            </div>
            """
        html += "</div>"

    if not filtered_categories:
        html += "<p>No models found matching your search.</p>"

    html += "</div><script>function selectModel(model) { document.getElementById('custom-model-input').value = model; }</script>"
    return html
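
# Note (an assumption about Gradio's sandboxing): <script> tags inside gr.HTML
# are generally not executed, so the inline selectModel() handler above may not
# update the textbox on all Gradio versions. A more portable sketch would drive
# selection through a regular component wired to the same textbox, e.g.
#
#     model_dropdown = gr.Dropdown(choices=ALL_MODELS, label="Pick a model")
#     model_dropdown.change(lambda m: m, inputs=model_dropdown, outputs=custom_model_box)
#
# (model_dropdown is hypothetical; the original relies on the inline script.)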


# Create custom CSS for better styling
custom_css = """
#app-container {
    max-width: 1200px;
    margin: 0 auto;
    padding: 20px;
}

#chat-container {
    border-radius: 12px;
    box-shadow: 0 8px 16px rgba(0,0,0,0.1);
    overflow: hidden;
}

.contain {
    background: linear-gradient(135deg, #f5f7fa 0%, #e4e7eb 100%);
}

h1, h2, h3 {
    font-family: 'Poppins', sans-serif;
}

h1 {
    background: linear-gradient(90deg, #2b6cb0, #4299e1);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-weight: 700;
    letter-spacing: -0.5px;
    margin-bottom: 8px;
}

.parameter-row {
    display: flex;
    gap: 10px;
    margin-bottom: 10px;
}

.model-card:hover {
    transform: translateY(-2px);
    box-shadow: 0 6px 12px rgba(0,0,0,0.15);
    border-color: #4299e1;
}

.tabs {
    box-shadow: 0 2px 10px rgba(0,0,0,0.05);
    border-radius: 8px;
    overflow: hidden;
}

.footer {
    text-align: center;
    margin-top: 20px;
    font-size: 0.8em;
    color: #666;
}

/* Status indicator styles */
.status-indicator {
    display: inline-block;
    width: 10px;
    height: 10px;
    border-radius: 50%;
    margin-right: 6px;
}

.status-active {
    background-color: #10B981;
    animation: pulse 2s infinite;
}

@keyframes pulse {
    0% {
        box-shadow: 0 0 0 0 rgba(16, 185, 129, 0.7);
    }
    70% {
        box-shadow: 0 0 0 5px rgba(16, 185, 129, 0);
    }
    100% {
        box-shadow: 0 0 0 0 rgba(16, 185, 129, 0);
    }
}

/* Parameter tooltips */
.parameter-container {
    position: relative;
}

.parameter-info {
    display: none;
    position: absolute;
    background: white;
    border: 1px solid #ddd;
    padding: 10px;
    border-radius: 6px;
    box-shadow: 0 2px 5px rgba(0,0,0,0.2);
    z-index: 100;
    width: 250px;
    top: 100%;
    left: 10px;
}

.parameter-container:hover .parameter-info {
    display: block;
}
"""

with gr.Blocks(css=custom_css, title=APP_TITLE, theme=gr.themes.Soft()) as demo:
    gr.HTML(f"""
    <div id="app-container">
        <div style="text-align: center; padding: 20px 0;">
            <h1 style="font-size: 2.5rem;">{APP_TITLE}</h1>
            <p style="font-size: 1.1rem; color: #555;">{APP_DESCRIPTION}</p>
            <div style="margin-top: 10px;">
                <span class="status-indicator status-active"></span>
                <span>Service Active</span>
                <span style="margin-left: 15px;">Last Updated: {datetime.now().strftime('%Y-%m-%d')}</span>
            </div>
        </div>
    </div>
    """)

    with gr.Row():
        with gr.Column(scale=3):
            # Main chat interface (gr.Box was removed in Gradio 4;
            # gr.Group is the closest replacement container)
            with gr.Group(elem_id="chat-container"):
                chatbot = gr.Chatbot(
                    height=550,
                    show_copy_button=True,
                    placeholder="Select a model and begin chatting",
                    layout="panel",
                )

                with gr.Row():
                    with gr.Column(scale=8):
                        msg = gr.Textbox(
                            show_label=False,
                            placeholder="Type your message here...",
                            container=False,
                            scale=8,
                        )
                    with gr.Column(scale=1, min_width=70):
                        submit_btn = gr.Button("Send", variant="primary", scale=1)

            with gr.Accordion("Conversation Settings", open=False):
                system_message_box = gr.Textbox(
                    value="You are a helpful assistant.",
                    placeholder="System prompt that guides the assistant's behavior",
                    label="System Prompt",
                    lines=2,
                )

                with gr.Tabs(elem_classes="tabs"):
                    with gr.TabItem("Basic Parameters"):
                        with gr.Row(elem_classes="parameter-row"):
                            with gr.Column():
                                max_tokens_slider = gr.Slider(
                                    minimum=1,
                                    maximum=4096,
                                    value=512,
                                    step=1,
                                    label="Max new tokens",
                                )
                            with gr.Column():
                                temperature_slider = gr.Slider(
                                    minimum=0.1,
                                    maximum=4.0,
                                    value=0.7,
                                    step=0.1,
                                    label="Temperature",
                                )

                    with gr.TabItem("Advanced Parameters"):
                        with gr.Row(elem_classes="parameter-row"):
                            with gr.Column():
                                top_p_slider = gr.Slider(
                                    minimum=0.1,
                                    maximum=1.0,
                                    value=0.95,
                                    step=0.05,
                                    label="Top-P",
                                )
                            with gr.Column():
                                frequency_penalty_slider = gr.Slider(
                                    minimum=-2.0,
                                    maximum=2.0,
                                    value=0.0,
                                    step=0.1,
                                    label="Frequency Penalty",
                                )

                        seed_slider = gr.Slider(
                            minimum=-1,
                            maximum=65535,
                            value=-1,
                            step=1,
                            label="Seed (-1 for random)",
                        )
        with gr.Column(scale=2):
            # Model selection panel (gr.Group again replaces the removed gr.Box)
            with gr.Group():
                gr.HTML("<h3 style='margin-top: 0;'>Model Selection</h3>")

                # Custom model input (this is what the respond function sees)
                custom_model_box = gr.Textbox(
                    value="meta-llama/Llama-3.3-70B-Instruct",
                    label="Selected Model",
                    elem_id="custom-model-input",
                )

                # Search box
                model_search_box = gr.Textbox(
                    label="Search Models",
                    placeholder="Type to filter models...",
                    lines=1,
                )

                # Dynamic model display area
                model_display = gr.HTML(update_model_display())

                # Model information display
                gr.HTML("<h4>Current Model Info</h4>")
                model_info_display = gr.Markdown(get_model_info("meta-llama/Llama-3.3-70B-Instruct"))

    # Footer
    gr.HTML("""
    <div class="footer">
        <p>Created with Gradio • Powered by Hugging Face Inference API</p>
        <p>This interface allows you to chat with various language models without requiring a GPU</p>
    </div>
    """)

    # Set up event handlers. respond() streams a plain string, while the Chatbot
    # output expects the full message history, so a small wrapper appends the
    # streamed reply to the history before yielding it back to the UI.
    def submit_message(message, history, *gen_args):
        history = (history or []) + [(message, "")]
        for partial in respond(message, history[:-1], *gen_args):
            history[-1] = (message, partial)
            yield history

    msg.submit(
        fn=submit_message,
        inputs=[msg, chatbot, system_message_box, max_tokens_slider, temperature_slider,
                top_p_slider, frequency_penalty_slider, seed_slider, custom_model_box],
        outputs=[chatbot],
        queue=True,
    )

    submit_btn.click(
        fn=submit_message,
        inputs=[msg, chatbot, system_message_box, max_tokens_slider, temperature_slider,
                top_p_slider, frequency_penalty_slider, seed_slider, custom_model_box],
        outputs=[chatbot],
        queue=True,
    )

    # Update model display when search changes
    model_search_box.change(
        fn=update_model_display,
        inputs=model_search_box,
        outputs=model_display,
    )

    # Update model info when selection changes
    custom_model_box.change(
        fn=get_model_info,
        inputs=custom_model_box,
        outputs=model_info_display,
    )
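    # Optional refinement (a sketch, not in the original commit): clear the input
    # box once a message is sent by chaining onto the submit events, e.g.
    #
    #     msg.submit(...).then(lambda: "", None, msg)
    #     submit_btn.click(...).then(lambda: "", None, msg)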

if __name__ == "__main__":
    print("Launching the enhanced multi-model chat interface.")
    demo.launch()
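
# Deployment note (an assumption): on Hugging Face Spaces the default launch()
# settings suffice; for quick local sharing, demo.launch(share=True) creates a
# temporary public link.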