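"""Gradio chat app that streams completions from multiple hosted LLMs
via the Hugging Face Inference API (an OpenAI-compatible endpoint),
so no local GPU is required."""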
import gradio as gr
from openai import OpenAI
import os
import re
# App title and description
APP_TITLE = "NO GPU, Multi LLMs Uses"
APP_DESCRIPTION = "Access and chat with multiple language models without requiring a GPU"
# Load environment variables
ACCESS_TOKEN = os.getenv("HF_TOKEN")
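# A Hugging Face access token must be set in the environment before launch,
# e.g.: export HF_TOKEN=hf_xxxxxxxx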
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
# Model categories for better organization
MODEL_CATEGORIES = {
    "Qwen": [
        "Qwen/Qwen2.5-72B-Instruct",
        "Qwen/Qwen2.5-3B-Instruct",
        "Qwen/Qwen2.5-0.5B-Instruct",
        "Qwen/Qwen2.5-Coder-32B-Instruct",
    ],
    "Meta LLaMa": [
        "meta-llama/Llama-3.3-70B-Instruct",
        "meta-llama/Llama-3.1-70B-Instruct",
        "meta-llama/Llama-3.0-70B-Instruct",
        "meta-llama/Llama-3.2-3B-Instruct",
        "meta-llama/Llama-3.2-1B-Instruct",
        "meta-llama/Llama-3.1-8B-Instruct",
    ],
    "Mistral": [
        "mistralai/Mistral-Nemo-Instruct-2407",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "mistralai/Mistral-7B-Instruct-v0.3",
        "mistralai/Mistral-7B-Instruct-v0.2",
    ],
    "Microsoft Phi": [
        "microsoft/Phi-3.5-mini-instruct",
        "microsoft/Phi-3-mini-128k-instruct",
        "microsoft/Phi-3-mini-4k-instruct",
    ],
    "Other Models": [
        "NousResearch/Hermes-3-Llama-3.1-8B",
        "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
        "HuggingFaceH4/zephyr-7b-beta",
        "HuggingFaceTB/SmolLM2-360M-Instruct",
        "tiiuae/falcon-7b-instruct",
        "01-ai/Yi-1.5-34B-Chat",
    ],
}

# Flatten the model list
ALL_MODELS = [m for models in MODEL_CATEGORIES.values() for m in models]
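# Dict insertion order is preserved, so ALL_MODELS[0] is the first "Qwen"
# entry ("Qwen/Qwen2.5-72B-Instruct"); it serves as the default model below.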
def get_model_info(model_name):
    """Return a Markdown summary (organization, name, parameter size) for a model id."""
    parts = model_name.split('/')
    if len(parts) != 2:
        return f"**Model:** {model_name}\n**Format:** Unknown"
    org, model = parts
    # Extract the parameter count (e.g. "72B") from the model name, if present
    size_match = re.search(r'(\d+\.?\d*)B', model)
    size = size_match.group(1) + "B" if size_match else "Unknown"
    return f"**Organization:** {org}\n**Model:** {model}\n**Size:** {size}"
def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
    selected_model,
):
    # A seed of -1 means "no fixed seed", i.e. non-deterministic sampling
    if seed == -1:
        seed = None
    # Rebuild the full conversation: system prompt, prior turns, new message
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    model_to_use = selected_model or ALL_MODELS[0]
    new_history = list(history) + [(message, "")]
    current_response = ""
    try:
        # Stream the completion, updating the last chat turn chunk by chunk
        for chunk in client.chat.completions.create(
            model=model_to_use,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            frequency_penalty=frequency_penalty,
            seed=seed,
            messages=messages,
        ):
            delta = chunk.choices[0].delta.content
            if delta:
                current_response += delta
                new_history[-1] = (message, current_response)
                yield new_history
    except Exception as e:
        new_history[-1] = (message, f"Error: {e}")
        yield new_history
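# Because respond() is a generator, Gradio re-renders the Chatbot on every
# yield, which is what produces the streaming effect in the UI.
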
with gr.Blocks(title=APP_TITLE, theme=gr.themes.Soft()) as demo:
    gr.Markdown(f"## {APP_TITLE}\n\n{APP_DESCRIPTION}")
    with gr.Row():
        with gr.Column(scale=2):
            # Model selection via Dropdown
            selected_model = gr.Dropdown(
                choices=ALL_MODELS,
                value=ALL_MODELS[0],
                label="Select Model",
            )
            model_info = gr.Markdown(get_model_info(ALL_MODELS[0]))

            def update_info(model_name):
                return get_model_info(model_name)

            selected_model.change(
                fn=update_info,
                inputs=[selected_model],
                outputs=[model_info],
            )
            # Conversation settings
            system_message = gr.Textbox(
                value="You are a helpful assistant.",
                label="System Prompt",
                lines=2,
            )
            max_tokens = gr.Slider(1, 4096, value=512, label="Max New Tokens")
            temperature = gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature")
            top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-P")
            freq_penalty = gr.Slider(-2.0, 2.0, value=0.0, step=0.1, label="Frequency Penalty")
            seed = gr.Slider(-1, 65535, value=-1, step=1, label="Seed (-1 random)")
        with gr.Column(scale=3):
            chatbot = gr.Chatbot()
            msg = gr.Textbox(placeholder="Type your message here...", show_label=False)
            send_btn = gr.Button("Send")

    # The Send button and pressing Enter both trigger the same handler
    send_btn.click(
        fn=respond,
        inputs=[
            msg, chatbot, system_message,
            max_tokens, temperature, top_p,
            freq_penalty, seed, selected_model,
        ],
        outputs=[chatbot],
        queue=True,
    )
    msg.submit(
        fn=respond,
        inputs=[
            msg, chatbot, system_message,
            max_tokens, temperature, top_p,
            freq_penalty, seed, selected_model,
        ],
        outputs=[chatbot],
        queue=True,
    )

demo.launch()
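# To run locally: `python app.py`, then open the printed URL. On Hugging Face
# Spaces, this file is executed automatically as the app entry point.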