# Подключение клиентов # - - - - - - - - - - - - - - from huggingface_hub import InferenceClient from together import Together # Подключение библиотек # - - - - - - - - - - - - - - import requests import gradio as gr import os import json #============================ #============================ # Список доступных моделей # - - - - - - - - - - - - - - models = { "together": [ "deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free", "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free" ], "huggingface": [ "google/gemma-3-27b-it", "Qwen/QwQ-32B", "Qwen/QwQ-32B-Preview", "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", "google/gemma-2-9b-it", "google/gemma-2-27b-it", "mistralai/Mistral-7B-Instruct-v0.3", "HuggingFaceH4/zephyr-7b-beta", "Qwen/Qwen2.5-72B-Instruct", "mistralai/Mistral-Nemo-Instruct-2407", "tiiuae/falcon-7b-instruct", "google/gemma-3-4b-it", "HuggingFaceH4/starchat2-15b-v0.1", "Qwen/Qwen3-235B-A22B", "Qwen/Qwen3-32B" ] } #============================ #============================ # Функции для работы с сообщениями # - - - - - - - - - - - - - - def add_message(role, content, messages): messages.append({"role": role, "content": content}) return messages, len(messages), str(messages) def clear_messages(messages): return [], 0, "[]" def show_messages(messages): return str(messages) def get_messages_api(messages): return json.dumps(messages, indent=4) def run_huggingface_model(model, messages, max_tokens, temperature, top_p): API_TOKEN = os.getenv("HF_READ_TOKEN") headers = {"Authorization": f"Bearer {API_TOKEN}"} payload = { "messages": messages, "max_tokens": max_tokens, "temperature": temperature, "top_p": top_p, "stream": False # "inputs": json.dumps(messages), # "seed": random.randint(1, 1000000000), # "parameters": { # "max_tokens": max_tokens, # "temperature": temperature, # "top_p": top_p # } } model = "https://api-inference.huggingface.co/models/" + model + "/v1/chat/completions"; response = requests.post(model, headers=headers, json=payload, timeout=30) print("RESPONSE: ") print(response) if response.status_code != 200: response = json.loads(response.content) print("ERROR: " + response["error"]) else: print(response.content) try: response = json.loads(response.content) result = response["choices"][0]["message"]["content"] except: result = response.content return result def run_huggingface_model_alt(model, messages, max_tokens, temperature, top_p): client = InferenceClient(model) response = client.chat_completion( messages, max_tokens=max_tokens, stream=False, temperature=temperature, top_p=top_p, ) return response.choices[0].message.content def run_together_model(model, messages, max_tokens, temperature, top_p): client = Together() response = client.chat.completions.create( model=model, messages=messages, max_tokens=max_tokens, temperature=temperature, top_p=top_p, ) return response.choices[0].message.content #============================ #============================ # Создаем интерфейс с вкладками demo = gr.Blocks() with demo: gr.Markdown("# Chat Interface") # Вкладки для Together и HuggingFace with gr.Tabs(): with gr.Tab("Together"): together_model_input = gr.Radio( label="Select a Together model", choices=models["together"], value=models["together"][0], ) together_run_button = gr.Button("Run Together") with gr.Tab("HuggingFace"): huggingface_model_input = gr.Radio( label="Select a HuggingFace model", choices=models["huggingface"], value=models["huggingface"][0], ) huggingface_run_button = gr.Button("Run HuggingFace") # Общие элементы интерфейса role_input = gr.Dropdown( label="Role", choices=["system", "user", "assistant"], # Список ролей value="user" # Значение по умолчанию ) content_input = gr.Textbox(label="Content") messages_state = gr.State(value=[]) messages_output = gr.Textbox(label="Messages", value="[]") count_output = gr.Number(label="Count", value=0) response_output = gr.Textbox(label="Response") messages_api_output = gr.Textbox(label="Messages API") add_button = gr.Button("Add") clear_button = gr.Button("Clear") show_button = gr.Button("Show messages") get_api_button = gr.Button("Get messages API") max_tokens_slider = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens") temperature_slider = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature") top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)") # Обработчики событий для кнопок add_button.click( add_message, inputs=[role_input, content_input, messages_state], outputs=[messages_state, count_output, messages_output], ) clear_button.click( clear_messages, inputs=[messages_state], outputs=[messages_state, count_output, messages_output], ) show_button.click( show_messages, inputs=[messages_state], outputs=[messages_output], ) get_api_button.click( get_messages_api, inputs=[messages_state], outputs=[messages_api_output], ) # Обработчики событий для кнопок "Run" together_run_button.click( run_together_model, inputs=[together_model_input, messages_state, max_tokens_slider, temperature_slider, top_p_slider], outputs=[response_output], ) huggingface_run_button.click( run_huggingface_model_alt, inputs=[huggingface_model_input, messages_state, max_tokens_slider, temperature_slider, top_p_slider], outputs=[response_output], ) #============================ #============================ if __name__ == "__main__": demo.launch()