from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from huggingface_hub import InferenceClient
from typing import List, Tuple, Optional

# Initialize FastAPI
app = FastAPI()

# Global variables holding the application state
client: Optional[InferenceClient] = None
system_message = "You are a friendly Chatbot."
max_tokens = 1024
temperature = 0.7
top_p = 0.95
history: List[Tuple[str, str]] = []
total_tokens = 0

# Data model for a chat request
class ChatRequest(BaseModel):
    message: str
    reset_history: bool = False  # Flag to clear the dialogue history

# Data model for initialization
class InitializeRequest(BaseModel):
    model_name: str = "Qwen/Qwen2.5-Coder-32B-Instruct"
    system_message: str = "You are a friendly Chatbot."
    max_tokens: int = 1024
    temperature: float = 0.7
    top_p: float = 0.95

# Initialize the model and store the generation settings globally
def initialize_model(
    model_name: str,
    sys_message: str,
    tokens: int,
    temp: float,
    top_p_value: float,
):
    global client, system_message, max_tokens, temperature, top_p
    client = InferenceClient(model_name)
    system_message = sys_message
    max_tokens = tokens
    temperature = temp
    top_p = top_p_value

# Route for initializing the model
@app.post("/initialize")
async def initialize(request: InitializeRequest):
    try:
        initialize_model(
            model_name=request.model_name,
            sys_message=request.system_message,
            tokens=request.max_tokens,
            temp=request.temperature,
            top_p_value=request.top_p,
        )
        return {"status": "Model initialized successfully."}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
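
# Example request body for POST /initialize. All fields are optional and fall
# back to the defaults above; the temperature override shown here is just an
# illustration, and any model id supported by InferenceClient should work:
#   {"model_name": "Qwen/Qwen2.5-Coder-32B-Instruct", "temperature": 0.5}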

# Route for handling chat requests
@app.post("/chat")
async def chat(request: ChatRequest):
    global history, total_tokens
    try:
        # Clear the history if requested
        if request.reset_history:
            history = []
            total_tokens = 0
            return {"response": "History cleared.", "total_tokens": total_tokens}

        # Build the message list for the model
        messages = [{"role": "system", "content": system_message}]

        # Append the dialogue history
        for user_msg, assistant_msg in history:
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})

        # Append the current user message
        messages.append({"role": "user", "content": request.message})

        # Stream the model's response, accumulating tokens as they arrive
        response = ""
        for message in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            token = message.choices[0].delta.content
            if token:  # delta.content may be None on some stream chunks
                response += token

        # Update the history and the token counter
        history.append((request.message, response))
        total_tokens += len(response.split())  # Rough token estimate (word count)

        # Return the response and the running token count
        return {"response": response, "total_tokens": total_tokens}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
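
# Note: the whitespace split in the chat route above is only a rough token
# estimate. A sketch of exact accounting, assuming a non-streaming call whose
# ChatCompletionOutput in huggingface_hub exposes a `usage` field:
#
#   result = client.chat_completion(messages, max_tokens=max_tokens, stream=False)
#   total_tokens += result.usage.completion_tokens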

# Route for fetching the current state (history and token count)
@app.get("/status")
async def get_status():
    return {
        "history": history,
        "total_tokens": total_tokens,
    }

# Initialize the model at startup
initialize_model(
    model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
    sys_message="You are a friendly Chatbot.",
    tokens=1024,
    temp=0.7,
    top_p_value=0.95,
)

# Run the application (for local testing)
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)
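
# Example client calls (a minimal sketch, assuming the server is running
# locally on the port above; requires the `requests` package):
#
#   import requests
#   base = "http://localhost:7860"
#   requests.post(f"{base}/initialize", json={"temperature": 0.5})
#   r = requests.post(f"{base}/chat", json={"message": "Hello!"})
#   print(r.json()["response"], r.json()["total_tokens"])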