Spaces:
Running
Running
File size: 3,020 Bytes
e31868e 885cb8b d60771a e31868e 7de1329 9ea2212 26425fc 39c0843 7de1329 d60771a 7de1329 a8b1809 220eb38 d60771a 220eb38 a8b1809 7de1329 d60771a 885cb8b a8b1809 885cb8b e31868e 7de1329 555da97 e31868e 885cb8b e31868e 885cb8b a8b1809 d60771a 885cb8b 7de1329 885cb8b a8b1809 885cb8b e31868e ca61f86 b061628 5242bc5 b061628 5242bc5 b061628 f184505 d60771a 5242bc5 b061628 7de1329 a8b1809 555da97 e31868e 7de1329 a8b1809 7de1329 885cb8b 7de1329 885cb8b e31868e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
import gradio as gr
from huggingface_hub import InferenceClient
import os
import time
import re
# Access token read from the environment (on HF Spaces, set as a secret).
HF_TOKEN = os.getenv("HF_TOKEN")

# Inference client routed through the SambaNova provider; used by
# chatbot_response() below.
client = InferenceClient(
    provider="sambanova",
    api_key=HF_TOKEN,
)

# Display name -> model repo id offered in the UI dropdown.
MODELS = {
    "LLaMA 70B": "meta-llama/Llama-3.3-70B-Instruct",
    "Qwen 32B": "Qwen/QwQ-32B",
    "DeepSeek R1": "deepseek-ai/DeepSeek-R1"
}

# Full audit log of every exchange (model, question, answer, latency).
history_log = []
# Rolling chat-format history; only the last turns are sent to the model.
recent_history = []
# UI toggle state for the "Histórico" button (shared across all sessions).
show_history_flag = False
def clean_response(text):
    """Strip model-reasoning artifacts from a raw completion string.

    Removes ``<think>``/``</think>`` tag markers (keeping the text between
    them) and any LaTeX ``\\boxed{...}`` wrapper together with its contents,
    then trims surrounding whitespace.
    """
    # Drop the reasoning-tag markers emitted by "thinking" models.
    text = re.sub(r"</?think>", "", text)
    # BUG FIX: the original pattern r"\\boxed\\{.*?\\}" matched a literal
    # backslash before each brace (i.e. the text `\boxed\{...\}`), which
    # never occurs in real output, so nothing was ever removed. The braces
    # only need a plain regex escape: this matches the actual `\boxed{...}`.
    text = re.sub(r"\\boxed\{.*?\}", "", text)
    return text.strip()
def chatbot_response(user_input, model_name):
    """Send *user_input* to the chosen model and return the reply text.

    Side effects: appends the exchange to the module-level
    ``recent_history`` (chat format) and ``history_log`` (audit format,
    including latency). On API failure the error message is returned and
    logged in place of a reply.
    """
    selected_model = MODELS[model_name]

    # Keep only the last two turns (one user/assistant pair) as context
    # for the model; with fewer than two entries, start fresh.
    if len(recent_history) >= 2:
        messages = recent_history[-2:]
    else:
        messages = []
    messages.append({"role": "user", "content": user_input})

    started = time.time()
    try:
        completion = client.chat.completions.create(
            model=selected_model,
            messages=messages,
            # Qwen is a reasoning model and needs far more output room.
            max_tokens=8192 if "Qwen" in selected_model else 900
        )
        response = completion.choices[0].message['content']
    except Exception as e:
        response = f"Erro ao gerar resposta: {str(e)}"
    finished = time.time()

    # Update both history stores.
    recent_history.append({"role": "user", "content": user_input})
    recent_history.append({"role": "assistant", "content": response})
    history_log.append({
        "Modelo": model_name,
        "Pergunta": user_input,
        "Resposta": response,
        "Tempo de Resposta (s)": round(finished - started, 2)
    })
    return response
# --- Gradio UI: sidebar with model selector, main column with the chat ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    with gr.Row():
        # Narrow settings column.
        with gr.Column(scale=1):
            gr.Markdown("## ⚙️ Configurações")
            model_selector = gr.Dropdown(
                choices=list(MODELS.keys()),
                label="Escolha o Modelo",
                value="LLaMA 70B"
            )
        # Wide chat column.
        with gr.Column(scale=3):
            gr.Markdown("# 🤖 Chatbot - API SambaNova")
            chatbot = gr.Chatbot(height=500)
            msg = gr.Textbox(placeholder="Digite sua mensagem aqui..", show_label=False)
            btn = gr.Button("Enviar", variant="primary")
            history_btn = gr.Button("Histórico", variant="secondary")
            # JSON panel that shows/hides the full history_log on demand.
            history_output = gr.JSON()

    def respond(message, chat_history, model_name):
        """Handle one chat turn: query the model, clean the reply,
        append the (user, bot) pair, and clear the input box."""
        response = chatbot_response(message, model_name)
        response = clean_response(response)
        chat_history.append((message, response))
        # First output clears the textbox; second updates the chat panel.
        return "", chat_history

    # Send on button click or on Enter in the textbox.
    btn.click(respond, [msg, chatbot, model_selector], [msg, chatbot])
    msg.submit(respond, [msg, chatbot, model_selector], [msg, chatbot])

    def toggle_history():
        """Flip the global visibility flag and return either the full
        history_log or an empty object for the JSON panel."""
        global show_history_flag
        show_history_flag = not show_history_flag
        return history_log if show_history_flag else {}

    history_btn.click(toggle_history, inputs=[], outputs=history_output)

if __name__ == "__main__":
    demo.launch()
|