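"""Gradio chat demo: routes user messages to LLMs served by SambaNova through
the Hugging Face InferenceClient, with model selection and a response-time log."""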
import gradio as gr
from huggingface_hub import InferenceClient
import os
import time
import re

HF_TOKEN = os.getenv("HF_TOKEN")

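# The InferenceClient sends chat completions through the SambaNova provider,
# authenticating with the HF_TOKEN read from the environment above.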
client = InferenceClient(
    provider="sambanova",
    api_key=HF_TOKEN,
)

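# Display names shown in the dropdown, mapped to model ids on the Hugging Face Hub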
MODELS = {
    "LLaMA 70B": "meta-llama/Llama-3.3-70B-Instruct",
    "Qwen 32B": "Qwen/QwQ-32B",
    "DeepSeek R1": "deepseek-ai/DeepSeek-R1"
}

history_log = []        # full log of every exchange, shown by the "History" button
recent_history = []     # rolling context re-sent to the model on each turn
show_history_flag = False

def clean_response(text):
    # Strip <think>/</think> reasoning tags emitted by models such as QwQ and DeepSeek R1
    text = re.sub(r"</?think>", "", text)
    # Drop LaTeX \boxed{...} wrappers from math-style answers
    text = re.sub(r"\\boxed\{.*?\}", "", text)
    return text.strip()

def chatbot_response(user_input, model_name):
    model_id = MODELS[model_name]

    # Keep only the most recent user/assistant pair as context for the model
    messages = recent_history[-2:]
    messages.append({"role": "user", "content": user_input})

    start_time = time.time()
    try:
        completion = client.chat.completions.create(
            model=model_id,
            messages=messages,
            # Reasoning models like QwQ need extra headroom for their thinking tokens
            max_tokens=8192 if "Qwen" in model_id else 900
        )
        response = completion.choices[0].message.content
    except Exception as e:
        response = f"Error generating response: {str(e)}"
    end_time = time.time()

    # Update both histories: the rolling model context and the full log
    recent_history.append({"role": "user", "content": user_input})
    recent_history.append({"role": "assistant", "content": response})

    history_log.append({
        "Model": model_name,
        "Question": user_input,
        "Answer": response,
        "Response Time (s)": round(end_time - start_time, 2)
    })

    return response

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("## ⚙️ Configurações")
            model_selector = gr.Dropdown(
                choices=list(MODELS.keys()),
                label="Escolha o Modelo",
                value="LLaMA 70B"
            )
            
        with gr.Column(scale=3):
            gr.Markdown("# 🤖 Chatbot - API SambaNova")
            chatbot = gr.Chatbot(height=500)
            msg = gr.Textbox(placeholder="Digite sua mensagem aqui..", show_label=False)

            btn = gr.Button("Enviar", variant="primary")
            history_btn = gr.Button("Histórico", variant="secondary")
            history_output = gr.JSON()

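    # Bridge between the UI and the model call: clears the textbox and appends
    # the (user, assistant) pair that gr.Chatbot renders.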
    def respond(message, chat_history, model_name):
        response = chatbot_response(message, model_name)
        response = clean_response(response)
        chat_history.append((message, response))
        return "", chat_history

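    # The Send button and pressing Enter in the textbox trigger the same handler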
    btn.click(respond, [msg, chatbot, model_selector], [msg, chatbot])
    msg.submit(respond, [msg, chatbot, model_selector], [msg, chatbot])

    def toggle_history():
        # Each click flips between showing the full log and hiding it
        global show_history_flag
        show_history_flag = not show_history_flag
        return history_log if show_history_flag else {}

    history_btn.click(toggle_history, inputs=[], outputs=history_output)

if __name__ == "__main__":
    demo.launch()