File size: 2,849 Bytes
e31868e
 
 
885cb8b
324f4bb
e31868e
 
 
 
 
 
 
 
7de1329
9ea2212
26425fc
a8b1809
7de1329
 
885cb8b
7de1329
 
a8b1809
220eb38
 
 
 
a8b1809
7de1329
 
885cb8b
 
a8b1809
885cb8b
e31868e
 
7de1329
 
6a39342
e31868e
885cb8b
6a39342
e31868e
885cb8b
 
a8b1809
885cb8b
7de1329
885cb8b
 
 
 
a8b1809
885cb8b
e31868e
ca61f86
b061628
5242bc5
b061628
 
 
 
 
 
 
5242bc5
b061628
 
 
5242bc5
 
 
74f970d
b061628
7de1329
 
 
a8b1809
e31868e
 
7de1329
 
a8b1809
7de1329
885cb8b
 
 
 
7de1329
885cb8b
e31868e
 
 
6638b93
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import gradio as gr
from huggingface_hub import InferenceClient
import os
import time
import re 

# SambaNova/Hugging Face access token, read from the environment.
# Must be set (e.g. as a Space secret) before the app is launched.
HF_TOKEN = os.getenv("HF_TOKEN")

# Single shared inference client, routed through the SambaNova provider.
client = InferenceClient(
    provider="sambanova",
    api_key=HF_TOKEN,
)

# Display name shown in the UI dropdown -> Hugging Face model id.
MODELS = {
    "LLaMA 70B": "meta-llama/Llama-3.3-70B-Instruct",
    "Qwen 32B": "Qwen/QwQ-32B",
    "DeepSeek R1": "deepseek-ai/DeepSeek-R1",
}

# In-memory log of every exchange (model, question, answer, latency).
history_log = []
# Toggled by the "Histórico" button to alternately show/hide the log.
show_history_flag = False

def clean_response(text):
    """Strip model-reasoning markup from *text* and return the cleaned reply.

    Removes ``<think>...</think>`` reasoning blocks (content included) emitted
    by reasoning models such as QwQ and DeepSeek R1, any stray unmatched
    think tags (e.g. from a truncated response), and LaTeX ``\\boxed{...}``
    wrappers, then trims surrounding whitespace.
    """
    # Drop the entire reasoning block, not just the tags: the original code
    # stripped only the tags, leaking the chain-of-thought to the user.
    text = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
    # Remove any leftover unmatched tags (e.g. a cut-off response).
    text = re.sub(r"</?think>", "", text)
    # Strip LaTeX answer boxes sometimes emitted by math-tuned models.
    text = re.sub(r"\\boxed\{.*?\}", "", text)

    return text.strip()

def chatbot_response(user_input, model_name):
    """Send *user_input* to the selected model and return its raw reply.

    Builds a short conversational context from the last two logged
    exchanges, calls the SambaNova-hosted model, records the exchange
    (with latency) in the global ``history_log``, and returns the reply
    text. On any API failure the error is returned as the reply string
    instead of raising.
    """
    model_id = MODELS[model_name]

    # Rebuild context as alternating user/assistant turns. The original
    # code sent only past questions (all with role "user"), so the model
    # never saw its own previous answers and received a malformed transcript.
    messages = []
    for entry in history_log[-2:]:
        messages.append({"role": "user", "content": entry["Pergunta"]})
        messages.append({"role": "assistant", "content": entry["Resposta"]})
    messages.append({"role": "user", "content": user_input})

    start_time = time.time()
    try:
        completion = client.chat.completions.create(
            model=model_id,
            messages=messages,
            # Reasoning models (QwQ, DeepSeek R1) emit long chains of
            # thought and need far more room than a plain chat model.
            max_tokens=8192 if any(m in model_id for m in ["Qwen", "DeepSeek"]) else 800,
        )
        response = completion.choices[0].message['content']

    except Exception as e:
        # Best-effort UX: surface the error as the chat reply rather
        # than crashing the Gradio callback.
        response = f"Erro ao gerar resposta: {str(e)}"
    end_time = time.time()

    history_log.append({
        "Modelo": model_name,
        "Pergunta": user_input,
        "Resposta": response,
        "Tempo de Resposta (s)": round(end_time - start_time, 2)
    })

    return response

# --- Gradio UI -------------------------------------------------------------
# Two-column layout: model settings on the left, the chat pane on the right.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("## ⚙️ Configurações")
            model_selector = gr.Dropdown(
                choices=list(MODELS.keys()),
                label="Escolha o Modelo",
                value="LLaMA 70B"
            )
            
        with gr.Column(scale=3):
            gr.Markdown("# 🤖 Chatbot - API SambaNova")
            chatbot = gr.Chatbot(height=500)
            msg = gr.Textbox(placeholder="Digite sua mensagem aqui...", show_label=False)
            
            btn = gr.Button("Enviar", variant="primary")
            history_btn = gr.Button("Histórico", variant="secondary")
                
            # Panel that displays history_log as JSON when toggled on.
            history_output = gr.JSON()

    def respond(message, chat_history, model_name):
        """Handle one chat turn: query the model, clean the reply, append
        the (user, bot) pair to the chat, and clear the input textbox."""
        response = chatbot_response(message, model_name)
        response = clean_response(response)
        chat_history.append((message, response))
        # First output ("") clears the textbox; second refreshes the chat.
        return "", chat_history

    # Both clicking "Enviar" and pressing Enter submit the message.
    btn.click(respond, [msg, chatbot, model_selector], [msg, chatbot])
    msg.submit(respond, [msg, chatbot, model_selector], [msg, chatbot])

    def toggle_history():
        """Alternate between showing the full exchange log and hiding it."""
        global show_history_flag
        show_history_flag = not show_history_flag
        # gr.JSON renders {} as empty, which effectively hides the log.
        return history_log if show_history_flag else {}

    history_btn.click(toggle_history, inputs=[], outputs=history_output)

if __name__ == "__main__":
    demo.launch()