import os
import time

import gradio as gr
from huggingface_hub import InferenceClient

# Hugging Face token read from the environment (set HF_TOKEN before launching).
HF_TOKEN = os.getenv("HF_TOKEN")

# Inference client routed through the SambaNova provider.
client = InferenceClient(
    provider="sambanova",
    api_key=HF_TOKEN,
)

history_log = []           # running log of question/answer pairs with timing
show_history_flag = False  # toggles the JSON history panel


def chatbot_response(user_input):
    """Send the user message (plus recent context) to the model and log the exchange."""
    # Rebuild a short conversation context from the last two logged exchanges,
    # keeping both the user question and the assistant answer.
    messages = []
    for entry in history_log[-2:]:
        messages.append({"role": "user", "content": entry["question"]})
        messages.append({"role": "assistant", "content": entry["response"]})
    messages.append({"role": "user", "content": user_input})

    start_time = time.time()
    try:
        completion = client.chat.completions.create(
            model="meta-llama/Llama-3.3-70B-Instruct",
            messages=messages,
            max_tokens=500,
        )
        response = completion.choices[0].message.content
    except Exception as e:
        response = f"Error generating response: {e}"
    end_time = time.time()

    history_log.append({
        "question": user_input,
        "response": response,
        "response_time_s": round(end_time - start_time, 2),
    })
    print(history_log)  # debug: dump the running log to the console
    return response


with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🤖 Llama-70B Chatbot - SambaNova")
    gr.Markdown("Model: meta-llama/Llama-3.3-70B-Instruct")

    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(placeholder="Type your message here...")
    btn = gr.Button("Send", variant="primary")

    def respond(message, chat_history):
        response = chatbot_response(message)
        chat_history.append((message, response))
        return "", chat_history

    # Trigger on both the button click and pressing Enter in the textbox.
    btn.click(respond, [msg, chatbot], [msg, chatbot])
    msg.submit(respond, [msg, chatbot], [msg, chatbot])

    def toggle_history():
        """Show the history log on one click, hide it on the next."""
        global show_history_flag
        show_history_flag = not show_history_flag
        return history_log if show_history_flag else {}

    history_btn = gr.Button("Show/Hide History", variant="secondary")
    history_output = gr.JSON()
    history_btn.click(toggle_history, inputs=[], outputs=history_output)


if __name__ == "__main__":
    demo.launch()