import os
import time

import gradio as gr
from huggingface_hub import InferenceClient

# Hugging Face token read from the environment (set HF_TOKEN before launching).
HF_TOKEN = os.getenv("HF_TOKEN")

# Inference client routed through the SambaNova provider.
client = InferenceClient(
    provider="sambanova",
    api_key=HF_TOKEN,
)

history_log = []           # running log of question/answer pairs with timing
show_history_flag = False  # toggles the JSON history panel


def chatbot_response(user_input):
    """Send the user message (plus recent context) to the model and log the exchange."""
    # Rebuild a short conversation context from the last two logged exchanges,
    # keeping both the user question and the assistant answer.
    messages = []
    for entry in history_log[-2:]:
        messages.append({"role": "user", "content": entry["question"]})
        messages.append({"role": "assistant", "content": entry["response"]})
    messages.append({"role": "user", "content": user_input})

    start_time = time.time()
    try:
        completion = client.chat.completions.create(
            model="meta-llama/Llama-3.3-70B-Instruct",
            messages=messages,
            max_tokens=500,
        )
        response = completion.choices[0].message.content
    except Exception as e:
        response = f"Error generating response: {e}"
    end_time = time.time()

    history_log.append({
        "question": user_input,
        "response": response,
        "response_time_s": round(end_time - start_time, 2),
    })
    print(history_log)  # debug: dump the running log to the console
    return response


with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🤖 Llama-70B Chatbot - SambaNova")
    gr.Markdown("Model: meta-llama/Llama-3.3-70B-Instruct")

    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(placeholder="Type your message here...")
    btn = gr.Button("Send", variant="primary")

    def respond(message, chat_history):
        response = chatbot_response(message)
        chat_history.append((message, response))
        return "", chat_history

    # Trigger on both the button click and pressing Enter in the textbox.
    btn.click(respond, [msg, chatbot], [msg, chatbot])
    msg.submit(respond, [msg, chatbot], [msg, chatbot])

    def toggle_history():
        """Show the history log on one click, hide it on the next."""
        global show_history_flag
        show_history_flag = not show_history_flag
        return history_log if show_history_flag else {}

    history_btn = gr.Button("Show/Hide History", variant="secondary")
    history_output = gr.JSON()
    history_btn.click(toggle_history, inputs=[], outputs=history_output)


if __name__ == "__main__":
    demo.launch()