import gradio as gr
import logging, langdetect, os
from transformers import T5Tokenizer, T5ForConditionalGeneration
from deep_translator import GoogleTranslator

hf_token = os.getenv("HF_AUTH_TOKEN")
# Initialize the HF model and the translators
MODEL_ID = "google-t5/t5-small"
tokenizer = T5Tokenizer.from_pretrained(MODEL_ID, token=hf_token)
model = T5ForConditionalGeneration.from_pretrained(MODEL_ID, token=hf_token)
model.eval()

translator_vi2en = GoogleTranslator(source='vi', target='en')
translator_en2vi = GoogleTranslator(source='en', target='vi')

logging.basicConfig(level=logging.INFO)

def respond(message, history):
    # T5 expects a "text2text" prompt, so we prefix the input with "question: "
    prompt = f"question: {translator_vi2en.translate(message)}"
    # encode + generate
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=False,  # greedy decoding
        pad_token_id=tokenizer.eos_token_id,
    )
    # decode and keep only the generated answer
    full = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # T5 may return an "answer: ..." prefix or just the answer itself
    answer = full.replace("answer: ", "").strip()
    answer_vi = translator_en2vi.translate(answer)
    # Ensure the response is a non-empty string
    if not answer_vi or not answer_vi.strip():
        answer_vi = "Sorry, I couldn't understand the question."
    # Log the exchange; gr.ChatInterface tracks the conversation history itself,
    # so the function only needs to return the reply string
    logging.info(f"Message: {message}, Answer: {answer_vi}")
    return answer_vi
# Gradio ChatInterface
demo = gr.ChatInterface(
    fn=respond,
    title="Chatbot tiếng Việt",
    description="Chatbot hỗ trợ bài tập và nhắn tin bằng tiếng Việt",
    theme="soft"
)
if __name__ == "__main__":
    demo.launch(share=True)
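
For completeness, a minimal requirements.txt covering the imports above would look roughly like the list below (unpinned versions are an assumption; torch and sentencepiece are included because T5Tokenizer and T5ForConditionalGeneration depend on them):

gradio
transformers
torch
sentencepiece
deep-translator
langdetect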