import gradio as gr
import logging, langdetect, os
from transformers import T5Tokenizer, T5ForConditionalGeneration
from deep_translator import GoogleTranslator

hf_token = os.getenv("HF_AUTH_TOKEN")

# Load the T5 model/tokenizer and set up the translators
MODEL_ID = "google-t5/t5-small"
tokenizer = T5Tokenizer.from_pretrained(MODEL_ID, token=hf_token)
model     = T5ForConditionalGeneration.from_pretrained(MODEL_ID, token=hf_token)
model.eval()
translator_vi2en = GoogleTranslator(source='vi', target='en')
translator_en2vi = GoogleTranslator(source='en', target='vi')

logging.basicConfig(level=logging.INFO)

def respond(message, history):
    # T5 expects a text-to-text prompt, so we prefix the input with "question: "
    prompt = f"question: {translator_vi2en.translate(message)}"

    # encode + generate
    inputs  = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=False,     # greedy
        pad_token_id=tokenizer.eos_token_id
    )

    # Decode and keep only the model's answer text
    full = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # T5 may return an "answer: ..." prefix or just the answer itself
    answer = full.replace("answer: ", "").strip()

    answer_vi = translator_en2vi.translate(answer)
    
    # Ensure the response is a valid, non-empty string
    if not answer_vi or not answer_vi.strip():
        answer_vi = "Sorry, I couldn't understand the question."

    # Log the exchange
    logging.info(f"Message: {message}, Answer: {answer_vi}")

    # gr.ChatInterface manages the chat history itself, so return only the reply
    return answer_vi

# Gradio ChatInterface
demo = gr.ChatInterface(
    fn=respond,
    title="Chatbot tiếng Việt",
    description="Chatbot hỗ trợ bài tập và nhắn tin bằng tiếng Việt",
    theme="soft"
)

if __name__ == "__main__":
    demo.launch(share=True)