"""Gradio chat demo serving a LoRA fine-tuned distilgpt2 model."""

import gradio as gr
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the base model, then attach the fine-tuned LoRA adapter on top.
base_model = AutoModelForCausalLM.from_pretrained("distilgpt2")
tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
model = PeftModel.from_pretrained(base_model, "./lora-distilgpt2")
model.eval()  # inference only — disable dropout etc.

# GPT-2 has no pad token; reuse EOS so generate() does not warn or mispad.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Marker separating the instruction prompt from the model's answer.
RESPONSE_MARKER = "### Response:\n"


def _extract_response(decoded: str) -> str:
    """Return only the model's answer, stripping the echoed prompt.

    The model is prompted with an instruction template, and `generate`
    returns prompt + continuation; everything before RESPONSE_MARKER is
    the prompt echo and must not be shown to the user.
    """
    _, sep, tail = decoded.partition(RESPONSE_MARKER)
    # If the marker is somehow absent, fall back to the full decoded text.
    return tail.strip() if sep else decoded.strip()


def chat(message: str) -> str:
    """Generate a reply to *message* using the instruction template.

    Args:
        message: Raw user input from the Gradio textbox.

    Returns:
        The model's generated answer (prompt echo removed).
    """
    prompt = f"### Instruction:\n{message}\n\n{RESPONSE_MARKER}"
    inputs = tokenizer(prompt, return_tensors="pt")
    with torch.no_grad():  # no gradients needed at inference
        outputs = model.generate(
            **inputs,  # passes attention_mask too, not just input_ids
            max_new_tokens=100,
            pad_token_id=tokenizer.eos_token_id,
        )
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return _extract_response(decoded)


if __name__ == "__main__":
    # Guarded so importing this module does not start the web server.
    gr.Interface(fn=chat, inputs="text", outputs="text").launch()