from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import torch
import gradio as gr
import easyocr
import pdfplumber
import random

# Load the local model and tokenizer
MODEL_PATH = "valhalla/t5-base-qg-hl"
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_PATH)


def generate_text(prompt, max_length=100, temperature=0.8, top_p=0.9):
    inputs = tokenizer(prompt, return_tensors="pt")
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_length=max_length,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


def question_generator_with_answer(context):
    question = generate_text(f"question: {context}")
    correct_answer = generate_text(f"answer: {context}", temperature=1.0)
    # Keep sampling until we have three distinct distractors that differ
    # from the correct answer and do not look like questions themselves.
    wrong_answers = set()
    while len(wrong_answers) < 3:
        wrong_answer = generate_text(f"answer: {context}", max_length=50, temperature=1.0, top_p=0.8)
        if wrong_answer != correct_answer and "?" not in wrong_answer:
            wrong_answers.add(wrong_answer)
    choices = list(wrong_answers) + [correct_answer]
    random.shuffle(choices)
    return {
        "question": question,
        "choices": choices,
        "correct_answer": correct_answer,
    }


def format_question_output(context):
    question_result = []
    for j in range(4):
        result = question_generator_with_answer(context)
        question_text = f"{result['question']}\n"
        choices_text = "\n".join(
            f"{chr(65 + i)}. {choice}" for i, choice in enumerate(result["choices"])
        )
        question_result.append(f"\nQ{j + 1}. {question_text}\n{choices_text}\n")
    return "\n".join(question_result)


def extract_text_from_pdf(pdf_path):
    text = ""
    with pdfplumber.open(pdf_path.name) as pdf:
        for page in pdf.pages:
            # extract_text() returns None for pages with no text layer
            text += (page.extract_text() or "") + "\n"
    return format_question_output(text)


def OCR(photo):
    reader = easyocr.Reader(['en', 'ch_tra'])
    results = reader.readtext(photo)
    return "".join(text for (_, text, _) in results)


def OCR_gen(text):
    if not text.strip():
        return "Error: OCR produced no usable text. Please check the image content."
    return format_question_output(text)


demo = gr.Blocks()

with demo:
    gr.Markdown("
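    # Below is a minimal sketch, assuming a two-tab layout, of how the
    # handlers defined above could be wired into the Blocks UI. The tab
    # names, labels, and component choices are illustrative assumptions,
    # not part of the original file.
    with gr.Tab("PDF"):
        pdf_input = gr.File(label="Upload a PDF")
        pdf_output = gr.Textbox(label="Generated questions")
        pdf_button = gr.Button("Generate questions from PDF")
        pdf_button.click(extract_text_from_pdf, inputs=pdf_input, outputs=pdf_output)
    with gr.Tab("Image"):
        image_input = gr.Image(label="Upload an image")
        ocr_output = gr.Textbox(label="OCR text")
        ocr_button = gr.Button("Run OCR")
        ocr_button.click(OCR, inputs=image_input, outputs=ocr_output)
        gen_output = gr.Textbox(label="Generated questions")
        gen_button = gr.Button("Generate questions from OCR text")
        gen_button.click(OCR_gen, inputs=ocr_output, outputs=gen_output)

demo.launch()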