File size: 3,602 Bytes
07ac240
 
f5b5690
 
 
07ac240
f5b5690
07ac240
67c487c
07ac240
 
f5b5690
07ac240
 
 
 
 
f5b5690
 
07ac240
 
 
f5b5690
 
07ac240
 
f5b5690
 
 
 
07ac240
f5b5690
 
 
 
 
 
 
07ac240
f5b5690
 
 
 
 
07ac240
f5b5690
 
 
07ac240
f5b5690
 
07ac240
f5b5690
 
 
 
07ac240
f5b5690
 
07ac240
f5b5690
07ac240
f5b5690
07ac240
f5b5690
07ac240
 
 
f5b5690
 
07ac240
 
 
 
f5b5690
07ac240
 
 
 
f5b5690
07ac240
 
 
 
 
 
 
 
 
 
f5b5690
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import torch
import gradio as gr
import easyocr
import pdfplumber
import random

# Load the tokenizer and seq2seq model used for question generation.
# NOTE(review): the original comment said "local model", but MODEL_PATH is a
# Hugging Face hub id, so from_pretrained will fetch it on first run unless a
# cached/local copy exists — confirm the intended deployment.
MODEL_PATH = "valhalla/t5-base-qg-hl"
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_PATH)

def generate_text(prompt, max_length=100, temperature=0.8, top_p=0.9):
    """Sample one decoded completion for *prompt* from the module-level model.

    Uses nucleus sampling (do_sample with top_p/temperature), so repeated
    calls with the same prompt can return different text.
    """
    encoded = tokenizer(prompt, return_tensors="pt")
    # Inference only — no gradients needed.
    with torch.no_grad():
        generated = model.generate(
            **encoded,
            max_length=max_length,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
        )
    return tokenizer.decode(generated[0], skip_special_tokens=True)

def question_generator_with_answer(context, max_attempts=20):
    """Generate one multiple-choice question for *context*.

    Returns a dict with keys "question", "choices" (shuffled list containing
    the correct answer plus up to three distractors) and "correct_answer".

    max_attempts bounds distractor sampling: the original unbounded
    `while len(wrong_answers) < 3` loop could hang forever when the model
    kept producing duplicates, the correct answer, or question-like text.
    """
    question = generate_text(f"question: {context}")
    correct_answer = generate_text(f"answer: {context}", temperature=1.0)

    wrong_answers = set()
    attempts = 0
    while len(wrong_answers) < 3 and attempts < max_attempts:
        attempts += 1
        candidate = generate_text(f"answer: {context}", max_length=50, temperature=1.0, top_p=0.8)
        # Reject the correct answer itself and outputs that look like questions.
        if candidate != correct_answer and "?" not in candidate:
            wrong_answers.add(candidate)

    choices = list(wrong_answers) + [correct_answer]
    random.shuffle(choices)

    return {
        "question": question,
        "choices": choices,
        "correct_answer": correct_answer
    }

def format_question_output(context):
    """Build four numbered multiple-choice questions from *context*.

    Each question is rendered as "Qn.<question>" followed by lettered
    choices (A., B., ...), joined into a single display string.
    """
    rendered = []
    for number in range(1, 5):
        item = question_generator_with_answer(context)
        options = "\n".join(
            f"{chr(65 + pos)}. {opt}" for pos, opt in enumerate(item["choices"])
        )
        rendered.append(f"\nQ{number}.{item['question']}\n\n{options}\n")
    return "\n".join(rendered)

def extract_text_from_pdf(pdf_path):
    """Extract all page text from an uploaded PDF and generate questions from it.

    pdf_path is the object Gradio's File component passes in (its .name is the
    path of the uploaded temp file).
    """
    text = ""
    with pdfplumber.open(pdf_path.name) as pdf:
        for page in pdf.pages:
            # extract_text() returns None for pages with no extractable text
            # (e.g. scanned images); the original `None + "\n"` raised TypeError.
            page_text = page.extract_text()
            if page_text:
                text += page_text + "\n"
    return format_question_output(text)

def OCR(photo):
    """Run OCR on *photo* and return the recognized text concatenated in order.

    Supports English plus traditional Chinese.
    """
    # Constructing easyocr.Reader loads (and may download) the detection and
    # recognition models, which is slow; the original rebuilt it on every
    # button click. Cache a single instance on the function object instead.
    reader = getattr(OCR, "_reader", None)
    if reader is None:
        reader = easyocr.Reader(['en', 'ch_tra'])
        OCR._reader = reader
    results = reader.readtext(photo)
    return "".join(text for (_, text, _) in results)

def OCR_gen(text):
    """Generate questions from OCR output, or return an error for blank input."""
    if text.strip():
        return format_question_output(text)
    # Guard: whitespace-only OCR output means nothing usable was recognized.
    return "錯誤:OCR 沒有輸出任何可用的文字,請重新檢查圖片內容。"

# --- Gradio UI: three input paths (text / PDF / image OCR) -----------------
with gr.Blocks() as demo:
    gr.Markdown("<h1>產生英文題目</h1>")
    gr.Markdown("這是一個利用 hugging face 產生英文題目的小專案")

    with gr.Tabs():
        # Tab 1: paste an English passage directly.
        with gr.TabItem("輸入文字"):
            article_box = gr.Textbox(label="請輸入英文文章:", placeholder="While lily is setting...", lines=5)
            article_quiz_box = gr.Textbox(label="題目")
            article_btn = gr.Button("產生題目")

        # Tab 2: upload a PDF whose text is extracted first.
        with gr.TabItem("PDF文件辨識"):
            pdf_file = gr.File(label="請上傳PDF文件")
            pdf_quiz_box = gr.Textbox()
            pdf_btn = gr.Button("產生題目")

        # Tab 3: OCR an image, let the user confirm the text, then generate.
        with gr.TabItem("圖片辨識"):
            photo_input = gr.Image()
            ocr_text_box = gr.Textbox(placeholder="請確認辨識結果", label="辨識結果")
            ocr_btn = gr.Button("開始解析")
            ocr_quiz_btn = gr.Button("產生題目")
            ocr_quiz_box = gr.Textbox(label="題目")

    article_btn.click(format_question_output, inputs=article_box, outputs=article_quiz_box)
    pdf_btn.click(extract_text_from_pdf, inputs=pdf_file, outputs=pdf_quiz_box)
    ocr_btn.click(OCR, inputs=photo_input, outputs=ocr_text_box)
    ocr_quiz_btn.click(OCR_gen, inputs=ocr_text_box, outputs=ocr_quiz_box)

demo.launch()