File size: 3,602 Bytes
07ac240
 
f5b5690
 
 
07ac240
f5b5690
07ac240
67c487c
07ac240
 
f5b5690
07ac240
 
 
 
 
f5b5690
 
07ac240
 
 
f5b5690
 
07ac240
 
f5b5690
 
 
 
07ac240
f5b5690
 
 
 
 
 
 
07ac240
f5b5690
 
 
 
 
07ac240
f5b5690
 
 
07ac240
f5b5690
 
07ac240
f5b5690
 
 
 
07ac240
f5b5690
 
07ac240
f5b5690
07ac240
f5b5690
07ac240
f5b5690
07ac240
 
 
f5b5690
 
07ac240
 
 
 
f5b5690
07ac240
 
 
 
f5b5690
07ac240
 
 
 
 
 
 
 
 
 
f5b5690
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import torch
import gradio as gr
import easyocr
import pdfplumber
import random

# Load the tokenizer and seq2seq model used for question generation.
# NOTE(review): the original comment said "local model", but MODEL_PATH is a
# Hugging Face hub id, so from_pretrained will fetch it on first run unless a
# cached/local copy exists — confirm the intended deployment.
MODEL_PATH = "valhalla/t5-base-qg-hl"
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_PATH)

def generate_text(prompt, max_length=100, temperature=0.8, top_p=0.9):
    """Sample one decoded completion for *prompt* from the module-level model.

    Uses nucleus sampling (do_sample with top_p/temperature), so repeated
    calls with the same prompt can return different text.
    """
    encoded = tokenizer(prompt, return_tensors="pt")
    # Inference only — no gradients needed.
    with torch.no_grad():
        generated = model.generate(
            **encoded,
            max_length=max_length,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
        )
    return tokenizer.decode(generated[0], skip_special_tokens=True)

def question_generator_with_answer(context, max_attempts=20):
    """Generate one multiple-choice question for *context*.

    Returns a dict with keys "question", "choices" (shuffled list containing
    the correct answer plus up to three distractors) and "correct_answer".

    max_attempts bounds distractor sampling: the original unbounded
    `while len(wrong_answers) < 3` loop could hang forever when the model
    kept producing duplicates, the correct answer, or question-like text.
    """
    question = generate_text(f"question: {context}")
    correct_answer = generate_text(f"answer: {context}", temperature=1.0)

    wrong_answers = set()
    attempts = 0
    while len(wrong_answers) < 3 and attempts < max_attempts:
        attempts += 1
        candidate = generate_text(f"answer: {context}", max_length=50, temperature=1.0, top_p=0.8)
        # Reject the correct answer itself and outputs that look like questions.
        if candidate != correct_answer and "?" not in candidate:
            wrong_answers.add(candidate)

    choices = list(wrong_answers) + [correct_answer]
    random.shuffle(choices)

    return {
        "question": question,
        "choices": choices,
        "correct_answer": correct_answer
    }

def format_question_output(context):
    """Build four numbered multiple-choice questions from *context*.

    Each question is rendered as "Qn.<question>" followed by lettered
    choices (A., B., ...), joined into a single display string.
    """
    rendered = []
    for number in range(1, 5):
        item = question_generator_with_answer(context)
        options = "\n".join(
            f"{chr(65 + pos)}. {opt}" for pos, opt in enumerate(item["choices"])
        )
        rendered.append(f"\nQ{number}.{item['question']}\n\n{options}\n")
    return "\n".join(rendered)

def extract_text_from_pdf(pdf_path):
    """Extract all page text from an uploaded PDF and generate questions from it.

    pdf_path is the object Gradio's File component passes in (its .name is the
    path of the uploaded temp file).
    """
    text = ""
    with pdfplumber.open(pdf_path.name) as pdf:
        for page in pdf.pages:
            # extract_text() returns None for pages with no extractable text
            # (e.g. scanned images); the original `None + "\n"` raised TypeError.
            page_text = page.extract_text()
            if page_text:
                text += page_text + "\n"
    return format_question_output(text)

def OCR(photo):
    """Run OCR on *photo* and return the recognized text concatenated in order.

    Supports English plus traditional Chinese.
    """
    # Constructing easyocr.Reader loads (and may download) the detection and
    # recognition models, which is slow; the original rebuilt it on every
    # button click. Cache a single instance on the function object instead.
    reader = getattr(OCR, "_reader", None)
    if reader is None:
        reader = easyocr.Reader(['en', 'ch_tra'])
        OCR._reader = reader
    results = reader.readtext(photo)
    return "".join(text for (_, text, _) in results)

def OCR_gen(text):
    """Generate questions from OCR output, or return an error for blank input."""
    if text.strip():
        return format_question_output(text)
    # Guard: whitespace-only OCR output means nothing usable was recognized.
    return "錯誤:OCR 沒有輸出任何可用的文字,請重新檢查圖片內容。"

# --- Gradio UI: three input paths (text / PDF / image OCR) -----------------
with gr.Blocks() as demo:
    gr.Markdown("<h1>產生英文題目</h1>")
    gr.Markdown("這是一個利用 hugging face 產生英文題目的小專案")

    with gr.Tabs():
        # Tab 1: paste an English passage directly.
        with gr.TabItem("輸入文字"):
            article_box = gr.Textbox(label="請輸入英文文章:", placeholder="While lily is setting...", lines=5)
            article_quiz_box = gr.Textbox(label="題目")
            article_btn = gr.Button("產生題目")

        # Tab 2: upload a PDF whose text is extracted first.
        with gr.TabItem("PDF文件辨識"):
            pdf_file = gr.File(label="請上傳PDF文件")
            pdf_quiz_box = gr.Textbox()
            pdf_btn = gr.Button("產生題目")

        # Tab 3: OCR an image, let the user confirm the text, then generate.
        with gr.TabItem("圖片辨識"):
            photo_input = gr.Image()
            ocr_text_box = gr.Textbox(placeholder="請確認辨識結果", label="辨識結果")
            ocr_btn = gr.Button("開始解析")
            ocr_quiz_btn = gr.Button("產生題目")
            ocr_quiz_box = gr.Textbox(label="題目")

    article_btn.click(format_question_output, inputs=article_box, outputs=article_quiz_box)
    pdf_btn.click(extract_text_from_pdf, inputs=pdf_file, outputs=pdf_quiz_box)
    ocr_btn.click(OCR, inputs=photo_input, outputs=ocr_text_box)
    ocr_quiz_btn.click(OCR_gen, inputs=ocr_text_box, outputs=ocr_quiz_box)

demo.launch()