# Source: Hugging Face Spaces - mkdirjason / Generating_English_Question
# Space status: Sleeping
# File size: 6,219 Bytes
# Revision hashes shown on the page: 6c6acc8, f5b5690


from transformers import pipeline
import gradio as gr
import easyocr
import pdfplumber
import random 

# Page header content; usable with either gr.Interface or gr.Blocks.
title = "<h1>產生英文題目</h1>"
description = """這是一個利用hugging face 產生英文題目的小專案"""
# NOTE(review): this textbox is not referenced again in the visible code (the
# text tab builds its own input box) -- confirm whether it is still needed.
textbox = gr.Textbox(label="請輸入英文文章:", placeholder="While lily is setting...", lines=5)

# Top-level Blocks container; its layout is filled in by the `with demo:` section.
demo = gr.Blocks()

# Load the text2text-generation pipeline backed by the
# valhalla/t5-base-qg-hl checkpoint. This runs at import time.
question_generator = pipeline("text2text-generation", model="valhalla/t5-base-qg-hl")

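# Disabled variant that generates questions only (no answer choices).
# def question_generate(context):
#     # Let the model generate questions from the article
#     question_result = []
#     for i in range(5):
#         question = question_generator(f"question: {context}",
#             max_length=100,
#             do_sample = True, # enable sampling for more variety
#             temperature=0.8 + (i * 0.1),  # raise the temperature step by step for more varied results
#             top_p=0.9
#                         )
#         question_result.append(f"Q{i+1}. {question[0]['generated_text']}")

#     return "\n".join(question_result)  # return the questions separated by newlines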

def question_generator_with_answer(context, max_attempts=30):
    """Generate one multiple-choice question for *context*.

    The module-level ``question_generator`` pipeline is sampled once for the
    question, once for the answer treated as correct, and then repeatedly to
    collect distractors.

    Args:
        context: Article text the question is built from.
        max_attempts: Upper bound on distractor sampling rounds. Sampling is
            random, so without a bound the loop could run forever when the
            model keeps returning the correct answer, a duplicate, or text
            containing "?".

    Returns:
        A dict with the keys ``"question"`` (generated question text),
        ``"choices"`` (shuffled list holding the correct answer plus up to
        three distractors; fewer if ``max_attempts`` is exhausted first) and
        ``"correct_answer"``.
    """
    # Generate the question.
    # NOTE(review): the prompt prefixes ("question:", "answer:", "answer: ")
    # are kept exactly as they were; confirm they match the input format the
    # checkpoint was trained on.
    question_data = question_generator(
        f"question:{context}",
        max_length=100, do_sample=True, temperature=0.8, top_p=0.9,
    )
    question = question_data[0]['generated_text']

    # Generate the answer that is treated as correct.
    answer_data = question_generator(
        f"answer:{context}",
        max_length=100, do_sample=True, temperature=1, top_p=0.9,
    )
    correct_answer = answer_data[0]['generated_text']

    # Collect up to three distinct wrong answers (a set drops duplicates).
    wrong_answers = set()
    for _ in range(max_attempts):
        if len(wrong_answers) >= 3:
            break
        wrong_data = question_generator(
            f"answer: {context}",
            max_length=50, do_sample=True, temperature=1.0, top_p=0.8,
        )
        wrong_answer = wrong_data[0]['generated_text']
        # Skip candidates equal to the correct answer or that look like a
        # question rather than an answer.
        if wrong_answer != correct_answer and "?" not in wrong_answer:
            wrong_answers.add(wrong_answer)

    # Add the correct answer to the options and shuffle their order.
    choices = list(wrong_answers) + [correct_answer]
    random.shuffle(choices)

    # Return the question together with its options.
    return {
        "question": question,
        "choices": choices,
        "correct_answer": correct_answer
    }

def format_question_output(context):
    """Build four multiple-choice questions for *context* as display text.

    Each question comes from ``question_generator_with_answer`` and is
    rendered as ``Q<n>.<question>`` followed by its options labelled A, B,
    C, ... on separate lines. The rendered questions are joined with newlines.
    """
    rendered = []
    for number in range(1, 5):
        item = question_generator_with_answer(context)
        option_lines = []
        for offset, option in enumerate(item['choices']):
            letter = chr(ord("A") + offset)
            option_lines.append(f"{letter}. {option}")
        options = "\n".join(option_lines)
        rendered.append(f"\nQ{number}.{item['question']}\n\n{options}\n")
    # Separate the questions with a newline.
    return "\n".join(rendered)

# def format_question_output(context):
#     result = question_generator_with_answer(context)
#     question_text = f"**{result['question']}**\n\n"
#     choices_text = "\n".join([f"{chr(65+i)}. {choice}" for i, choice in enumerate(result['choices'])])
#     return f"{question_text}\n{choices_text}\n\n✅ 正確答案: {result['correct_answer']}"
 

# PDF text extraction
def extract_text_from_pdf(pdf_path):
    """Extract the text of an uploaded PDF and generate questions from it.

    Args:
        pdf_path: The uploaded file, either an object exposing the file path
            as ``.name`` or the path itself.

    Returns:
        The formatted questions from ``format_question_output``, or an error
        message when no text could be extracted from the PDF.
    """
    # Accept both an upload object with `.name` and a plain path.
    path = getattr(pdf_path, "name", pdf_path)
    with pdfplumber.open(path) as pdf:
        # extract_text() may yield None for a page without extractable text
        # (e.g. a scanned image); treat that page as empty instead of
        # failing on `None + "\n"`.
        page_texts = [page.extract_text() or "" for page in pdf.pages]
    text = "".join(f"{page_text}\n" for page_text in page_texts)

    # Same guard as the OCR path: do not send an empty article to the model.
    if not text.strip():
        return "錯誤：PDF 沒有擷取到任何可用的文字，請重新檢查檔案內容。"
    return format_question_output(text)



# Image text recognition (recognition accuracy is currently too low)
_ocr_reader = None


def _get_ocr_reader():
    """Return the shared EasyOCR reader, creating it on first use."""
    global _ocr_reader
    if _ocr_reader is None:
        # Building a Reader loads the recognition models, so do it once
        # instead of on every request.
        _ocr_reader = easyocr.Reader(['en', 'ch_tra'])
    return _ocr_reader


def OCR(photo):
    """Recognise the text in *photo* and return it as a single string.

    Args:
        photo: The image to read, passed straight to ``Reader.readtext``.

    Returns:
        The recognised text fragments joined with single spaces, so that
        words from adjacent detections are not glued together. Empty string
        when nothing is recognised.
    """
    results = _get_ocr_reader().readtext(photo)
    # Each result is (bounding box, text, confidence); only the text is used.
    return " ".join(text for _bbox, text, _prob in results)


# Generate questions once the OCR result has been confirmed
def OCR_gen(text):
    """Generate questions from confirmed OCR text.

    Returns the formatted questions from ``format_question_output``, or an
    error message when *text* is empty or whitespace only.
    """
    has_content = bool(text.strip())
    if has_content:
        return format_question_output(text)
    # Nothing usable came out of the OCR step.
    return "錯誤：OCR 沒有輸出任何可用的文字，請重新檢查圖片內容。"


# Build the UI inside the Blocks context: one tab per input method, followed
# by the click handlers that connect each button to its callback.
with demo:
    gr.Markdown(title)
    gr.Markdown(description)
    with gr.Tabs():
        # Tab 1: type or paste the article directly.
        with gr.TabItem("輸入文字"):
            with gr.Row():
                text_input = gr.Textbox(label="請輸入英文文章:", placeholder="While lily is setting...", lines=5)
                with gr.Column():
                    text_output = gr.Textbox(label="題目")
                    text_button = gr.Button("產生題目")
        # Tab 2: upload a PDF file.
        with gr.TabItem("PDF文件辨識"):
            with gr.Row():
                PDF_input = gr.File(label="請上傳PDF文件")
                with gr.Column():
                    PDF_output = gr.Textbox()
                    PDF_button = gr.Button("產生題目")
        # Tab 3: upload an image, review the OCR text, then generate.
        with gr.TabItem("圖片辨識"):
            with gr.Row():
                image_input = gr.Image()
                # Recognised text from the image, shown for the user to check
                with gr.Column():
                    img_tem = gr.Textbox(placeholder="請確認辨識結果",label="辨識結果")
                    img_button = gr.Button("開始解析")
                    image_button = gr.Button("產生題目")
                # Generated questions
                with gr.Column():
                    image_output = gr.Textbox(label="題目")

    # Check whether an article was entered
    def validate_and_generate(text):
        """Return questions for *text*, or a prompt when it is blank."""
        if not text.strip():
            return "請輸入文章以產生題目"
        return format_question_output(text)

    # Text tab: connect the button to its handler
    text_button.click(validate_and_generate, inputs=text_input, outputs=text_output)

    # Check whether a file was uploaded
    def test_PDF(file):
        """Return questions for the uploaded PDF, or a prompt when none is given."""
        if not file:
            return "請上傳PDF文件以產生題目"
        return extract_text_from_pdf(file)

    # PDF tab
    PDF_button.click(test_PDF, inputs=PDF_input, outputs=PDF_output)

    # Check whether an image was uploaded
    def test_image(image):
        """Return the OCR text for *image*, or a prompt when none is given."""
        if image is None:
            return "請上傳圖片以產生題目"
        return OCR(image)

    # Run recognition; the result goes into img_tem for the user to review
    img_button.click(test_image, inputs=image_input, outputs=img_tem)


    # Check that a recognition result is present
    def test_finished(text):
        """Generate questions from the reviewed OCR text.

        Rejects blank text and the "please upload an image" prompt that
        test_image writes into the same textbox.
        """
        if (not text.strip() or text == "請上傳圖片以產生題目"):
            return "請確認文章已經輸入"
        return OCR_gen(text)
    image_button.click(test_finished, inputs=img_tem, outputs=image_output)



# Start the Gradio app.
demo.launch()