mkdirjason committed on
Commit
f5b5690
·
verified ·
1 Parent(s): 902e3f2
Files changed (1) hide show
  1. app.py +179 -181
app.py CHANGED
@@ -1,181 +1,179 @@
1
- !pip install gradio
2
- !pip install easyocr
3
- !pip install pdfplumber
4
-
5
- from transformers import pipeline
6
- import gradio as gr
7
- import easyocr
8
- import pdfplumber
9
- import random
10
-
11
- #適用於Interface、Block
12
- title = "<h1>產生英文題目</h1>"
13
- description = """這是一個利用hugging face 產生英文題目的小專案"""
14
- textbox = gr.Textbox(label="請輸入英文文章:", placeholder="While lily is setting...", lines=5)
15
-
16
- #加入磚
17
- demo = gr.Blocks()
18
-
19
- # 加載 Hugging Face 上的問答模型
20
- question_generator = pipeline("text2text-generation", model="valhalla/t5-base-qg-hl")
21
-
22
- # def question_generate(context):
23
- # # 讓模型根據文章生成問題
24
- # question_result = []
25
- # for i in range(5):
26
- # question = question_generator(f"question: {context}",
27
- # max_length=100,
28
- # do_sample = True, # 啟用採樣以增加多樣性
29
- # temperature=0.8 + (i * 0.1), # 逐漸增加溫度參數來獲得更多樣的結果
30
- # top_p=0.9
31
- # )
32
- # question_result.append(f"Q{i+1}. {question[0]['generated_text']}")
33
-
34
- # return "\n".join(question_result) #以換行符號返回多個問題
35
-
36
- def question_generator_with_answer(context):
37
-
38
- #產生題目
39
- question_data = question_generator(f"question:{context}",
40
- max_length=100, do_sample=True, temperature=0.8, top_p=0.9)
41
- question = question_data[0]['generated_text']
42
-
43
- #產生正確答案
44
- answer_data = question_generator(f"answer:{context}",
45
- max_length=100, do_sample=True, temperature=1, top_p=0.9)
46
- correct_answer = answer_data[0]['generated_text']
47
-
48
- #產生錯誤答案
49
- wrong_answers = set()
50
- while len(wrong_answers) < 3:
51
- wrong_data = question_generator(f"answer: {context}", max_length=50,
52
- do_sample=True, temperature=1.0, top_p=0.8)
53
- wrong_answer = wrong_data[0]['generated_text']
54
- if wrong_answer != correct_answer and "?" not in wrong_answer: # 避免重複正確答案
55
- wrong_answers.add(wrong_answer)
56
-
57
-
58
- # 將正確答案加入選項,並打亂順序
59
- choices = list(wrong_answers) + [correct_answer]
60
- random.shuffle(choices)
61
-
62
-
63
- # 回傳題目與選項
64
- return {
65
- "question": question,
66
- "choices": choices,
67
- "correct_answer": correct_answer
68
- }
69
-
70
- def format_question_output(context):
71
- question_result=[]
72
- for j in range(4):
73
- result = question_generator_with_answer(context)
74
- question_text = f"{result['question']}\n"
75
- choices_text = "\n".join([f"{chr(65+i)}. {choice}" for i, choice in enumerate(result['choices'])])
76
- question_result.append(f"\nQ{j+1}.{question_text}\n{choices_text}\n")
77
- return "\n".join(question_result) #用換行來連接
78
-
79
- # def format_question_output(context):
80
- # result = question_generator_with_answer(context)
81
- # question_text = f"**{result['question']}**\n\n"
82
- # choices_text = "\n".join([f"{chr(65+i)}. {choice}" for i, choice in enumerate(result['choices'])])
83
- # return f"{question_text}\n{choices_text}\n\n✅ 正確答案: {result['correct_answer']}"
84
-
85
-
86
- #pdf辨識
87
- def extract_text_from_pdf(pdf_path):
88
- text = ""
89
- with pdfplumber.open(pdf_path.name) as pdf:
90
- for page in pdf.pages:
91
- text += page.extract_text() + "\n"
92
- ls = format_question_output(text)
93
- return ls
94
-
95
-
96
-
97
- #圖片辨識(辨識度太低)
98
- def OCR(photo):
99
- text_inner = ""
100
- questions = []
101
- reader = easyocr.Reader(['en', 'ch_tra'])
102
- results = reader.readtext(photo)
103
- for (bbox, text, prob) in results:
104
- text_inner += text
105
- return text_inner
106
-
107
-
108
- #確認辨識結果沒有問題後,產生題目
109
- def OCR_gen(text):
110
- if not text.strip(): # 確保輸入的 text 不是空的
111
- return "錯誤:OCR 沒有輸出任何可用的文字,請重新檢查圖片內容。"
112
- ls = format_question_output(text)
113
- return ls
114
-
115
-
116
- with demo:
117
- gr.Markdown(title)
118
- gr.Markdown(description)
119
- with gr.Tabs():
120
- with gr.TabItem("輸入文字"):
121
- with gr.Row():
122
- text_input = gr.Textbox(label="請輸入英文文章:", placeholder="While lily is setting...", lines=5)
123
- with gr.Column():
124
- text_output = gr.Textbox(label="題目")
125
- text_button = gr.Button("產生題目")
126
- with gr.TabItem("PDF文件辨識"):
127
- with gr.Row():
128
- PDF_input = gr.File(label="請上傳PDF文件")
129
- with gr.Column():
130
- PDF_output = gr.Textbox()
131
- PDF_button = gr.Button("產生題目")
132
- with gr.TabItem("圖片辨識"):
133
- with gr.Row():
134
- image_input = gr.Image()
135
- #解析圖片文字
136
- with gr.Column():
137
- img_tem = gr.Textbox(placeholder="請確認辨識結果",label="辨識結果")
138
- img_button = gr.Button("開始解析")
139
- image_button = gr.Button("產生題目")
140
- # #產生題目
141
- with gr.Column():
142
- image_output = gr.Textbox(label="題目")
143
-
144
- #判別有沒有輸入文章
145
- def validate_and_generate(text):
146
- if not text.strip():
147
- return "請輸入文章以產生題目"
148
- return format_question_output(text)
149
-
150
- #文字輸入 物件
151
- text_button.click(validate_and_generate, inputs=text_input, outputs=text_output)
152
-
153
- #判別有沒有上傳檔案
154
- def test_PDF(file):
155
- if not file:
156
- return "請上傳PDF文件以產生題目"
157
- return extract_text_from_pdf(file)
158
-
159
- #PDF輸入
160
- PDF_button.click(test_PDF, inputs=PDF_input, outputs=PDF_output)
161
-
162
- #判別有沒有上傳照片
163
- def test_image(image):
164
- if image is None:
165
- return "請上傳圖片以產生題目"
166
- return OCR(image)
167
-
168
- #辨識文章
169
- img_button.click(test_image, inputs=image_input, outputs=img_tem)
170
-
171
-
172
- #檢查辨識結果有沒有存在
173
- def test_finished(text):
174
- if (not text.strip() or text == "請上傳圖片以產生題目"):
175
- return "請確認文章已經輸入"
176
- return OCR_gen(text)
177
- image_button.click(test_finished, inputs=img_tem, outputs=image_output)
178
-
179
-
180
-
181
- demo.launch()
 
1
+
2
+
3
+ from transformers import pipeline
4
+ import gradio as gr
5
+ import easyocr
6
+ import pdfplumber
7
+ import random
8
+
9
# Page header text; usable with both gr.Interface and gr.Blocks.
title = "<h1>產生英文題目</h1>"
description = """這是一個利用hugging face 產生英文題目的小專案"""
# NOTE(review): this module-level textbox is not referenced anywhere else in
# the visible code (the UI builds its own text_input) — confirm it is needed.
textbox = gr.Textbox(label="請輸入英文文章:", placeholder="While lily is setting...", lines=5)

# Create the Blocks container that the UI is assembled in.
demo = gr.Blocks()

# Load the text2text-generation model from the Hugging Face Hub.
question_generator = pipeline("text2text-generation", model="valhalla/t5-base-qg-hl")
19
+
20
+ # def question_generate(context):
21
+ # # 讓模型根據文章生成問題
22
+ # question_result = []
23
+ # for i in range(5):
24
+ # question = question_generator(f"question: {context}",
25
+ # max_length=100,
26
+ # do_sample = True, # 啟用採樣以增加多樣性
27
+ # temperature=0.8 + (i * 0.1), # 逐漸增加溫度參數來獲得更多樣的結果
28
+ # top_p=0.9
29
+ # )
30
+ # question_result.append(f"Q{i+1}. {question[0]['generated_text']}")
31
+
32
+ # return "\n".join(question_result) #以換行符號返回多個問題
33
+
34
def question_generator_with_answer(context):
    """Generate one multiple-choice question for ``context``.

    The module-level ``question_generator`` pipeline is called with a
    ``question:`` prefix to obtain the question text, with an ``answer:``
    prefix to obtain the answer that is treated as correct, and then
    repeatedly with an ``answer: `` prefix to sample distractors.

    Distractor sampling is capped at a fixed number of attempts, so the
    function always terminates even when the model keeps returning the same
    text; in that case fewer than three distractors are returned.

    Args:
        context: The source passage as a string.

    Returns:
        A dict with the keys ``"question"`` (str), ``"choices"`` (shuffled
        list containing the correct answer plus up to three distractors) and
        ``"correct_answer"`` (str).
    """
    # NOTE(review): the "question:" / "answer:" prefixes are kept exactly as
    # written; confirm against the model card that this is the input format
    # valhalla/t5-base-qg-hl expects.
    distractor_count = 3
    max_attempts = 30  # upper bound on sampling calls for distractors

    # Generate the question text.
    question_data = question_generator(
        f"question:{context}",
        max_length=100, do_sample=True, temperature=0.8, top_p=0.9,
    )
    question = question_data[0]['generated_text']

    # Generate the answer that will be treated as correct.
    answer_data = question_generator(
        f"answer:{context}",
        max_length=100, do_sample=True, temperature=1, top_p=0.9,
    )
    correct_answer = answer_data[0]['generated_text']

    # Sample distractors. A set removes duplicates; the attempt cap replaces
    # the former unbounded ``while`` loop, which could spin forever.
    wrong_answers = set()
    for _ in range(max_attempts):
        if len(wrong_answers) >= distractor_count:
            break
        wrong_data = question_generator(
            f"answer: {context}",
            max_length=50, do_sample=True, temperature=1.0, top_p=0.8,
        )
        wrong_answer = wrong_data[0]['generated_text']
        # Skip copies of the correct answer and anything that looks like a question.
        if wrong_answer != correct_answer and "?" not in wrong_answer:
            wrong_answers.add(wrong_answer)

    # Mix the correct answer in with the distractors and shuffle the order.
    choices = list(wrong_answers) + [correct_answer]
    random.shuffle(choices)

    return {
        "question": question,
        "choices": choices,
        "correct_answer": correct_answer,
    }
67
+
68
def format_question_output(context, num_questions=4):
    """Generate several multiple-choice questions and format them as text.

    Args:
        context: The source passage as a string.
        num_questions: How many questions to generate. Defaults to 4, the
            count that was previously hard-coded.

    Returns:
        One string with the questions numbered ``Q1``, ``Q2``, ... and each
        question's choices lettered ``A``, ``B``, ...; an empty string when
        ``num_questions`` is 0.
    """
    formatted = []
    for number in range(1, num_questions + 1):
        result = question_generator_with_answer(context)
        choices_text = "\n".join(
            f"{chr(ord('A') + idx)}. {choice}"
            for idx, choice in enumerate(result['choices'])
        )
        # A blank line separates the question from its list of choices.
        formatted.append(f"\nQ{number}.{result['question']}\n\n{choices_text}\n")
    return "\n".join(formatted)
76
+
77
+ # def format_question_output(context):
78
+ # result = question_generator_with_answer(context)
79
+ # question_text = f"**{result['question']}**\n\n"
80
+ # choices_text = "\n".join([f"{chr(65+i)}. {choice}" for i, choice in enumerate(result['choices'])])
81
+ # return f"{question_text}\n{choices_text}\n\n✅ 正確答案: {result['correct_answer']}"
82
+
83
+
84
+ #pdf辨識
85
def extract_text_from_pdf(pdf_path):
    """Extract the text of an uploaded PDF and generate questions from it.

    Args:
        pdf_path: The value delivered by the Gradio file input: either an
            object exposing the file path as ``.name`` or a plain path string.

    Returns:
        The formatted questions from ``format_question_output``, or an error
        message when no text could be extracted from the PDF.
    """
    # Accept both a file wrapper (path in ``.name``) and a bare path string.
    path = getattr(pdf_path, "name", pdf_path)
    with pdfplumber.open(path) as pdf:
        # extract_text() may yield None for a page without a text layer;
        # treat that as empty instead of failing on ``None + "\n"``.
        text = "".join((page.extract_text() or "") + "\n" for page in pdf.pages)
    if not text.strip():
        # Scanned or image-only PDFs give nothing to build questions from.
        return "錯誤:PDF 中沒有可擷取的文字,請確認檔案內容。"
    return format_question_output(text)
92
+
93
+
94
+
95
+ #圖片辨識(辨識度太低)
96
_ocr_reader = None  # easyocr.Reader shared across calls, created on first use


def _get_ocr_reader():
    """Return the shared easyocr reader, building it on the first call."""
    global _ocr_reader
    if _ocr_reader is None:
        _ocr_reader = easyocr.Reader(['en', 'ch_tra'])
    return _ocr_reader


def OCR(photo):
    """Run OCR on ``photo`` and return the recognised text.

    Args:
        photo: The image to read, passed straight to
            ``easyocr.Reader.readtext``.

    Returns:
        The recognised text fragments joined with single spaces, so that
        words from neighbouring detections do not run together; an empty
        string when nothing is recognised.
    """
    results = _get_ocr_reader().readtext(photo)
    # Each result is a (bounding box, text, confidence) triple; only the text is used.
    return " ".join(text for _bbox, text, _prob in results)
104
+
105
+
106
+ #確認辨識結果沒有問題後,產生題目
107
def OCR_gen(text):
    """Generate questions from OCR text that the user has reviewed.

    Args:
        text: The OCR result string.

    Returns:
        The formatted questions, or an error message when ``text`` is empty
        or contains only whitespace.
    """
    has_content = bool(text.strip())
    if has_content:
        return format_question_output(text)
    # Nothing usable came out of OCR.
    return "錯誤:OCR 沒有輸出任何可用的文字,請重新檢查圖片內容。"
112
+
113
+
114
# Build the three-tab UI (typed text, PDF upload, image OCR) and wire up the
# button handlers.
# NOTE(review): the diff view this was recovered from had lost all
# indentation, so the Row/Column nesting below is a reconstruction — confirm
# the layout against the real app.py.
with demo:
    gr.Markdown(title)
    gr.Markdown(description)
    with gr.Tabs():
        with gr.TabItem("輸入文字"):
            with gr.Row():
                text_input = gr.Textbox(label="請輸入英文文章:", placeholder="While lily is setting...", lines=5)
                with gr.Column():
                    text_output = gr.Textbox(label="題目")
                    text_button = gr.Button("產生題目")
        with gr.TabItem("PDF文件辨識"):
            with gr.Row():
                PDF_input = gr.File(label="請上傳PDF文件")
                with gr.Column():
                    PDF_output = gr.Textbox()
                    PDF_button = gr.Button("產生題目")
        with gr.TabItem("圖片辨識"):
            with gr.Row():
                image_input = gr.Image()
                # Step 1: run OCR and let the user review the recognised text.
                with gr.Column():
                    img_tem = gr.Textbox(placeholder="請確認辨識結果",label="辨識結果")
                    img_button = gr.Button("開始解析")
                    image_button = gr.Button("產生題目")
                # Step 2: show the generated questions.
                with gr.Column():
                    image_output = gr.Textbox(label="題目")

    # Check that an article was entered before generating questions.
    def validate_and_generate(text):
        if not text.strip():
            return "請輸入文章以產生題目"
        return format_question_output(text)

    # Text tab: generate questions from the typed article.
    text_button.click(validate_and_generate, inputs=text_input, outputs=text_output)

    # Check that a file was uploaded before reading it.
    def test_PDF(file):
        if not file:
            return "請上傳PDF文件以產生題目"
        return extract_text_from_pdf(file)

    # PDF tab: extract the text and generate questions.
    PDF_button.click(test_PDF, inputs=PDF_input, outputs=PDF_output)

    # Check that an image was uploaded before running OCR.
    def test_image(image):
        if image is None:
            return "請上傳圖片以產生題目"
        return OCR(image)

    # Image tab, step 1: write the OCR result (or the prompt above) into img_tem.
    img_button.click(test_image, inputs=image_input, outputs=img_tem)


    # Check that a usable OCR result exists. The second comparison rejects
    # the prompt that test_image writes into img_tem when no image was uploaded.
    def test_finished(text):
        if (not text.strip() or text == "請上傳圖片以產生題目"):
            return "請確認文章已經輸入"
        return OCR_gen(text)
    # Image tab, step 2: generate questions from the reviewed OCR text.
    image_button.click(test_finished, inputs=img_tem, outputs=image_output)



demo.launch()