Tbb1111 committed on
Commit
2304747
·
verified ·
1 Parent(s): e9f1346

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -5
app.py CHANGED
@@ -1,17 +1,49 @@
1
  import gradio as gr
 
 
 
 
2
 
3
- def dummy_translate(pdf_file):
4
- return "目前只是一个占位函数,稍后实现真正翻译逻辑。"
 
 
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  with gr.Blocks() as demo:
7
- gr.Markdown("# PDF 英文翻译器(开发中)")
8
 
9
  with gr.Row():
10
  pdf_input = gr.File(label="上传英文 PDF 文件", file_types=[".pdf"])
11
 
12
  translate_button = gr.Button("开始翻译")
13
- output_text = gr.Textbox(label="翻译后内容", lines=10)
14
 
15
- translate_button.click(fn=dummy_translate, inputs=pdf_input, outputs=output_text)
16
 
17
  demo.launch()
 
1
  import gradio as gr
2
+ from transformers import T5ForConditionalGeneration, T5Tokenizer
3
+ import torch
4
+ import fitz # PyMuPDF
5
+ from fpdf import FPDF
6
 
7
# Load the T5 tokenizer and model once at import time so every request
# reuses the same instances.
# NOTE(review): the stock T5 checkpoints are documented for English ->
# German/French/Romanian translation; confirm this checkpoint can actually
# produce Chinese before relying on the output.
model_name = "t5-small"  # swap in a larger checkpoint if needed
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)
11
 
12
+ # 翻译功能
13
def translate_pdf(pdf_file):
    """Translate the text of an uploaded PDF and write it to a new PDF.

    The text of every page is extracted, split into chunks small enough for
    the model's 512-token input window, translated chunk by chunk with the
    module-level ``model``/``tokenizer``, and rendered into a single-page-flow
    PDF.

    Args:
        pdf_file: The value Gradio passes for the upload component. Either an
            object exposing the temp-file path as ``.name``, a plain path
            string, or ``None`` when nothing was uploaded.

    Returns:
        Path (``str``) of the generated PDF, or ``None`` when no file was
        supplied.

    NOTE(review): the core "Arial" font used below is not expected to cover
    CJK glyphs; rendering real Chinese output will likely need a Unicode font
    registered via ``add_font`` -- confirm against the installed fpdf version.
    """
    # Standard-library helpers, imported locally so the block does not rely
    # on changes to the file's import section.
    import tempfile
    import textwrap
    from pathlib import Path

    # The button can be clicked before anything is uploaded.
    if pdf_file is None:
        return None

    # Accept both a file-like wrapper (``.name``) and a plain path string.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    # Read the whole document; the context manager closes it afterwards.
    with fitz.open(pdf_path) as doc:
        text = "".join(page.get_text() for page in doc)

    # Translate in chunks: a single encode() call with truncation would
    # silently drop everything past the first 512 tokens of the document.
    translated_parts = []
    for chunk in textwrap.wrap(text, width=1000):
        inputs = tokenizer.encode(
            "translate English to Chinese: " + chunk,
            return_tensors="pt",
            max_length=512,
            truncation=True,  # safety net for unusually token-dense chunks
        )
        outputs = model.generate(
            inputs, max_length=1024, num_beams=4, early_stopping=True
        )
        translated_parts.append(
            tokenizer.decode(outputs[0], skip_special_tokens=True)
        )
    translated_text = "\n".join(translated_parts)

    # Render the translated text into a new PDF.
    translated_pdf = FPDF()
    translated_pdf.add_page()
    translated_pdf.set_font("Arial", size=12)
    translated_pdf.multi_cell(0, 10, translated_text)

    # Write into a fresh temp directory so concurrent requests do not
    # overwrite each other's output, while keeping the download file name.
    translated_pdf_path = str(Path(tempfile.mkdtemp()) / "translated_output.pdf")
    translated_pdf.output(translated_pdf_path)

    return translated_pdf_path
36
+
37
# Build the Gradio interface: upload widget, trigger button, download slot.
with gr.Blocks() as demo:
    gr.Markdown("# PDF 英文翻译器")

    with gr.Row():
        pdf_input = gr.File(
            label="上传英文 PDF 文件",
            file_types=[".pdf"],
        )

    translate_button = gr.Button("开始翻译")
    output_pdf = gr.File(label="下载翻译后的 PDF")

    # Clicking the button runs the translation and fills the download slot.
    translate_button.click(
        fn=translate_pdf,
        inputs=pdf_input,
        outputs=output_pdf,
    )

demo.launch()