ignaciaginting committed on
Commit
e205139
·
verified ·
1 Parent(s): 4194cc5
Files changed (1) hide show
  1. app.py +17 -0
app.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from huggingface_hub import snapshot_download
4
+ from pdf_extract_kit import extract_text # Assuming this function exists in the toolkit
5
+
6
# Local directory that holds the PDF-Extract-Kit snapshot. The snapshot is
# fetched from the Hugging Face Hub only when this path does not exist yet.
model_dir = "./PDF-Extract-Kit-1.0"
if not os.path.exists(model_dir):
    snapshot_download(
        repo_id='opendatalab/PDF-Extract-Kit-1.0',
        local_dir=model_dir,
        max_workers=20,
    )
10
+
11
def process_pdf(file):
    """Extract text from an uploaded PDF via ``extract_text``.

    Args:
        file: The upload as handed over by the Gradio ``File`` input. This
            may be raw ``bytes`` (what ``gr.File(type="binary")`` delivers),
            a filesystem path (``str`` / ``os.PathLike``), or an object whose
            ``name`` attribute holds the path. ``None`` means nothing was
            uploaded.

    Returns:
        Whatever ``extract_text`` returns for the PDF, or an empty string
        when no file was provided.
    """
    import tempfile

    if file is None:
        # Submitting the form without a file: nothing to extract.
        return ""

    if isinstance(file, (bytes, bytearray)):
        # extract_text is called with a path, so raw bytes are first written
        # to a temporary .pdf file. delete=False lets the file be reopened by
        # name after the handle is closed; it is removed in the finally below.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
            tmp.write(file)
            pdf_path = tmp.name
        try:
            return extract_text(pdf_path, model_dir=model_dir)
        finally:
            os.remove(pdf_path)

    if isinstance(file, (str, os.PathLike)):
        pdf_path = os.fspath(file)
    else:
        # File-like wrapper: fall back to its on-disk path.
        pdf_path = file.name
    return extract_text(pdf_path, model_dir=model_dir)
15
+
16
# Assemble the web UI: one file-upload input routed through process_pdf,
# with the result shown as plain text, then start serving it.
iface = gr.Interface(
    fn=process_pdf,
    inputs=gr.File(type="binary"),
    outputs="text",
    title="PDF Extractor",
)
iface.launch()