Spaces:
Runtime error
Runtime error
Upload 3 files
Browse files- Dockerfile +16 -0
- README.md +1 -0
- app.py +40 -0
Dockerfile
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
# you will also find guides on how best to write your Dockerfile

FROM condaforge/mambaforge:23.1.0-1

# NOTE(review): app.py shells out to `java -jar pdffigures2.jar`; this relies
# on the sbt package bringing a JDK with it -- confirm.
RUN mamba install -y sbt=1.7.1 git gradio

WORKDIR /work

COPY data/pdffigures2.jar /work
COPY app.py /work

# Gradio listens on the loopback interface unless told otherwise, which is not
# reachable from outside the container. Bind to all interfaces on the port the
# Space declares in README.md (app_port: 7860).
ENV GRADIO_SERVER_NAME=0.0.0.0
ENV GRADIO_SERVER_PORT=7860
EXPOSE 7860

# NOTE(review): app.py writes files at runtime; if the platform runs the
# container as a non-root user, /work (created by root above) may not be
# writable -- confirm against the Spaces Docker documentation.

# Exec form: python runs as PID 1 and receives stop signals directly, instead
# of being hidden behind `/bin/sh -c` as with the shell form.
ENTRYPOINT ["python", "app.py"]

# sbt "runMain org.allenai.pdffigures2.FigureExtractorBatchCli 2304.11968v1.Track_Anything_Segment_Anything_Meets_Videos.pdf -m figures -t 48 -q"
|
README.md
CHANGED
@@ -6,6 +6,7 @@ colorTo: indigo
|
|
6 |
sdk: docker
|
7 |
pinned: false
|
8 |
license: apache-2.0
|
|
|
9 |
---
|
10 |
|
11 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
6 |
sdk: docker
|
7 |
pinned: false
|
8 |
license: apache-2.0
|
9 |
+
app_port: 7860
|
10 |
---
|
11 |
|
12 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import urllib.request
|
3 |
+
import subprocess
|
4 |
+
import os
|
5 |
+
import glob
|
6 |
+
|
7 |
+
|
8 |
+
def extract_figure(url):
    """Download the PDF at *url* and return the path of its first figure.

    The PDF is saved as ``input.pdf`` in a fresh temporary directory and
    ``pdffigures2.jar`` (looked up in the current working directory) is run
    there with the figure prefix ``figures_``. Working in a per-call
    directory keeps concurrent or earlier requests from overwriting or
    returning each other's files.

    Args:
        url: http(s) URL of the PDF file to process.

    Returns:
        Path to the extracted figure image. ``figures_input-Figure1-1.png``
        is preferred; if it is missing, the first ``figures_input-Figure*.png``
        in sorted order is used. The file is left on disk so the caller can
        read it.

    Raises:
        ValueError: if *url* is empty or is not an http(s) URL.
        subprocess.CalledProcessError: if the ``java`` process exits non-zero.
        RuntimeError: if the extractor produced no figure image.
    """
    import shutil
    import tempfile
    from urllib.parse import urlparse

    # Validate before touching the network or disk. Only http(s) is allowed so
    # that user-supplied input cannot make the server read local files via
    # schemes such as file://.
    cleaned_url = (url or "").strip()
    parsed = urlparse(cleaned_url)
    if parsed.scheme not in ("http", "https") or not parsed.netloc:
        raise ValueError(f"Expected an http(s) URL of a PDF file, got: {url!r}")

    # Resolve the jar before changing the subprocess working directory.
    jar_path = os.path.abspath("pdffigures2.jar")
    work_dir = tempfile.mkdtemp(prefix="pdffigures_")
    pdf_path = os.path.join(work_dir, "input.pdf")

    try:
        # download PDF file from URL
        urllib.request.urlretrieve(cleaned_url, pdf_path)

        # extract figures from PDF using pdffigures2; check=True turns a
        # failed run into an exception instead of a dangling file path.
        subprocess.run(
            ["java", "-jar", jar_path, "input.pdf", "-m", "figures_"],
            cwd=work_dir,
            check=True,
        )

        # get path to first figure
        figure_path = os.path.join(work_dir, "figures_input-Figure1-1.png")
        if not os.path.exists(figure_path):
            candidates = sorted(
                glob.glob(os.path.join(work_dir, "figures_input-Figure*.png"))
            )
            if not candidates:
                raise RuntimeError("No figures could be extracted from the PDF.")
            figure_path = candidates[0]
    except Exception:
        # Nothing useful was produced; do not leave the directory behind.
        shutil.rmtree(work_dir, ignore_errors=True)
        raise

    # The downloaded PDF is no longer needed; the figure must stay on disk.
    os.remove(pdf_path)

    # return first figure
    return figure_path
|
31 |
+
|
32 |
+
# define input and output interfaces
# The `gr.inputs` / `gr.outputs` namespaces are deprecated and absent from
# newer Gradio releases; the top-level component classes are the supported
# spelling.
inputs = gr.Textbox(label="Enter URL of PDF file:")
outputs = gr.Image(label="First figure in PDF:", type="filepath")

# create interface
interface = gr.Interface(
    fn=extract_figure,
    inputs=inputs,
    outputs=outputs,
    title="Extract First Figure from PDF",
    description="Enter the URL of a PDF file and the first figure in the file will be extracted and displayed.",
)

# launch interface
# Bind to all interfaces on the port declared in README.md (app_port: 7860) so
# the app is reachable from outside the Docker container.
interface.launch(server_name="0.0.0.0", server_port=7860)
|