giacomov committed on
Commit
689a76f
·
1 Parent(s): 1005fed

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +16 -0
  2. README.md +1 -0
  3. app.py +40 -0
Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
# you will also find guides on how best to write your Dockerfile

FROM condaforge/mambaforge:23.1.0-1

# Install the toolchain and Gradio, then drop the package cache so it does
# not end up in the image layer.
RUN mamba install -y sbt=1.7.1 git gradio \
    && mamba clean -afy

# Docker Spaces run the container as UID 1000, so create that user and make
# the working directory writable by it: app.py writes the downloaded PDF and
# the extracted figures to disk at request time.
RUN useradd -m -u 1000 user

WORKDIR /work
RUN chown user /work

COPY --chown=user data/pdffigures2.jar /work
COPY --chown=user app.py /work

USER user

# Gradio binds to 127.0.0.1 unless told otherwise; inside a container it must
# listen on all interfaces to be reachable through the published port.
ENV HOME=/home/user \
    GRADIO_SERVER_NAME=0.0.0.0

# Matches `app_port: 7860` in README.md.
EXPOSE 7860

# Exec form: python runs as PID 1 and receives stop signals directly instead
# of being wrapped in `sh -c`.
ENTRYPOINT ["python", "app.py"]

# sbt "runMain org.allenai.pdffigures2.FigureExtractorBatchCli 2304.11968v1.Track_Anything_Segment_Anything_Meets_Videos.pdf -m figures -t 48 -q"
README.md CHANGED
@@ -6,6 +6,7 @@ colorTo: indigo
6
  sdk: docker
7
  pinned: false
8
  license: apache-2.0
 
9
  ---
10
 
11
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
6
  sdk: docker
7
  pinned: false
8
  license: apache-2.0
9
+ app_port: 7860
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import urllib.request
3
+ import subprocess
4
+ import os
5
+ import glob
6
+
7
+
8
def extract_figure(url):
    """Download the PDF at *url* and return the path of its first figure.

    The PDF is saved into a fresh per-request temporary directory and
    processed with ``pdffigures2.jar`` (resolved relative to the current
    working directory). A private directory per call keeps concurrent
    requests from overwriting each other's files and prevents a figure left
    over from an earlier request from being returned by mistake.

    Args:
        url: HTTP(S) URL of the PDF file, as typed by the user.

    Returns:
        Filesystem path of a PNG image of the first extracted figure. The
        file is left on disk so the caller can read it after this function
        returns.

    Raises:
        ValueError: if *url* is empty or is not an ``http``/``https`` URL.
        subprocess.CalledProcessError: if pdffigures2 exits with an error.
        FileNotFoundError: if pdffigures2 produced no figure image.
        urllib.error.URLError: if the download fails.
    """
    import shutil
    import tempfile
    from urllib.parse import urlparse

    # The URL is untrusted user input. urlretrieve() would also accept
    # file:, ftp: and data: URLs, so restrict it to plain web downloads.
    pdf_url = (url or "").strip()
    if urlparse(pdf_url).scheme.lower() not in ("http", "https"):
        raise ValueError("Please enter an http(s) URL pointing to a PDF file.")

    work_dir = tempfile.mkdtemp(prefix="pdffigures_")
    pdf_path = os.path.join(work_dir, "input.pdf")
    figure_prefix = os.path.join(work_dir, "figures_")

    try:
        # download PDF file from URL
        urllib.request.urlretrieve(pdf_url, pdf_path)

        # extract figures from PDF using pdffigures2; check=True surfaces a
        # failed run instead of silently continuing without output
        subprocess.run(
            ["java", "-jar", "pdffigures2.jar", pdf_path, "-m", figure_prefix],
            check=True,
        )

        # The PDF is no longer needed once the figures are rendered.
        os.remove(pdf_path)

        # Same file name the original code expected for the first figure
        # ("<prefix>input-Figure1-1.png").
        first_figure = figure_prefix + "input-Figure1-1.png"
        if os.path.exists(first_figure):
            return first_figure

        # Fall back to whatever image was produced (e.g. the PDF's first
        # figure is numbered differently).
        produced = sorted(glob.glob(glob.escape(figure_prefix) + "*.png"))
        if not produced:
            raise FileNotFoundError(
                "pdffigures2 did not extract any figure from the PDF."
            )
        return produced[0]
    except Exception:
        # Nothing will be returned from this directory; do not leak it.
        shutil.rmtree(work_dir, ignore_errors=True)
        raise
31
+
32
# define input and output interfaces
# Use the top-level component classes: the gr.inputs / gr.outputs namespaces
# are deprecated in Gradio 3 and no longer exist in Gradio 4, and the
# Dockerfile installs an unpinned gradio.
inputs = gr.Textbox(label="Enter URL of PDF file:")
outputs = gr.Image(label="First figure in PDF:", type="filepath")

# create interface
interface = gr.Interface(
    fn=extract_figure,
    inputs=inputs,
    outputs=outputs,
    title="Extract First Figure from PDF",
    description="Enter the URL of a PDF file and the first figure in the file will be extracted and displayed.",
)

# launch interface
# Inside a container the server must listen on all interfaces, on the port
# declared as `app_port` in README.md; the default binds to loopback only.
interface.launch(server_name="0.0.0.0", server_port=7860)