kostissz committed on
Commit
2e8fc61
·
verified ·
1 Parent(s): 8803c6c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +169 -0
app.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from typing import Dict, Iterator, Optional, Tuple

import gradio as gr
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.datamodel.pipeline_options import smolvlm_picture_description
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.utils import model_downloader
from docling_core.types import DoclingDocument
10
+
11
# When running as a Hugging Face Space (signalled by a non-empty IS_HF_SPACE
# environment variable), pre-fetch the Docling models while the module loads.
if os.environ.get("IS_HF_SPACE"):
    model_downloader.download_models()
14
+
15
+
16
def parse_document(
    file_path: str,
    do_code_enrichment: bool,
    do_formula_enrichment: bool,
    do_picture_classification: bool,
    do_picture_description: bool,
) -> Iterator[Tuple[Optional[DoclingDocument], str]]:
    """Convert a file with Docling, streaming status messages as it goes.

    This is a generator: every yielded ``(document, status)`` pair updates the
    two outputs wired to the "Parse document" button.

    Args:
        file_path: Path of the file to convert. Falsy (``None`` or ``""``)
            when the button is pressed before a file has been uploaded.
        do_code_enrichment: Enable Docling's code enrichment.
        do_formula_enrichment: Enable Docling's formula enrichment.
        do_picture_classification: Enable picture classification; also
            controls whether picture images are generated.
        do_picture_description: Enable picture description, configured with
            the ``smolvlm_picture_description`` options.

    Yields:
        ``(None, status)`` while work is pending or when there is nothing to
        parse, then ``(document, "Done ✅")`` once the conversion returned.
    """
    # Without this guard a missing upload is handed straight to
    # DocumentConverter.convert(), which fails with an unhelpful error.
    if not file_path:
        yield None, "Please upload a document first ❌"
        return

    yield None, "Parsing document... ⏳"

    pipeline_options = PdfPipelineOptions()
    pipeline_options.do_code_enrichment = do_code_enrichment
    pipeline_options.do_formula_enrichment = do_formula_enrichment
    # NOTE(review): picture images are only generated when classification is
    # enabled -- confirm whether picture description needs them as well.
    pipeline_options.generate_picture_images = do_picture_classification
    pipeline_options.images_scale = 2
    pipeline_options.do_picture_classification = do_picture_classification
    pipeline_options.do_picture_description = do_picture_description
    pipeline_options.picture_description_options = smolvlm_picture_description

    print(f"Pipeline options defined: \n\t{pipeline_options}")

    # The custom options are registered for PDF input only.
    converter = DocumentConverter(
        format_options={
            InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)
        }
    )

    result = converter.convert(file_path)

    yield result.document, "Done ✅"
44
+
45
+
46
def to_html(docling_doc: DoclingDocument) -> str:
    """Export the parsed document as HTML.

    Args:
        docling_doc: Document object exposing ``export_to_html()``.

    Returns:
        The value returned by ``docling_doc.export_to_html()``.
    """
    html = docling_doc.export_to_html()
    return html
48
+
49
+
50
def to_markdown(docling_doc: DoclingDocument) -> str:
    """Export the parsed document as Markdown.

    Args:
        docling_doc: Document object exposing ``export_to_markdown()``.

    Returns:
        The value returned by ``docling_doc.export_to_markdown()``.
    """
    markdown = docling_doc.export_to_markdown()
    return markdown
52
+
53
+
54
def to_json(docling_doc: DoclingDocument) -> Dict:
    """Export the parsed document as a dictionary.

    Args:
        docling_doc: Document object exposing ``export_to_dict()``.

    Returns:
        The value returned by ``docling_doc.export_to_dict()``.
    """
    as_dict = docling_doc.export_to_dict()
    return as_dict
56
+
57
+
58
def to_text(docling_doc: DoclingDocument) -> str:
    """Export the parsed document as plain text.

    Args:
        docling_doc: Document object exposing ``export_to_text()``.

    Returns:
        The value returned by ``docling_doc.export_to_text()``.
    """
    text = docling_doc.export_to_text()
    return text
60
+
61
+
62
def upload_file(file) -> str:
    """Return the ``name`` attribute of an uploaded file object.

    Args:
        file: Object exposing a ``name`` attribute.

    Returns:
        The value of ``file.name``.
    """
    uploaded_name = file.name
    return uploaded_name
64
+
65
+
66
def setup_gradio_demo():
    """Build the Docling demo UI and launch it.

    The page has an intro text, a row of three columns (upload, configure and
    parse, convert) and an output text box. Parsing stores the resulting
    document in a ``gr.State``; each convert button exports that stored
    document into the output box.
    """
    # File extensions offered by the upload widget.
    accepted_file_types = [
        ".pdf",
        ".docx",
        ".pptx",
        ".csv",
        ".md",
        ".png",
        ".jpg",
        ".tiff",
        ".bmp",
        ".html",
        ".xhtml",
        ".xlsx",
    ]

    def _feature_toggle(label):
        # Every enrichment option starts switched off.
        return gr.Checkbox(value=False, label=label)

    with gr.Blocks() as demo:
        gr.Markdown(
            """ # Docling - OCR: Parse documents, images, spreadsheets and more to markdown or other formats!

            Docling is very powerful tool, with lots of cool features and integrations to other AI frameworks (e.g. LlamaIndex, LangChain, and many more).

            Model used for picture classification: [EfficientNet-B0 Document Image Classifier](https://huggingface.co/ds4sd/DocumentFigureClassifier)

            Model used for picture description: [SmolVLM-256M-Instruct](https://huggingface.co/HuggingFaceTB/SmolVLM-256M-Instruct)

            To explore the full set of features of Docling visit: https://github.com/docling-project/docling
            """
        )

        with gr.Row():
            # Step 1: choose the document.
            with gr.Column():
                gr.Markdown("### 1) Upload")
                file_output = gr.File(
                    file_count="single",
                    file_types=accepted_file_types,
                )

            # Step 2: pick the optional enrichments and start parsing.
            with gr.Column():
                gr.Markdown("### 2) Configure engine & Parse")
                code_understanding = _feature_toggle("Enable Code understanding")
                formula_enrichment = _feature_toggle("Enable Formula understanding")
                picture_classification = _feature_toggle(
                    "Enable Picture classification"
                )
                picture_description = _feature_toggle("Enable Picture description")
                gr.Markdown(
                    "_**Warning:** Enabling any of these features can potentially increase the processing time._"
                )

                parse_button = gr.Button("Parse document")
                status = gr.Markdown()

            # Step 3: one button per export format, paired with its exporter.
            with gr.Column():
                gr.Markdown("### 3) Convert")

                export_actions = [
                    (gr.Button("Convert to HTML"), to_html),
                    (gr.Button("Convert to markdown"), to_markdown),
                    (gr.Button("Convert to JSON"), to_json),
                    (gr.Button("Convert to text"), to_text),
                ]

        # Holds the parsed document between the parse and convert steps.
        doc = gr.State()
        output = gr.Text(label="Output")

        parse_inputs = [
            file_output,
            code_understanding,
            formula_enrichment,
            picture_classification,
            picture_description,
        ]
        parse_button.click(
            fn=parse_document,
            inputs=parse_inputs,
            outputs=[doc, status],
        )

        for export_button, exporter in export_actions:
            export_button.click(
                fn=exporter,
                inputs=doc,
                outputs=output,
            )

    demo.launch()
166
+
167
+
168
# Build and launch the demo only when this file is executed as a script.
if __name__ == "__main__":
    setup_gradio_demo()