ai42 committed on
Commit 0102fad · 1 Parent(s): 6d2e6dd

Update app.py

Files changed (1)
  1. app.py +10 -444
app.py CHANGED
@@ -1,444 +1,10 @@
- import os
-
- os.environ["TOKENIZERS_PARALLELISM"] = "false"
-
- from PIL import Image, ImageDraw
- import traceback
-
- import pandas as pd
-
- import gradio as gr
-
- import torch
- from docquery import pipeline
- from docquery.document import load_document, ImageDocumenta
- from docquery.ocr_reader import get_ocr_reader
-
-
- def ensure_list(x):
-     if isinstance(x, list):
-         return x
-     else:
-         return [x]
-
-
- CHECKPOINTS = {
-     "LayoutLMv1 🦉": "impira/layoutlm-document-qa",
-     "LayoutLMv1 for Invoices 💸": "impira/layoutlm-invoices",
-     "Donut 🍩": "naver-clova-ix/donut-base-finetuned-docvqa",
-     "ggml-Vicuna": "eachadea/ggml-vicuna-13b-1.1",
- }
-
- PIPELINES = {}
-
-
- def construct_pipeline(task, model):
-     global PIPELINES
-     if model in PIPELINES:
-         return PIPELINES[model]
-
-     device = "cuda" if torch.cuda.is_available() else "cpu"
-     ret = pipeline(task=task, model=CHECKPOINTS[model], device=device)
-     PIPELINES[model] = ret
-     return ret
-
-
- def run_pipeline(model, question, document, top_k):
-     pipeline = construct_pipeline("document-question-answering", model)
-     return pipeline(question=question, **document.context, top_k=top_k)
-
-
- # TODO: Move into docquery
- # TODO: Support words past the first page (or window?)
- def lift_word_boxes(document, page):
-     return document.context["image"][page][1]
-
-
- def expand_bbox(word_boxes):
-     if len(word_boxes) == 0:
-         return None
-
-     min_x, min_y, max_x, max_y = zip(*[x[1] for x in word_boxes])
-     min_x, min_y, max_x, max_y = [min(min_x), min(min_y), max(max_x), max(max_y)]
-     return [min_x, min_y, max_x, max_y]
-
-
- # LayoutLM boxes are normalized to 0, 1000
- def normalize_bbox(box, width, height, padding=0.005):
-     min_x, min_y, max_x, max_y = [c / 1000 for c in box]
-     if padding != 0:
-         min_x = max(0, min_x - padding)
-         min_y = max(0, min_y - padding)
-         max_x = min(max_x + padding, 1)
-         max_y = min(max_y + padding, 1)
-     return [min_x * width, min_y * height, max_x * width, max_y * height]
-
-
- examples = [
-     [
-         "invoice.png",
-         "What is the invoice number?",
-     ],
-     [
-         "contract.jpeg",
-         "What is the purchase amount?",
-     ],
-     [
-         "statement.png",
-         "What are net sales for 2020?",
-     ],
-     [
-         "SaleData.xlsx",
-         "What is the highest sale amount of televsion in east region?",
-
-     ]
-     # [
-     #     "docquery.png",
-     #     "How many likes does the space have?",
-     # ],
-     # [
-     #     "hacker_news.png",
-     #     "What is the title of post number 5?",
-     # ],
- ]
-
- question_files = {
-     "What are net sales for 2020?": "statement.pdf",
-     "How many likes does the space have?": "https://huggingface.co/spaces/impira/docquery",
-     "What is the title of post number 5?": "https://news.ycombinator.com",
- }
-
-
- def process_path(path):
-     error = None
-     if path:
-         try:
-             document = load_document(path)
-             return (
-                 document,
-                 gr.update(visible=True, value=document.preview),
-                 gr.update(visible=True),
-                 gr.update(visible=False, value=None),
-                 gr.update(visible=False, value=None),
-                 None,
-             )
-         except Exception as e:
-             traceback.print_exc()
-             error = str(e)
-     return (
-         None,
-         gr.update(visible=False, value=None),
-         gr.update(visible=False),
-         gr.update(visible=False, value=None),
-         gr.update(visible=False, value=None),
-         gr.update(visible=True, value=error) if error is not None else None,
-         None,
-     )
-
-
- def process_upload(file, excel_file):
-     if file:
-         return process_path(file.name)
-     if excel_file:
-         excel_data = pd.read_excel(excel_file)
-         return process_path(excel_file.name)
-     else:
-         return (
-             None,
-             gr.update(visible=False, value=None),
-             gr.update(visible=False),
-             gr.update(visible=False, value=None),
-             gr.update(visible=False, value=None),
-             None,
-         )
-
-
-
-
-
-
- colors = ["#64A087", "green", "black"]
-
-
- def process_question(question, document, model=list(CHECKPOINTS.keys())[0]):
-     if not question or document is None:
-         return None, None, None
-
-     text_value = None
-     predictions = run_pipeline(model, question, document, 3)
-     pages = [x.copy().convert("RGB") for x in document.preview]
-     for i, p in enumerate(ensure_list(predictions)):
-         if i == 0:
-             text_value = p["answer"]
-         else:
-             # Keep the code around to produce multiple boxes, but only show the top
-             # prediction for now
-             break
-
-         if "word_ids" in p:
-             image = pages[p["page"]]
-             draw = ImageDraw.Draw(image, "RGBA")
-             word_boxes = lift_word_boxes(document, p["page"])
-             x1, y1, x2, y2 = normalize_bbox(
-                 expand_bbox([word_boxes[i] for i in p["word_ids"]]),
-                 image.width,
-                 image.height,
-             )
-             draw.rectangle(((x1, y1), (x2, y2)), fill=(0, 255, 0, int(0.4 * 255)))
-
-     return (
-         gr.update(visible=True, value=pages),
-         gr.update(visible=True, value=predictions),
-         gr.update(
-             visible=True,
-             value=text_value,
-         ),
-     )
-
-
- def load_example_document(img, question, model):
-     if img is not None:
-         if question in question_files:
-             document = load_document(question_files[question])
-         else:
-             document = ImageDocument(Image.fromarray(img), get_ocr_reader())
-         preview, answer, answer_text = process_question(question, document, model)
-         return document, question, preview, gr.update(visible=True), answer, answer_text
-     else:
-         return None, None, None, gr.update(visible=False), None, None
-
-
- CSS = """
- #question input {
-     font-size: 16px;
- }
- #url-textbox {
-     padding: 0 !important;
- }
- #short-upload-box .w-full {
-     min-height: 10rem !important;
- }
- /* I think something like this can be used to re-shape
-  * the table
-  */
- /*
- .gr-samples-table tr {
-     display: inline;
- }
- .gr-samples-table .p-2 {
-     width: 100px;
- }
- */
- #select-a-file {
-     width: 100%;
- }
- #file-clear {
-     padding-top: 2px !important;
-     padding-bottom: 2px !important;
-     padding-left: 8px !important;
-     padding-right: 8px !important;
-     margin-top: 10px;
- }
- .gradio-container .gr-button-primary {
-     background: linear-gradient(180deg, #CDF9BE 0%, #AFF497 100%);
-     border: 1px solid #B0DCCC;
-     border-radius: 8px;
-     color: #1B8700;
- }
- .gradio-container.dark button#submit-button {
-     background: linear-gradient(180deg, #CDF9BE 0%, #AFF497 100%);
-     border: 1px solid #B0DCCC;
-     border-radius: 8px;
-     color: #1B8700
- }
-
- table.gr-samples-table tr td {
-     border: none;
-     outline: none;
- }
-
- table.gr-samples-table tr td:first-of-type {
-     width: 0%;
- }
-
- div#short-upload-box div.absolute {
-     display: none !important;
- }
-
- gradio-app > div > div > div > div.w-full > div, .gradio-app > div > div > div > div.w-full > div {
-     gap: 0px 2%;
- }
-
- gradio-app div div div div.w-full, .gradio-app div div div div.w-full {
-     gap: 0px;
- }
-
- gradio-app h2, .gradio-app h2 {
-     padding-top: 10px;
- }
-
- #answer {
-     overflow-y: scroll;
-     color: white;
-     background: #666;
-     border-color: #666;
-     font-size: 20px;
-     font-weight: bold;
- }
-
- #answer span {
-     color: white;
- }
-
- #answer textarea {
-     color:white;
-     background: #777;
-     border-color: #777;
-     font-size: 18px;
- }
-
- #url-error input {
-     color: red;
- }
- """
-
- with gr.Blocks(css=CSS) as demo:
-     gr.Markdown("# Document Query Engine")
-
-
-     document = gr.Variable()
-     example_question = gr.Textbox(visible=False)
-     example_image = gr.Image(visible=False)
-     excel_upload = gr.File(label="Upload Excel", type="xlsx", elem_id="excel-upload-box")
-
-     excel_process_button = gr.Button("Process Excel", variant="primary", elem_id="excel-process-button")
-
-     with gr.Row(equal_height=True):
-         with gr.Column():
-             with gr.Row():
-                 gr.Markdown("## 1. Select a file", elem_id="select-a-file")
-                 img_clear_button = gr.Button(
-                     "Clear", variant="secondary", elem_id="file-clear", visible=False
-                 )
-             image = gr.Gallery(visible=False)
-             with gr.Row(equal_height=True):
-                 with gr.Column():
-                     with gr.Row():
-                         url = gr.Textbox(
-                             show_label=False,
-                             placeholder="URL",
-                             lines=1,
-                             max_lines=1,
-                             elem_id="url-textbox",
-                         )
-                         submit = gr.Button("Get")
-                     url_error = gr.Textbox(
-                         visible=False,
-                         elem_id="url-error",
-                         max_lines=1,
-                         interactive=False,
-                         label="Error",
-                     )
-                 gr.Markdown("— or —")
-                 upload = gr.File(label=None, interactive=True, elem_id="short-upload-box")
-                 gr.Examples(
-                     examples=examples,
-                     inputs=[example_image, example_question],
-                 )
-
-         with gr.Column() as col:
-             gr.Markdown("## 2. Ask a question")
-             question = gr.Textbox(
-                 label="Question",
-                 placeholder="e.g. What is the invoice number?",
-                 lines=1,
-                 max_lines=1,
-             )
-             model = gr.Radio(
-                 choices=list(CHECKPOINTS.keys()),
-                 value=list(CHECKPOINTS.keys())[0],
-                 label="Model",
-             )
-
-             with gr.Row():
-                 clear_button = gr.Button("Clear", variant="secondary")
-                 submit_button = gr.Button(
-                     "Submit", variant="primary", elem_id="submit-button"
-                 )
-             with gr.Column():
-                 output_text = gr.Textbox(
-                     label="Top Answer", visible=False, elem_id="answer"
-                 )
-                 output = gr.JSON(label="Output", visible=False)
-
-     for cb in [img_clear_button, clear_button]:
-         cb.click(
-             lambda _: (
-                 gr.update(visible=False, value=None),
-                 None,
-                 gr.update(visible=False, value=None),
-                 gr.update(visible=False, value=None),
-                 gr.update(visible=False),
-                 None,
-                 None,
-                 None,
-                 gr.update(visible=False, value=None),
-                 None,
-             ),
-             inputs=clear_button,
-             outputs=[
-                 image,
-                 document,
-                 output,
-                 output_text,
-                 img_clear_button,
-                 example_image,
-                 upload,
-                 url,
-                 url_error,
-                 question,
-             ],
-         )
-
-     upload.change(
-         fn=process_upload,
-         inputs=[upload],
-         outputs=[document, image, img_clear_button, output, output_text, url_error],
-     )
-     submit.click(
-         fn=process_path,
-         inputs=[url],
-         outputs=[document, image, img_clear_button, output, output_text, url_error],
-     )
-
-     question.submit(
-         fn=process_question,
-         inputs=[question, document, model],
-         outputs=[image, output, output_text],
-     )
-
-     submit_button.click(
-         process_question,
-         inputs=[question, document, model],
-         outputs=[image, output, output_text],
-     )
-
-     model.change(
-         process_question,
-         inputs=[question, document, model],
-         outputs=[image, output, output_text],
-     )
-
-     example_image.change(
-         fn=load_example_document,
-         inputs=[example_image, example_question, model],
-         outputs=[document, question, image, img_clear_button, output, output_text],
-     )
-     upload.change(
-         fn=process_upload,
-         inputs=[upload, excel_upload],
-         outputs=[document, image_preview, img_clear_button, output, output_text, url_error],
-     )
-
- if __name__ == "__main__":
-     demo.launch(enable_queue=False)
 
+ The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.
+ Moving 0 files to the new cache system
+
+ 0it [00:00, ?it/s]
+ 0it [00:00, ?it/s]
+ image-classification is already registered. Overwriting pipeline for task image-classification...
+ Traceback (most recent call last):
+   File "/home/user/app/app.py", line 14, in <module>
+     from docquery.document import load_document, ImageDocumenta
+ ImportError: cannot import name 'ImageDocumenta' from 'docquery.document' (/home/user/.local/lib/python3.10/site-packages/docquery/document.py)
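
The traceback points at line 14 of the removed file, where `ImageDocumenta` is not a name that `docquery.document` exports. A minimal sketch of the corrected import, assuming the intended class is `ImageDocument` (the name the removed code itself instantiates inside `load_example_document`):

    # Hypothetical fix for the ImportError above: import ImageDocument rather than
    # the misspelled ImageDocumenta; load_document and get_ocr_reader stay unchanged.
    from docquery.document import load_document, ImageDocument
    from docquery.ocr_reader import get_ocr_reader

    # Illustrative usage with one of the example files from the removed app.py:
    doc = load_document("invoice.png")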