Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -31,7 +31,7 @@ def get_combined_markdown(ocr_response) -> tuple:
|
|
31 |
image_data[img.id] = img.image_base64
|
32 |
markdowns.append(replace_images_in_markdown(page.markdown, image_data))
|
33 |
raw_markdowns.append(page.markdown)
|
34 |
-
return "\n\n".join(markdowns), "\n\n".join(raw_markdowns)
|
35 |
|
36 |
def get_content_type(url):
|
37 |
"""Fetch the content type of the URL."""
|
@@ -104,13 +104,13 @@ def perform_ocr_url(url, ocr_method="Mistral OCR"):
|
|
104 |
return "Unsupported file type. Please provide a URL to a PDF or an image.", ""
|
105 |
|
106 |
combined_markdown, raw_markdown = get_combined_markdown(ocr_response)
|
107 |
-
return combined_markdown,
|
108 |
|
109 |
return "## Method not supported.", "Method not supported."
|
110 |
|
111 |
with gr.Blocks() as demo:
|
112 |
gr.Markdown("# OCR App")
|
113 |
-
gr.Markdown("Upload a PDF or an image, or provide a URL to extract text and images using Mistral OCR
|
114 |
|
115 |
with gr.Tab("Upload File"):
|
116 |
file_input = gr.File(label="Upload a PDF or Image")
|
@@ -119,6 +119,14 @@ with gr.Blocks() as demo:
|
|
119 |
file_raw_output = gr.Textbox(label="Raw Markdown")
|
120 |
file_button = gr.Button("Process")
|
121 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
122 |
file_button.click(
|
123 |
fn=perform_ocr_file,
|
124 |
inputs=[file_input, ocr_method_file],
|
@@ -132,6 +140,14 @@ with gr.Blocks() as demo:
|
|
132 |
url_raw_output = gr.Textbox(label="Raw Markdown")
|
133 |
url_button = gr.Button("Process")
|
134 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
135 |
url_button.click(
|
136 |
fn=perform_ocr_url,
|
137 |
inputs=[url_input, ocr_method_url],
|
|
|
31 |
image_data[img.id] = img.image_base64
|
32 |
markdowns.append(replace_images_in_markdown(page.markdown, image_data))
|
33 |
raw_markdowns.append(page.markdown)
|
34 |
+
return "\n\n".join(markdowns), "\n\n".join(raw_markdowns)
|
35 |
|
36 |
def get_content_type(url):
|
37 |
"""Fetch the content type of the URL."""
|
|
|
104 |
return "Unsupported file type. Please provide a URL to a PDF or an image.", ""
|
105 |
|
106 |
combined_markdown, raw_markdown = get_combined_markdown(ocr_response)
|
107 |
+
return combined_markdown, raw_markdown
|
108 |
|
109 |
return "## Method not supported.", "Method not supported."
|
110 |
|
111 |
with gr.Blocks() as demo:
|
112 |
gr.Markdown("# OCR App")
|
113 |
+
gr.Markdown("Upload a PDF or an image, or provide a URL to extract text and images using Mistral OCR capabilities.\n\nLearn more in our blog post [here](https://mistral.ai/news/mistral-ocr).")
|
114 |
|
115 |
with gr.Tab("Upload File"):
|
116 |
file_input = gr.File(label="Upload a PDF or Image")
|
|
|
119 |
file_raw_output = gr.Textbox(label="Raw Markdown")
|
120 |
file_button = gr.Button("Process")
|
121 |
|
122 |
+
example_files = gr.Examples(
|
123 |
+
examples=[
|
124 |
+
"pixtral-12b.pdf",
|
125 |
+
"receipt.png"
|
126 |
+
],
|
127 |
+
inputs=[file_input]
|
128 |
+
)
|
129 |
+
|
130 |
file_button.click(
|
131 |
fn=perform_ocr_file,
|
132 |
inputs=[file_input, ocr_method_file],
|
|
|
140 |
url_raw_output = gr.Textbox(label="Raw Markdown")
|
141 |
url_button = gr.Button("Process")
|
142 |
|
143 |
+
example_urls = gr.Examples(
|
144 |
+
examples=[
|
145 |
+
"https://arxiv.org/pdf/2410.07073",
|
146 |
+
"https://raw.githubusercontent.com/mistralai/cookbook/refs/heads/main/mistral/ocr/receipt.png"
|
147 |
+
],
|
148 |
+
inputs=[url_input]
|
149 |
+
)
|
150 |
+
|
151 |
url_button.click(
|
152 |
fn=perform_ocr_url,
|
153 |
inputs=[url_input, ocr_method_url],
|