pandora-s committed on
Commit
7acf9a1
·
verified ·
1 Parent(s): 5779733

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -3
app.py CHANGED
@@ -31,7 +31,7 @@ def get_combined_markdown(ocr_response) -> tuple:
31
  image_data[img.id] = img.image_base64
32
  markdowns.append(replace_images_in_markdown(page.markdown, image_data))
33
  raw_markdowns.append(page.markdown)
34
- return "\n\n".join(markdowns), "\n\n".join(raw_markdowns),
35
 
36
  def get_content_type(url):
37
  """Fetch the content type of the URL."""
@@ -104,13 +104,13 @@ def perform_ocr_url(url, ocr_method="Mistral OCR"):
104
  return "Unsupported file type. Please provide a URL to a PDF or an image.", ""
105
 
106
  combined_markdown, raw_markdown = get_combined_markdown(ocr_response)
107
- return combined_markdown, raw_markdow
108
 
109
  return "## Method not supported.", "Method not supported."
110
 
111
  with gr.Blocks() as demo:
112
  gr.Markdown("# OCR App")
113
- gr.Markdown("Upload a PDF or an image, or provide a URL to extract text and images using Mistral OCR capabities.\n\nLearn more in our blog post [here](https://mistral.ai/news/mistral-ocr).")
114
 
115
  with gr.Tab("Upload File"):
116
  file_input = gr.File(label="Upload a PDF or Image")
@@ -119,6 +119,14 @@ with gr.Blocks() as demo:
119
  file_raw_output = gr.Textbox(label="Raw Markdown")
120
  file_button = gr.Button("Process")
121
 
 
 
 
 
 
 
 
 
122
  file_button.click(
123
  fn=perform_ocr_file,
124
  inputs=[file_input, ocr_method_file],
@@ -132,6 +140,14 @@ with gr.Blocks() as demo:
132
  url_raw_output = gr.Textbox(label="Raw Markdown")
133
  url_button = gr.Button("Process")
134
 
 
 
 
 
 
 
 
 
135
  url_button.click(
136
  fn=perform_ocr_url,
137
  inputs=[url_input, ocr_method_url],
 
31
  image_data[img.id] = img.image_base64
32
  markdowns.append(replace_images_in_markdown(page.markdown, image_data))
33
  raw_markdowns.append(page.markdown)
34
+ return "\n\n".join(markdowns), "\n\n".join(raw_markdowns)
35
 
36
  def get_content_type(url):
37
  """Fetch the content type of the URL."""
 
104
  return "Unsupported file type. Please provide a URL to a PDF or an image.", ""
105
 
106
  combined_markdown, raw_markdown = get_combined_markdown(ocr_response)
107
+ return combined_markdown, raw_markdown
108
 
109
  return "## Method not supported.", "Method not supported."
110
 
111
  with gr.Blocks() as demo:
112
  gr.Markdown("# OCR App")
113
+ gr.Markdown("Upload a PDF or an image, or provide a URL to extract text and images using Mistral OCR capabilities.\n\nLearn more in our blog post [here](https://mistral.ai/news/mistral-ocr).")
114
 
115
  with gr.Tab("Upload File"):
116
  file_input = gr.File(label="Upload a PDF or Image")
 
119
  file_raw_output = gr.Textbox(label="Raw Markdown")
120
  file_button = gr.Button("Process")
121
 
122
+ example_files = gr.Examples(
123
+ examples=[
124
+ "pixtral-12b.pdf",
125
+ "receipt.png"
126
+ ],
127
+ inputs=[file_input]
128
+ )
129
+
130
  file_button.click(
131
  fn=perform_ocr_file,
132
  inputs=[file_input, ocr_method_file],
 
140
  url_raw_output = gr.Textbox(label="Raw Markdown")
141
  url_button = gr.Button("Process")
142
 
143
+ example_urls = gr.Examples(
144
+ examples=[
145
+ "https://arxiv.org/pdf/2410.07073",
146
+ "https://raw.githubusercontent.com/mistralai/cookbook/refs/heads/main/mistral/ocr/receipt.png"
147
+ ],
148
+ inputs=[url_input]
149
+ )
150
+
151
  url_button.click(
152
  fn=perform_ocr_url,
153
  inputs=[url_input, ocr_method_url],