Mistral-OCR-API

Running

pandora-s committed on Mar 7

Commit

1cf0187

verified ·

1 Parent(s): 2128098

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -24,13 +24,13 @@ def replace_images_in_markdown(markdown_str: str, images_dict: dict) -> str:
 def get_combined_markdown(ocr_response) -> tuple:
     markdowns = []
-    raw_markdonws = []
     for page in ocr_response.pages:
         image_data = {}
         for img in page.images:
             image_data[img.id] = img.image_base64
         markdowns.append(replace_images_in_markdown(page.markdown, image_data))
-        markdowns.append(page.markdown)
     return "\n\n".join(markdowns), "\n\n".join(raw_markdowns),
 def get_content_type(url):
@@ -128,7 +128,7 @@ with gr.Blocks() as demo:
     with gr.Tab("Enter URL"):
         url_input = gr.Textbox(label="Enter a URL to a PDF or Image")
         ocr_method_url = gr.Dropdown(choices=["Mistral OCR"], label="Select OCR Method", value="Mistral OCR")
-        url_output = gr.Markdown()
         url_raw_output = gr.Textbox(label="Raw Markdown")
         url_button = gr.Button("Process")

 def get_combined_markdown(ocr_response) -> tuple:
     markdowns = []
+    raw_markdowns = []
     for page in ocr_response.pages:
         image_data = {}
         for img in page.images:
             image_data[img.id] = img.image_base64
         markdowns.append(replace_images_in_markdown(page.markdown, image_data))
+        raw_markdowns.append(page.markdown)
     return "\n\n".join(markdowns), "\n\n".join(raw_markdowns),
 def get_content_type(url):
     with gr.Tab("Enter URL"):
         url_input = gr.Textbox(label="Enter a URL to a PDF or Image")
         ocr_method_url = gr.Dropdown(choices=["Mistral OCR"], label="Select OCR Method", value="Mistral OCR")
+        url_output = gr.Markdown(label="Rendered Markdown")
         url_raw_output = gr.Textbox(label="Raw Markdown")
         url_button = gr.Button("Process")