Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -24,13 +24,13 @@ def replace_images_in_markdown(markdown_str: str, images_dict: dict) -> str:
|
|
24 |
|
25 |
def get_combined_markdown(ocr_response) -> tuple:
    """Combine the markdown of every OCR page into two strings.

    Args:
        ocr_response: Object exposing ``pages``. Each page is read for its
            ``markdown`` text and its ``images`` (each image providing ``id``
            and ``image_base64``).

    Returns:
        A 2-tuple ``(rendered, raw)``. ``rendered`` joins each page's markdown
        after it has been passed through ``replace_images_in_markdown``
        together with that page's image-id -> base64 mapping; ``raw`` joins
        the untouched page markdown. Pages are separated by a blank line in
        both strings.
    """
    markdowns = []
    # The return statement joins this list, so it has to exist even when the
    # response has no pages; without it every call raised NameError.
    raw_markdowns = []
    for page in ocr_response.pages:
        image_data = {}
        for img in page.images:
            image_data[img.id] = img.image_base64
        markdowns.append(replace_images_in_markdown(page.markdown, image_data))
        # Keep the unmodified page text alongside the image-substituted one.
        raw_markdowns.append(page.markdown)
    return "\n\n".join(markdowns), "\n\n".join(raw_markdowns)
|
35 |
|
36 |
def get_content_type(url):
|
@@ -128,7 +128,7 @@ with gr.Blocks() as demo:
|
|
128 |
with gr.Tab("Enter URL"):
|
129 |
url_input = gr.Textbox(label="Enter a URL to a PDF or Image")
|
130 |
ocr_method_url = gr.Dropdown(choices=["Mistral OCR"], label="Select OCR Method", value="Mistral OCR")
|
131 |
-
url_output = gr.Markdown()
|
132 |
url_raw_output = gr.Textbox(label="Raw Markdown")
|
133 |
url_button = gr.Button("Process")
|
134 |
|
|
|
24 |
|
25 |
def get_combined_markdown(ocr_response) -> tuple:
    """Build the rendered and raw markdown for a whole OCR response.

    For every page in ``ocr_response.pages`` the page's images are collected
    into an ``id -> image_base64`` mapping, which is handed to
    ``replace_images_in_markdown`` along with the page markdown. The original
    page markdown is kept as well.

    Returns:
        ``(rendered, raw)`` -- two strings in which the per-page results are
        joined with a blank line between pages.
    """
    rendered_pages = []
    raw_pages = []
    for page in ocr_response.pages:
        # Map each image id on this page to its base64 payload.
        base64_by_id = {image.id: image.image_base64 for image in page.images}
        rendered_pages.append(
            replace_images_in_markdown(page.markdown, base64_by_id)
        )
        raw_pages.append(page.markdown)
    page_separator = "\n\n"
    return (
        page_separator.join(rendered_pages),
        page_separator.join(raw_pages),
    )
|
35 |
|
36 |
def get_content_type(url):
|
|
|
128 |
with gr.Tab("Enter URL"):
|
129 |
url_input = gr.Textbox(label="Enter a URL to a PDF or Image")
|
130 |
ocr_method_url = gr.Dropdown(choices=["Mistral OCR"], label="Select OCR Method", value="Mistral OCR")
|
131 |
+
url_output = gr.Markdown(label="Rendered Markdown")
|
132 |
url_raw_output = gr.Textbox(label="Raw Markdown")
|
133 |
url_button = gr.Button("Process")
|
134 |
|