Spaces:

Didier
/

Optical_character_recognition

Running

App Files Files Community

Didier committed about 1 month ago

Commit

6717123

verified ·

1 Parent(s): 0fd91f1

Upload ocr2.py

Browse files

Files changed (1) hide show

ocr2.py +112 -0

ocr2.py ADDED Viewed

	@@ -0,0 +1,112 @@

"""
File: ocr2.py
Description: Optical Character Recognition (OCR) using software 2.0 models
Author: Didier Guillevic
Date: 2025-04-07
"""
import base64
import mimetypes
import os
import sys

from mistralai import Mistral
#
# MistralAI client
#
# The API key is read from the MISTRAL_API_KEY environment variable when this
# module is imported; if the variable is not set, the lookup raises KeyError.
api_key = os.environ["MISTRAL_API_KEY"]
client = Mistral(api_key=api_key)
#
# Process PDF file
#
def process_pdf(pdf_path: str) -> str:
    """Run Mistral OCR on a local PDF file and return its text as Markdown.

    Args:
        pdf_path: Path to a local PDF file.

    Returns:
        str: The Markdown produced by the OCR model for all pages of the
            document, with consecutive pages separated by a blank line.
            An empty string if the response contains no pages.

    Note:
        We follow the Mistral API documentation: the file is uploaded to
        the Mistral API, a signed URL is requested for the uploaded file,
        and OCR is performed on that URL.
            https://docs.mistral.ai/capabilities/document/
    """
    # Use a context manager so the local file handle is closed as soon as
    # the upload call returns instead of being leaked.
    with open(pdf_path, "rb") as pdf_file:
        uploaded_pdf = client.files.upload(
            file={
                # Send only the file's base name, not the full local path.
                "file_name": os.path.basename(pdf_path),
                "content": pdf_file,
            },
            purpose="ocr",
        )

    signed_url = client.files.get_signed_url(file_id=uploaded_pdf.id)
    ocr_response = client.ocr.process(
        model="mistral-ocr-latest",
        document={"type": "document_url", "document_url": signed_url.url},
    )

    # `pages` is a sequence of per-page results (per the Mistral OCR API);
    # return every page rather than only the first one.
    return "\n\n".join(page.markdown for page in ocr_response.pages)
#
# Process image file
#
def process_image(image_path: str) -> str:
    """Process given image file: extract information present in image.

    Args:
        image_path: Path to a local image file.

    Returns:
        str: The content of the first choice returned by the chat model.

    Raises:
        ValueError: If the image file could not be read and encoded.

    Note:
        Although it should "work", when I process an image file with
        Mistral_OCR, I get an empty result. Everything appears fine, but no
        text is extracted. Hence, I send the image to a model such as
        Mistral_Small (or Mistral_Large) to extract the text present in the
        image.
    """
    encoded_image = encode_image(image_path)
    if encoded_image is None:
        # encode_image reports failure by returning None; without this guard
        # the literal text "None" would be sent as the image payload.
        raise ValueError(f"Could not read image file: {image_path}")

    # Label the payload with the type guessed from the file name (PNG, GIF,
    # ...); fall back to JPEG when no image type can be guessed.
    mime_type, _ = mimetypes.guess_type(image_path)
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": (
                        "Could you extract the information present in the image. "
                        "No need to repeat the task description. Simply respond."
                    ),
                },
                {
                    "type": "image_url",
                    "image_url": f"data:{mime_type};base64,{encoded_image}",
                },
            ],
        }
    ]
    response = client.chat.complete(
        model='mistral-small-latest',
        messages=messages,
    )
    return response.choices[0].message.content
#
# Encode images as base64
#
def encode_image(image_path: str) -> "str | None":
    """Read a file and return its contents encoded as base64 text.

    Args:
        image_path: Path to the file to read (opened in binary mode).

    Returns:
        The base64 encoding of the file's bytes as a str, or None if the
        file could not be read. Failures are reported on stderr rather than
        raised.
    """
    # Only the file access is guarded; encoding the bytes happens afterwards.
    try:
        with open(image_path, "rb") as image_file:
            raw_bytes = image_file.read()
    except FileNotFoundError:
        # Diagnostics go to stderr so they do not mix with program output.
        print(f"Error: The file {image_path} was not found.", file=sys.stderr)
        return None
    except Exception as e:
        # Kept broad on purpose: this helper signals any read failure by
        # returning None instead of raising.
        print(f"Error: {e}", file=sys.stderr)
        return None

    return base64.b64encode(raw_bytes).decode('utf-8')