Didier committed on
Commit
6717123
·
verified ·
1 Parent(s): 0fd91f1

Upload ocr2.py

Browse files
Files changed (1) hide show
  1. ocr2.py +112 -0
ocr2.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ File: ocr2.py
3
+
4
+ Description: Optical Character Recognition (OCR) using software 2.0 models
5
+
6
+ Author: Didier Guillevic
7
+ Date: 2025-04-07
8
+ """
9
+
10
+ import os
11
+ import base64
12
+ from mistralai import Mistral
13
+
14
+ #
15
+ # MistralAI client
16
+ #
17
# Shared MistralAI client, created once when the module is imported.
# The key is read from the MISTRAL_API_KEY environment variable; indexing
# os.environ raises KeyError at import time if the variable is not set.
api_key = os.environ["MISTRAL_API_KEY"]
client = Mistral(
    api_key=api_key,
)
19
+
20
+
21
+ #
22
+ # Process PDF file
23
+ #
24
def process_pdf(pdf_path: str) -> str:
    """Run Mistral OCR on a local PDF file and return its text as markdown.

    The file is uploaded to the Mistral API with purpose ``"ocr"``, a signed
    URL is requested for the uploaded file, and that URL is handed to the
    ``mistral-ocr-latest`` model, following the Mistral documentation:
    https://docs.mistral.ai/capabilities/document/

    Args:
        pdf_path: Path to a local PDF file.

    Returns:
        str: The markdown of every page of the OCR response, in the order
            the pages are returned, separated by blank lines. An empty
            string when the response contains no pages.

    Raises:
        OSError: If the file cannot be opened for reading.
    """
    # Open the file in a context manager so the handle is closed as soon as
    # the upload call returns, including when the upload raises.
    with open(pdf_path, "rb") as pdf_file:
        uploaded_pdf = client.files.upload(
            file={"file_name": pdf_path, "content": pdf_file},
            purpose="ocr",
        )
    signed_url = client.files.get_signed_url(file_id=uploaded_pdf.id)

    ocr_response = client.ocr.process(
        model="mistral-ocr-latest",
        document={"type": "document_url", "document_url": signed_url.url},
    )

    # Join all pages: returning pages[0] alone silently dropped every page
    # after the first and raised IndexError when no page came back.
    return "\n\n".join(page.markdown for page in ocr_response.pages)
53
+
54
+
55
+ #
56
+ # Process image file
57
+ #
58
def process_image(image_path: str) -> str:
    """Extract the information present in a local image file.

    Args:
        image_path: Path to a local image file.

    Returns:
        str: The text content of the first choice of the chat completion.

    Raises:
        ValueError: If the image file could not be read and encoded.

    Note:
        Although it should "work", processing an image file with Mistral_OCR
        gave an empty result: everything appeared fine, but no text was
        extracted. Hence the image is sent to a chat model
        (``mistral-small-latest``) which is asked to extract the text
        present in the image.
    """
    # Function-scope import keeps this block self-contained (stdlib only).
    import mimetypes

    encoded_image = encode_image(image_path)
    if encoded_image is None:
        # encode_image signals failure with None; without this check the
        # literal text "None" would be sent to the API as image data.
        raise ValueError(f"Could not read image file: {image_path}")

    # Label the payload with the type guessed from the file extension
    # instead of always claiming JPEG. JPEG stays the fallback so files
    # with an unknown or non-image extension are sent as before.
    mime_type, _ = mimetypes.guess_type(image_path)
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": (
                        "Could you extract the information present in the image. "
                        "No need to repeat the task description. Simply respond."
                    ),
                },
                {
                    "type": "image_url",
                    "image_url": f"data:{mime_type};base64,{encoded_image}",
                },
            ],
        }
    ]

    response = client.chat.complete(
        model='mistral-small-latest',
        messages=messages,
    )
    return response.choices[0].message.content
97
+
98
+
99
+ #
100
+ # Encode images as base64
101
+ #
102
def encode_image(image_path):
    """Return the content of a file as a base64 text string.

    Args:
        image_path: Path to the file to read (opened in binary mode).

    Returns:
        The base64 encoding of the file's bytes, decoded to a ``str``, or
        ``None`` when the file cannot be read. Failures are reported with
        ``print`` rather than raised.
    """
    try:
        with open(image_path, "rb") as handle:
            raw_bytes = handle.read()
        encoded_text = base64.b64encode(raw_bytes).decode('utf-8')
    except FileNotFoundError:
        # Missing file gets its own, more specific message.
        print(f"Error: The file {image_path} was not found.")
        return None
    except Exception as err:
        # Any other failure (permissions, bad path type, ...) is reported
        # the same best-effort way.
        print(f"Error: {err}")
        return None
    return encoded_text