Spaces:
Sleeping
Sleeping
import base64 | |
from openai import OpenAI | |
import os | |
from PIL import Image | |
import io | |
def resize_image(image, max_size=800):
    """Downscale an image so its longest side is at most `max_size` pixels.

    The aspect ratio is preserved. An image that already fits within
    `max_size` is not resampled (it is never upscaled); it is only
    re-encoded as PNG.

    Args:
        image: Whatever `PIL.Image.open` accepts (it is passed straight through).
        max_size: Upper bound, in pixels, for the longer side of the result.

    Returns:
        An `io.BytesIO` containing the PNG-encoded image, rewound to offset 0.
    """
    # The context manager releases the underlying file handle once the
    # pixels have been written to the in-memory buffer.
    with Image.open(image) as source:
        width, height = source.size
        if max(width, height) > max_size:
            # Pin the longer side to max_size and scale the other one.
            # Clamp to 1 px: int() truncation would otherwise produce a
            # zero-sized side for extreme aspect ratios.
            if width > height:
                target_size = (max_size, max(1, int((max_size / width) * height)))
            else:
                target_size = (max(1, int((max_size / height) * width)), max_size)
            # LANCZOS filter for high-quality rescaling
            output = source.resize(target_size, Image.Resampling.LANCZOS)
        else:
            output = source
        # Save to a BytesIO object so the caller can base64-encode it later
        png_buffer = io.BytesIO()
        output.save(png_buffer, format='PNG')
    png_buffer.seek(0)  # Rewind so the caller can read from the beginning
    return png_buffer
# Function to encode the image
def encode_image(image_path):
    """Return the contents of the file at `image_path` as a base64 string.

    Args:
        image_path: Filesystem path of the file to encode.

    Returns:
        The file's bytes, base64-encoded and decoded to a UTF-8 `str`.

    Raises:
        AssertionError: If `image_path` does not exist.
    """
    # Raised explicitly instead of via `assert` so the check still runs under
    # `python -O`; the exception type callers see is unchanged.
    if not os.path.exists(image_path):
        raise AssertionError("The image file does not exist.")
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')
def encode_image_from_uploaded_file(image):
    """Resize an uploaded image and return it as a base64 string.

    Args:
        image: The uploaded image; it is handed to `resize_image` unchanged.

    Returns:
        The resized image's bytes, base64-encoded and decoded to a UTF-8 `str`.

    Raises:
        AssertionError: If `image` is None.
    """
    # Raised explicitly instead of via `assert` so a missing upload is still
    # reported clearly under `python -O`; the exception type is unchanged.
    if image is None:
        raise AssertionError("No image uploaded.")
    resized_image = resize_image(image)
    # Convert image to bytes
    image_bytes = resized_image.read()
    return base64.b64encode(image_bytes).decode('utf-8')
def transcribe_image(image_file, *, model="gpt-4o-mini", max_tokens=300):
    """Transcribe handwritten text from an image using an OpenAI chat model.

    Args:
        image_file: The uploaded image; it is passed to
            `encode_image_from_uploaded_file` for resizing and encoding.
        model: Name of the chat-completions model to call.
        max_tokens: Cap on the length of the returned transcription.

    Returns:
        The message content of the first completion choice.
    """
    # Initialize the OpenAI client
    # NOTE(review): constructed with no arguments, so it presumably reads its
    # credentials from the environment; confirm for the deployment.
    client = OpenAI()
    # Encoding the image
    base64_image = encode_image_from_uploaded_file(image_file)
    # Preparing the API call
    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Please transcribe the handwritten text in this image. Return only the text content."},
                    {
                        "type": "image_url",
                        # The payload is PNG (resize_image saves with
                        # format='PNG'), so the data URL must say image/png.
                        "image_url": {"url": f"data:image/png;base64,{base64_image}"},
                    },
                ],
            }
        ],
        max_tokens=max_tokens,
    )
    return response.choices[0].message.content