import base64
import io
import logging
import os

from openai import OpenAI
from PIL import Image

logger = logging.getLogger(__name__)

# The resized upload is always re-encoded in this format. The MIME type placed
# in the data URL is kept right next to it so the two cannot drift apart
# (the payload used to be PNG bytes labelled as "image/jpeg").
_UPLOAD_FORMAT = "PNG"
_UPLOAD_MIME_TYPE = "image/png"


def resize_image(image, max_size: int = 800) -> io.BytesIO:
    """Shrink an image so its longest side is at most ``max_size`` pixels.

    The aspect ratio is preserved. Images that already fit within
    ``max_size`` are not rescaled (no upscaling); they are only re-encoded.

    Args:
        image: Anything ``PIL.Image.open`` accepts (a path or a binary
            file-like object).
        max_size: Maximum length, in pixels, of the longest side.

    Returns:
        A ``BytesIO`` positioned at offset 0 containing the PNG-encoded image.

    Raises:
        ValueError: If ``max_size`` is not positive.
    """
    if max_size <= 0:
        raise ValueError("max_size must be a positive integer.")

    # The context manager releases the source image's resources once the
    # resized copy has been written to the in-memory buffer.
    with Image.open(image) as img:
        width, height = img.size
        logger.debug("original image size: %s", img.size)

        output = img
        if max(width, height) > max_size:
            # Pin the longest side to max_size and scale the other one.
            # Clamp to 1 so extreme aspect ratios never yield a 0-pixel side,
            # which Pillow rejects.
            if width > height:
                new_size = (max_size, max(1, int((max_size / width) * height)))
            else:
                new_size = (max(1, int((max_size / height) * width)), max_size)
            # LANCZOS gives high-quality downscaling.
            output = img.resize(new_size, Image.Resampling.LANCZOS)
            logger.debug("image size after resizing: %s", output.size)

        img_byte_arr = io.BytesIO()
        output.save(img_byte_arr, format=_UPLOAD_FORMAT)

    img_byte_arr.seek(0)  # Rewind so callers can read from the start.
    return img_byte_arr


def encode_image(image_path: str) -> str:
    """Return the base64 text encoding of the file at ``image_path``.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The file's raw bytes, base64-encoded and decoded as UTF-8 text.

    Raises:
        FileNotFoundError: If ``image_path`` does not exist.
    """
    # Explicit exception instead of ``assert`` so the check survives ``-O``.
    if not os.path.exists(image_path):
        raise FileNotFoundError("The image file does not exist.")
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")


def encode_image_from_uploaded_file(image) -> str:
    """Resize an uploaded image and return it as base64-encoded PNG text.

    Args:
        image: The uploaded image; passed unchanged to ``resize_image``.

    Returns:
        The resized, PNG-encoded image as base64 text.

    Raises:
        ValueError: If ``image`` is ``None``.
    """
    # Explicit exception instead of ``assert`` so the check survives ``-O``.
    if image is None:
        raise ValueError("No image uploaded.")
    resized_image = resize_image(image)
    return base64.b64encode(resized_image.getvalue()).decode("utf-8")


def transcribe_image(image_file, *, model: str = "gpt-4o-mini", max_tokens: int = 300):
    """Transcribe handwritten text from an image via the OpenAI chat API.

    The image is resized and PNG-encoded, embedded in the request as a base64
    data URL, and sent together with a prompt asking for the text only.

    Args:
        image_file: The uploaded image; passed to
            ``encode_image_from_uploaded_file``.
        model: Name of the chat model to query.
        max_tokens: Upper bound on the length of the reply. Raise it for
            pages whose transcription would not fit in the default.

    Returns:
        The ``content`` of the first choice in the API response.

    Raises:
        ValueError: If ``image_file`` is ``None``.
    """
    client = OpenAI()

    base64_image = encode_image_from_uploaded_file(image_file)

    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": (
                            "Please transcribe the handwritten text in this image. "
                            "Return only the text content."
                        ),
                    },
                    {
                        "type": "image_url",
                        # The MIME type must describe the actual (PNG) payload.
                        "image_url": {
                            "url": f"data:{_UPLOAD_MIME_TYPE};base64,{base64_image}"
                        },
                    },
                ],
            }
        ],
        max_tokens=max_tokens,
    )

    return response.choices[0].message.content