Spaces:

TensorFlo
/

AutoAssess

Sleeping

App Files Files Community

AutoAssess / src /transcribe_image.py

TensorFlo

working version

20d4a20 6 months ago

raw

history blame

2.45 kB

	import base64
	from openai import OpenAI
	import os
	from PIL import Image
	import io

	def resize_image(image, max_size=800):
	    """Scale an image so its longer side equals `max_size`, keeping the aspect ratio.

	    Args:
	        image: Anything `PIL.Image.open` accepts (e.g. a path or a binary
	            file-like object).
	        max_size: Target length, in pixels, of the longer side.

	    Returns:
	        An `io.BytesIO`, positioned at offset 0, holding the resized image
	        encoded as PNG.

	    Note:
	        The image is always rescaled, so an image whose longer side is below
	        `max_size` is enlarged rather than left untouched.
	    """
	    # Use a context manager so the handle Pillow opens for path inputs is released.
	    with Image.open(image) as img:
	        width, height = img.size

	        # Pin the longer side to max_size and scale the other side proportionally.
	        # Clamp to 1 px: int() truncation would otherwise yield a 0-sized side for
	        # extreme aspect ratios, which makes resize() fail.
	        if width > height:
	            new_width = max_size
	            new_height = max(1, int((new_width / width) * height))
	        else:
	            new_height = max_size
	            new_width = max(1, int((new_height / height) * width))

	        # LANCZOS gives high-quality rescaling; the result is a new, fully loaded
	        # image that stays usable after the source is closed.
	        resized = img.resize((new_width, new_height), Image.Resampling.LANCZOS)

	    # Serialize to an in-memory buffer so the caller can base64-encode it.
	    img_byte_arr = io.BytesIO()
	    resized.save(img_byte_arr, format='PNG')
	    img_byte_arr.seek(0)  # Rewind so the caller reads from the start

	    return img_byte_arr

	def encode_image(image_path):
	    """Read the file at `image_path` and return its contents as a base64 string.

	    Args:
	        image_path: Filesystem path of the file to encode.

	    Returns:
	        The file's bytes, base64-encoded and decoded to a UTF-8 `str`.

	    Raises:
	        AssertionError: If nothing exists at `image_path`.
	    """
	    assert os.path.exists(image_path), "The image file does not exist."
	    with open(image_path, "rb") as handle:
	        raw_bytes = handle.read()
	    encoded = base64.b64encode(raw_bytes)
	    return encoded.decode('utf-8')

	def encode_image_from_uploaded_file(image):
	    """Resize an uploaded image and return the result as a base64 string.

	    Args:
	        image: The uploaded image; forwarded unchanged to `resize_image`.

	    Returns:
	        The bytes read from the resized-image buffer, base64-encoded and
	        decoded to a UTF-8 `str`.

	    Raises:
	        AssertionError: If `image` is None.
	    """
	    assert image is not None, "No image uploaded."
	    # resize_image hands back an in-memory buffer; read it in full before encoding
	    buffer = resize_image(image)
	    encoded = base64.b64encode(buffer.read())
	    return encoded.decode('utf-8')

	def transcribe_image(image_file):
	    """Transcribe handwritten text from an image via the OpenAI chat completions API.

	    Args:
	        image_file: The image to transcribe; forwarded to
	            `encode_image_from_uploaded_file`.

	    Returns:
	        The message content of the first completion choice.
	    """
	    # Client is built with library defaults (credentials are resolved by the SDK)
	    client = OpenAI()

	    # Resize the image and base64-encode it for inline transport
	    base64_image = encode_image_from_uploaded_file(image_file)

	    # The encoded payload is PNG (resize_image saves with format='PNG'), so the
	    # data URL must declare image/png rather than image/jpeg.
	    image_url = f"data:image/png;base64,{base64_image}"

	    response = client.chat.completions.create(
	        model="gpt-4o-mini",
	        messages=[
	            {
	                "role": "user",
	                "content": [
	                    {"type": "text", "text": "Please transcribe the handwritten text in this image. Return only the text content."},
	                    {
	                        "type": "image_url",
	                        "image_url": {"url": image_url},
	                    },
	                ],
	            }
	        ],
	        max_tokens=300,
	    )

	    transcribed_text = response.choices[0].message.content

	    return transcribed_text