# Source: Hugging Face Space (status when captured: Sleeping).
import os | |
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0' | |
from transformers import VisionEncoderDecoderModel, TrOCRProcessor | |
from PIL import Image | |
import io | |
import base64 | |
# Hugging Face Hub checkpoint shared by the processor and the model. Keeping
# the identifier in one place guarantees both are loaded from the same
# checkpoint.
MODEL_ID = "anuashok/ocr-captcha-v3"

# Load the processor and the model once, at import time, so that every call
# to resolve_captcha reuses the same objects.
processor = TrOCRProcessor.from_pretrained(MODEL_ID, use_fast=True)
model = VisionEncoderDecoderModel.from_pretrained(MODEL_ID)
def _load_rgba_image(image_path):
    """Open *image_path* and return its pixels as an RGBA PIL image.

    Args:
        image_path: Either a string starting with ``data:image`` (treated as
            a base64 data URI), or any other value, which is handed to
            ``Image.open`` unchanged.

    Returns:
        A new image in ``"RGBA"`` mode.

    Raises:
        ValueError: If a data URI has no ``,`` separating the header from
            the base64 payload.
    """
    if isinstance(image_path, str) and image_path.startswith('data:image'):
        # partition() splits on the first comma only and, unlike
        # split(',')[1], lets a missing separator be reported explicitly
        # instead of surfacing as a bare IndexError.
        _header, separator, base64_data = image_path.partition(',')
        if not separator:
            raise ValueError(
                "Malformed data URI: expected "
                "'data:image/<type>;base64,<payload>'"
            )
        source = io.BytesIO(base64.b64decode(base64_data))
    else:
        # Handle as a regular file path.
        source = image_path

    # Convert inside the context manager: convert() returns a new image, so
    # the opened source image can be closed as soon as the block exits.
    with Image.open(source) as raw_image:
        return raw_image.convert("RGBA")


def resolve_captcha(image_path):
    """Return the text that the OCR model reads from a captcha image.

    The image is composited onto a white background before it is passed to
    the module-level ``processor`` and ``model``.

    Args:
        image_path: A file path, or a base64 data URI string starting with
            ``data:image``.

    Returns:
        The first decoded sequence produced by the model, with special
        tokens skipped.

    Raises:
        ValueError: If *image_path* is a data URI without a ``,`` separator.
    """
    image = _load_rgba_image(image_path)

    # Flatten any transparency onto an opaque white canvas, then drop the
    # alpha channel to obtain an RGB image.
    background = Image.new("RGBA", image.size, (255, 255, 255, 255))
    combined = Image.alpha_composite(background, image).convert("RGB")

    # Prepare the image for the model.
    pixel_values = processor(combined, return_tensors="pt").pixel_values

    # Generate token ids and decode them back to text.
    generated_ids = model.generate(pixel_values)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]