# captcha-ocr / captcha.py
# Sumit Kumar
# Add captcha resolution functionality and update requirements
# 2937f6c
import os
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
from transformers import VisionEncoderDecoderModel, TrOCRProcessor
from PIL import Image
import io
import base64
# Load the TrOCR processor and the vision encoder-decoder model once, at
# import time, so every resolve_captcha() call reuses the same objects.
processor = TrOCRProcessor.from_pretrained(
    "anuashok/ocr-captcha-v3",
    use_fast=True,
)
model = VisionEncoderDecoderModel.from_pretrained("anuashok/ocr-captcha-v3")
def _load_rgba(image_path):
    """Open *image_path* and return a fully loaded RGBA copy of the image.

    Args:
        image_path: Either a ``data:image...`` URI string carrying a base64
            payload after the first comma, or anything ``Image.open``
            accepts (e.g. a file path).

    Returns:
        A new PIL image in ``"RGBA"`` mode, detached from the source file.

    Raises:
        ValueError: If a data URI has no comma-separated payload, or the
            payload is not decodable base64 (``binascii.Error``).
    """
    if isinstance(image_path, str) and image_path.startswith('data:image'):
        # A data URI is "<header>,<payload>"; the payload is the base64 text
        # that follows the first comma.
        parts = image_path.split(',')
        if len(parts) < 2:
            raise ValueError(
                "Malformed data URI: expected 'data:image/...;base64,<data>'"
            )
        source = io.BytesIO(base64.b64decode(parts[1]))
    else:
        # Anything else is handed to PIL unchanged (regular file path).
        source = image_path

    # Convert inside the ``with`` block: convert() loads the pixel data and
    # returns a new image, so the underlying file can be closed right away
    # instead of lingering until garbage collection.
    with Image.open(source) as opened:
        return opened.convert("RGBA")


def resolve_captcha(image_path):
    """Run the OCR model on a captcha image and return the decoded text.

    Args:
        image_path: A file path to the captcha image, or a ``data:image...``
            URI string containing the base64-encoded image.

    Returns:
        The first decoded string produced by the model, with special tokens
        removed.

    Raises:
        ValueError: If *image_path* is a data URI without a payload or with
            an undecodable base64 payload.
    """
    image = _load_rgba(image_path)

    # Composite over an opaque white canvas so transparent pixels become
    # white before the alpha channel is dropped.
    background = Image.new("RGBA", image.size, (255, 255, 255))
    combined = Image.alpha_composite(background, image).convert("RGB")

    # Prepare image for the model
    pixel_values = processor(combined, return_tensors="pt").pixel_values

    # Generate token ids and decode them back to text
    generated_ids = model.generate(pixel_values)
    decoded = processor.batch_decode(generated_ids, skip_special_tokens=True)
    return decoded[0]