"""Gradio demo app exposing the GOT-OCR2.0 model for several OCR tasks."""

import gradio as gr
import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer

# Load tokenizer and model once at import time.  trust_remote_code is required
# because the GOT-OCR2_0 checkpoint ships its own modeling code.
tokenizer = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
model = AutoModel.from_pretrained(
    "ucaslcl/GOT-OCR2_0",
    trust_remote_code=True,
    low_cpu_mem_usage=True,
    use_safetensors=True,
    device_map="cuda",
    pad_token_id=tokenizer.eos_token_id,
)
model = model.eval().cuda()


def run_ocr(image, task="Plain Text OCR", ocr_type="ocr", ocr_box="", ocr_color="red"):
    """Run GOT-OCR2.0 on *image* and return the recognized text.

    Parameters
    ----------
    image : str | PIL.Image.Image
        Path to an image file (what ``gr.Image(type="filepath")`` delivers)
        or an already-loaded PIL image.
    task : str
        UI task selector.  Only the two "Fine-grained" tasks consume
        *ocr_box* / *ocr_color*; the others use plain ``ocr_type`` chat.
    ocr_type : str
        "ocr" for plain text, "format" for formatted (e.g. LaTeX/markdown) output.
    ocr_box : str
        Optional bounding box string, e.g. "[100,100,200,200]".
    ocr_color : str
        Optional box color ("red" / "green" / "blue").

    Returns
    -------
    str
        The OCR result, or an error message for invalid input.
    """
    if isinstance(image, str):
        image = Image.open(image).convert("RGB")
    elif isinstance(image, Image.Image):
        image = image.convert("RGB")
    else:
        return "Invalid image input."

    # Forward the fine-grained arguments only for the tasks that use them.
    # NOTE(review): the remote model code's chat() accepts ocr_box/ocr_color
    # keywords per the model card — confirm against the pinned revision.
    chat_kwargs = {"ocr_type": ocr_type}
    if task == "Fine-grained OCR (Box)" and ocr_box:
        chat_kwargs["ocr_box"] = ocr_box
    elif task == "Fine-grained OCR (Color)" and ocr_color:
        chat_kwargs["ocr_color"] = ocr_color

    # Inference only — skip autograd bookkeeping to save GPU memory.
    with torch.no_grad():
        return model.chat(tokenizer, image, **chat_kwargs)


# Gradio UI.  The named-endpoint HTTP API is enabled by default; the former
# `allow_api=True` kwarg is not a gr.Interface parameter and raises TypeError
# on current Gradio releases, so it was removed.
iface = gr.Interface(
    fn=run_ocr,
    inputs=[
        gr.Image(type="filepath", label="Image"),
        gr.Dropdown(
            choices=[
                "Plain Text OCR",
                "Format Text OCR",
                "Fine-grained OCR (Box)",
                "Fine-grained OCR (Color)",
                "Multi-crop OCR",
                "Multi-page OCR",
            ],
            value="Plain Text OCR",
            label="Task",
        ),
        gr.Dropdown(choices=["ocr", "format"], value="ocr", label="OCR Type"),
        gr.Textbox(label="OCR Box", placeholder="Optional, e.g. [100,100,200,200]"),
        gr.Dropdown(choices=["red", "green", "blue"], value="red", label="OCR Color"),
    ],
    outputs=gr.Textbox(label="OCR Output"),
    allow_flagging="never",
)

if __name__ == "__main__":
    # show_api=True surfaces the "Use via API" docs page for the endpoint.
    iface.launch(show_api=True)