# Hugging Face Spaces status banner captured with this file: Runtime error
import os
import tempfile

import gradio as gr
import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer
# Load the GOT-OCR2_0 tokenizer and model once at import time.
# trust_remote_code=True is required because the checkpoint ships its own
# modelling code; the model is placed on the GPU and switched to eval mode.
_MODEL_ID = "ucaslcl/GOT-OCR2_0"

tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID, trust_remote_code=True)

model = AutoModel.from_pretrained(
    _MODEL_ID,
    trust_remote_code=True,
    low_cpu_mem_usage=True,
    use_safetensors=True,
    device_map="cuda",
    pad_token_id=tokenizer.eos_token_id,
)
model = model.eval()
model = model.cuda()
# Run inference
def run_ocr(image, task="Plain Text OCR", ocr_type="ocr", ocr_box="", ocr_color="red"):
    """Run GOT-OCR2_0 on one image and return the model's output.

    Args:
        image: Path to an image file (what ``gr.Image(type="filepath")``
            supplies) or an in-memory ``PIL.Image.Image``.
        task: Task label chosen in the UI. Only the two fine-grained tasks
            change the call: they forward ``ocr_box`` / ``ocr_color``.
            Every other label runs a plain ``model.chat`` call.
        ocr_type: Passed through to ``model.chat`` as ``ocr_type``.
        ocr_box: Box string such as ``"[100,100,200,200]"``; forwarded only
            for the "Fine-grained OCR (Box)" task and only when non-empty.
        ocr_color: Colour name; forwarded only for the
            "Fine-grained OCR (Color)" task and only when non-empty.

    Returns:
        Whatever ``model.chat`` returns, or the string
        ``"Invalid image input."`` when ``image`` is missing or of an
        unsupported type.
    """
    # Gradio passes None when the user submits without uploading an image.
    if image is None:
        return "Invalid image input."

    temp_path = None
    if isinstance(image, str):
        # The model card calls model.chat with an image file path, so the
        # path is handed over as-is instead of being decoded here first.
        image_path = image
    elif isinstance(image, Image.Image):
        # In-memory images are written to a temporary file so the model is
        # still called with a path; the file is removed after inference.
        fd, temp_path = tempfile.mkstemp(suffix=".png")
        os.close(fd)
        image.convert("RGB").save(temp_path)
        image_path = temp_path
    else:
        return "Invalid image input."

    # Forward the fine-grained options only when the matching task is chosen,
    # so the default "red" colour does not affect plain OCR requests.
    extra_kwargs = {}
    if task == "Fine-grained OCR (Box)" and ocr_box:
        extra_kwargs["ocr_box"] = ocr_box
    elif task == "Fine-grained OCR (Color)" and ocr_color:
        extra_kwargs["ocr_color"] = ocr_color

    try:
        return model.chat(tokenizer, image_path, ocr_type=ocr_type, **extra_kwargs)
    finally:
        if temp_path is not None:
            os.remove(temp_path)
# Gradio UI; the same function is also exposed as an API endpoint.
iface = gr.Interface(
    fn=run_ocr,
    inputs=[
        gr.Image(type="filepath", label="Image"),
        gr.Dropdown(
            choices=[
                "Plain Text OCR",
                "Format Text OCR",
                "Fine-grained OCR (Box)",
                "Fine-grained OCR (Color)",
                "Multi-crop OCR",
                "Multi-page OCR",
            ],
            value="Plain Text OCR",
            label="Task",
        ),
        gr.Dropdown(choices=["ocr", "format"], value="ocr", label="OCR Type"),
        gr.Textbox(label="OCR Box", placeholder="Optional, e.g. [100,100,200,200]"),
        gr.Dropdown(choices=["red", "green", "blue"], value="red", label="OCR Color"),
    ],
    outputs=gr.Textbox(label="OCR Output"),
    allow_flagging="never",
    # gr.Interface has no `allow_api` argument; the endpoint name is set with
    # `api_name` ("predict" is also Gradio's default).
    api_name="predict",
)
iface.launch()