Spaces:

mohammedRiad
/

got-ocr-api

Runtime error

App Files Files Community

mohammedRiad committed on 26 days ago

Commit

4bec0b8

verified ·

1 Parent(s): 3f3561f

Create app.py

Browse files

Files changed (1) hide show

app.py +45 -0

app.py ADDED Viewed

	@@ -0,0 +1,45 @@

+import gradio as gr
+from transformers import AutoModel, AutoTokenizer
+from PIL import Image
+import torch
+# Load the model
+tokenizer = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
+model = AutoModel.from_pretrained(
+    "ucaslcl/GOT-OCR2_0",
+    trust_remote_code=True,
+    low_cpu_mem_usage=True,
+    use_safetensors=True,
+    device_map="cuda",
+    pad_token_id=tokenizer.eos_token_id
+)
+model = model.eval().cuda()
+# Run inference
+def run_ocr(image, task="Plain Text OCR", ocr_type="ocr", ocr_box="", ocr_color="red"):
+    if isinstance(image, str):
+        image = Image.open(image).convert("RGB")
+    elif isinstance(image, Image.Image):
+        image = image.convert("RGB")
+    else:
+        return "Invalid image input."
+    result = model.chat(tokenizer, image, ocr_type=ocr_type)
+    return result
+# Gradio UI with API exposed
+iface = gr.Interface(
+    fn=run_ocr,
+    inputs=[
+        gr.Image(type="filepath", label="Image"),
+        gr.Dropdown(choices=["Plain Text OCR", "Format Text OCR", "Fine-grained OCR (Box)", "Fine-grained OCR (Color)", "Multi-crop OCR", "Multi-page OCR"], value="Plain Text OCR", label="Task"),
+        gr.Dropdown(choices=["ocr", "format"], value="ocr", label="OCR Type"),
+        gr.Textbox(label="OCR Box", placeholder="Optional, e.g. [100,100,200,200]"),
+        gr.Dropdown(choices=["red", "green", "blue"], value="red", label="OCR Color")
+    ],
+    outputs=gr.Textbox(label="OCR Output"),
+    allow_flagging="never",
+    allow_api=True   # ✅ This line enables API access!
+)
+iface.launch()