mohammedRiad committed on
Commit
4bec0b8
·
verified ·
1 Parent(s): 3f3561f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -0
app.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoModel, AutoTokenizer
3
+ from PIL import Image
4
+ import torch
5
+
6
# Hugging Face Hub repository that supplies both the tokenizer and the weights.
MODEL_ID = "ucaslcl/GOT-OCR2_0"

# trust_remote_code=True lets transformers run the modelling code shipped in
# the repository itself, so only point MODEL_ID at a repository you trust.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

model = AutoModel.from_pretrained(
    MODEL_ID,
    pad_token_id=tokenizer.eos_token_id,  # reuse EOS as the padding token
    device_map="cuda",
    use_safetensors=True,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
)

# The app only runs inference: switch to eval mode, then move to the GPU.
model = model.eval()
model = model.cuda()
17
+
18
# Run inference
def run_ocr(image, task="Plain Text OCR", ocr_type="ocr", ocr_box="", ocr_color="red"):
    """Run GOT-OCR on one image and return the model's output.

    Args:
        image: Either a filesystem path (``str``) or a ``PIL.Image.Image``.
            Any other value (including ``None``) is rejected.
        task: Task label chosen in the UI. Only "Fine-grained OCR (Box)" and
            "Fine-grained OCR (Color)" change the call: they forward
            ``ocr_box`` / ``ocr_color`` respectively. Every other label runs
            the plain ``model.chat`` call.
        ocr_type: Forwarded unchanged to ``model.chat`` as ``ocr_type``.
        ocr_box: Box string, forwarded only for the box task.
        ocr_color: Color name, forwarded only for the color task.

    Returns:
        Whatever ``model.chat`` returns, or the string
        ``"Invalid image input."`` when ``image`` is neither a path nor a
        PIL image.
    """
    import os
    import tempfile

    # Forward the fine-grained hints only when the matching task is selected.
    # ocr_color defaults to "red", so passing it unconditionally would change
    # what the default (plain) task asks the model to do.
    extra = {}
    if task == "Fine-grained OCR (Box)":
        extra["ocr_box"] = ocr_box or ""
    elif task == "Fine-grained OCR (Color)":
        extra["ocr_color"] = ocr_color or ""

    # The GOT-OCR2.0 model card documents chat() as taking an image *path*,
    # so a path is handed over untouched instead of being decoded first.
    if isinstance(image, str):
        return model.chat(tokenizer, image, ocr_type=ocr_type, **extra)

    if not isinstance(image, Image.Image):
        return "Invalid image input."

    # In-memory images are written to a temporary PNG so that the model is
    # still called with a path; the file is removed again afterwards.
    fd, tmp_path = tempfile.mkstemp(suffix=".png")
    os.close(fd)
    try:
        image.convert("RGB").save(tmp_path)
        return model.chat(tokenizer, tmp_path, ocr_type=ocr_type, **extra)
    finally:
        os.remove(tmp_path)
29
+
30
# Gradio UI. gr.Interface serves `fn` as an API endpoint by default, so no
# extra flag is needed for API access. The previous `allow_api=True` keyword
# is not a documented gr.Interface parameter and has been removed
# (NOTE(review): depending on the Gradio version, unknown keywords may be
# rejected when the interface is constructed).
iface = gr.Interface(
    fn=run_ocr,
    # Input order must match run_ocr's positional parameters:
    # image, task, ocr_type, ocr_box, ocr_color.
    inputs=[
        gr.Image(type="filepath", label="Image"),
        gr.Dropdown(
            choices=[
                "Plain Text OCR",
                "Format Text OCR",
                "Fine-grained OCR (Box)",
                "Fine-grained OCR (Color)",
                "Multi-crop OCR",
                "Multi-page OCR",
            ],
            value="Plain Text OCR",
            label="Task",
        ),
        gr.Dropdown(choices=["ocr", "format"], value="ocr", label="OCR Type"),
        gr.Textbox(label="OCR Box", placeholder="Optional, e.g. [100,100,200,200]"),
        gr.Dropdown(choices=["red", "green", "blue"], value="red", label="OCR Color"),
    ],
    outputs=gr.Textbox(label="OCR Output"),
    allow_flagging="never",
)

iface.launch()