Spaces:

Uddipan107
/

ocr-reorder-space

Running

File size: 1,337 Bytes

5b9baff
 
 
 
 
 
 
 
 
 
 
 
ab9088f
5b9baff
 
ab9088f
 
 
 
 
5b9baff
 
 
 
ab9088f
 
 
 
5b9baff
ab9088f
 
5b9baff
 
ab9088f
5b9baff

import json, base64
from io import BytesIO
from PIL import Image
import gradio as gr
from inference import OcrReorderPipeline
from transformers import (
    AutoProcessor,
    LayoutLMv3Model,
    AutoTokenizer
)
import torch

# 1) Load the pretrained pieces from the Hub model repo: the LayoutLMv3 model
#    from the repo root, the tokenizer and processor from its `preprocessor/`
#    subfolder.
repo = "Uddipan107/ocr-layoutlmv3-base-t5-small"
model     = LayoutLMv3Model.from_pretrained(repo)
tokenizer = AutoTokenizer.from_pretrained(repo, subfolder="preprocessor")
# apply_ocr=False: words and boxes are supplied by the caller (see `infer`),
# so the processor is told not to run OCR on its own.
processor = AutoProcessor.from_pretrained(repo, subfolder="preprocessor", apply_ocr=False)

# 2) Instantiate the custom pipeline once at import time so that every
#    request handled by `infer` reuses the same loaded model.
# NOTE(review): device=0 is hard-coded. If OcrReorderPipeline follows the
# transformers convention (0 = first CUDA device, -1 = CPU) this will fail on
# CPU-only hardware -- confirm against inference.py.
pipe = OcrReorderPipeline(model, tokenizer, processor, device=0)

def _parse_json_list(raw, field):
    """Decode *raw* as JSON and require the result to be a list."""
    try:
        value = json.loads(raw)
    except (TypeError, ValueError) as err:
        # json.JSONDecodeError subclasses ValueError; TypeError covers None.
        raise ValueError(f"{field} must be a valid JSON list: {err}") from err
    if not isinstance(value, list):
        raise ValueError(
            f"{field} must be a JSON list, got {type(value).__name__}."
        )
    return value


def infer(image, words_json, boxes_json):
    """Run the OCR reorder pipeline on one image and return its output.

    Args:
        image: PIL image from the Gradio image input; ``None`` when the
            user has not provided one.
        words_json: JSON-encoded list of words.
        boxes_json: JSON-encoded list of boxes, one entry per word.

    Returns:
        The first element of the pipeline's output.

    Raises:
        ValueError: If either text field is not a valid JSON list, the two
            lists differ in length, or no image was provided.
    """
    words = _parse_json_list(words_json, "Words")
    boxes = _parse_json_list(boxes_json, "Boxes")
    if len(words) != len(boxes):
        raise ValueError(
            f"Words and boxes must have the same length "
            f"(got {len(words)} words and {len(boxes)} boxes)."
        )
    if image is None:
        # Without this guard the failure is an opaque AttributeError on
        # `None.save`.
        raise ValueError("An image is required.")

    # Encode PIL image -> PNG -> base64, the input form the pipeline is
    # called with.
    with BytesIO() as buf:
        image.save(buf, format="PNG")
        b64 = base64.b64encode(buf.getvalue()).decode()

    # Run the custom pipeline and return the first (only) output string.
    return pipe(b64, words, boxes)[0]

# 3) Gradio UI: one image plus two JSON text fields in, plain text out.
demo = gr.Interface(
    title="OCR Reorder Pipeline",
    fn=infer,
    inputs=[
        gr.Image(type="pil", label="Image"),
        gr.Textbox(label="Words (JSON list)"),
        gr.Textbox(label="Boxes (JSON list)"),
    ],
    outputs="text",
)

# Start the web app only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()