# Author: Uddipan Basu Bir
# Commit ab9088f: "Load tokenizer & processor from preprocessor subfolder"
# (header retained from the Hugging Face Hub file viewer; size ~1.34 kB)
import json, base64
from io import BytesIO
from PIL import Image
import gradio as gr
from inference import OcrReorderPipeline
from transformers import (
AutoProcessor,
LayoutLMv3Model,
AutoTokenizer
)
import torch
# 1) Load from your model repo, pointing at the `preprocessor/` folder
repo = "Uddipan107/ocr-layoutlmv3-base-t5-small"
# Backbone encoder weights are loaded from the repo root.
model = LayoutLMv3Model.from_pretrained(repo)
# Tokenizer and processor configs live under the repo's `preprocessor/` subfolder.
tokenizer = AutoTokenizer.from_pretrained(repo, subfolder="preprocessor")
# apply_ocr=False: words and boxes are supplied by the caller, not extracted by Tesseract.
processor = AutoProcessor.from_pretrained(repo, subfolder="preprocessor", apply_ocr=False)
# 2) Instantiate your pipeline
# NOTE(review): device=0 presumably selects the first CUDA device — confirm
# OcrReorderPipeline's device semantics on CPU-only hosts.
pipe = OcrReorderPipeline(model, tokenizer, processor, device=0)
def infer(image, words_json, boxes_json):
    """Decode the JSON inputs, base64-encode the image, and run the reorder pipeline.

    Args:
        image: PIL image uploaded through the Gradio UI.
        words_json: JSON-encoded list of OCR words.
        boxes_json: JSON-encoded list of bounding boxes matching the words.

    Returns:
        The first (and only) output string produced by the pipeline.
    """
    parsed_words = json.loads(words_json)
    parsed_boxes = json.loads(boxes_json)

    # Serialize the PIL image to PNG bytes, then to a base64 text payload.
    png_buffer = BytesIO()
    image.save(png_buffer, format="PNG")
    encoded = base64.b64encode(png_buffer.getvalue()).decode()

    # The pipeline returns a list; the reordered text is its sole element.
    return pipe(encoded, parsed_words, parsed_boxes)[0]
# 3) Gradio UI — wires `infer` to a three-input web form.
demo = gr.Interface(
    fn=infer,
    inputs=[
        gr.Image(type="pil", label="Image"),        # uploaded page image (PIL)
        gr.Textbox(label="Words (JSON list)"),      # OCR words as a JSON-encoded list
        gr.Textbox(label="Boxes (JSON list)")       # matching bounding boxes, JSON-encoded
    ],
    outputs="text",
    title="OCR Reorder Pipeline"
)
# Start the Gradio server only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()