Spaces:
Running
Running
import atexit | |
import functools | |
from queue import Queue | |
from threading import Event, Thread | |
from paddleocr import PaddleOCR, draw_ocr | |
from PIL import Image | |
import gradio as gr | |
# Per-language settings; "num_workers" is the number of worker threads
# (one model instance each) started for that language.
LANG_CONFIG = {
    lang_code: {"num_workers": worker_count}
    for lang_code, worker_count in (
        ("ch", 2),
        ("en", 2),
        ("fr", 1),
        ("german", 1),
        ("korean", 1),
        ("japan", 1),
    )
}

# Passed to gr.Interface as its concurrency_limit.
CONCURRENCY_LIMIT = 8
class PaddleOCRModelManager(object):
    """Pool of worker threads, each owning one model built by ``model_factory``.

    Requests submitted through :meth:`infer` are placed on a shared queue and
    picked up by whichever worker is free. The caller blocks until its result
    (or the exception raised by the model) comes back.
    """

    def __init__(self,
                 num_workers,
                 model_factory):
        """Start ``num_workers`` workers, building their models one at a time.

        Args:
            num_workers: Number of worker threads (and model instances).
            model_factory: Zero-argument callable returning an object that
                exposes an ``ocr(*args, **kwargs)`` method.

        Raises:
            Exception: Whatever ``model_factory`` raised. Workers that were
                already started are shut down before the error is re-raised.
        """
        super().__init__()
        self._model_factory = model_factory
        self._queue = Queue()
        self._workers = []
        self._model_initialized_event = Event()
        # Set by a worker whose model_factory() call failed; read by __init__
        # only after the event below has been signalled.
        self._init_error = None
        for _ in range(num_workers):
            # Daemon threads: CPython joins non-daemon threads *before* it
            # runs atexit callbacks, so a non-daemon worker parked on
            # queue.get() would keep an atexit-registered close() from ever
            # running and the process would hang on exit.
            worker = Thread(target=self._worker, daemon=True)
            worker.start()
            # Models are created strictly one after another: wait for this
            # worker to finish (or fail) before starting the next one.
            self._model_initialized_event.wait()
            self._model_initialized_event.clear()
            if self._init_error is not None:
                error = self._init_error
                worker.join()
                self.close()
                raise error
            self._workers.append(worker)

    def infer(self, *args, **kwargs):
        """Run ``model.ocr(*args, **kwargs)`` on a worker and return its result.

        Blocks until a worker has processed the request.

        Raises:
            Exception: The exception raised by ``model.ocr`` on the worker,
                re-raised in the calling thread.
        """
        # XXX: Should I use a more lightweight data structure, say, a future?
        result_queue = Queue(maxsize=1)
        self._queue.put((args, kwargs, result_queue))
        success, payload = result_queue.get()
        if success:
            return payload
        raise payload

    def close(self):
        """Stop all workers and wait for them to exit.

        Safe to call more than once; later calls are no-ops.
        """
        workers, self._workers = self._workers, []
        # One ``None`` sentinel per worker; each worker consumes exactly one.
        for _ in workers:
            self._queue.put(None)
        for worker in workers:
            worker.join()

    def _worker(self):
        """Thread body: build a model, then serve requests until a sentinel."""
        try:
            model = self._model_factory()
        except Exception as e:
            # Hand the failure to __init__ instead of leaving it waiting on
            # an event that would never be set.
            self._init_error = e
            self._model_initialized_event.set()
            return
        self._model_initialized_event.set()
        while True:
            item = self._queue.get()
            if item is None:
                break
            args, kwargs, result_queue = item
            try:
                result = model.ocr(*args, **kwargs)
                result_queue.put((True, result))
            except Exception as e:
                # Ship the exception back so infer() can re-raise it.
                result_queue.put((False, e))
            finally:
                self._queue.task_done()
def create_model(lang):
    """Build a PaddleOCR instance for ``lang`` with ``use_angle_cls`` on and ``use_gpu`` off."""
    settings = {
        "lang": lang,
        "use_angle_cls": True,
        "use_gpu": False,
    }
    return PaddleOCR(**settings)
# One PaddleOCRModelManager per configured language, keyed by language code.
# Construction blocks until every worker of every manager has built its model.
model_managers = {}
for lang, config in LANG_CONFIG.items():
    model_manager = PaddleOCRModelManager(
        config["num_workers"],
        functools.partial(create_model, lang=lang),
    )
    model_managers[lang] = model_manager
# XXX: Not sure if gradio allows adding custom teardown logic
# atexit.register returns the function it is given, so it can be applied as a
# decorator; the name below stays bound to the plain function.
@atexit.register
def close_model_managers():
    """Close every manager in ``model_managers`` at interpreter exit."""
    for lang_code in model_managers:
        model_managers[lang_code].close()
def inference(img, lang):
    """Run OCR on an image file and return it annotated with the results.

    Args:
        img: Path of the input image (the Gradio input component is declared
            with ``type='filepath'``).
        lang: Key into ``model_managers`` selecting which language model runs.

    Returns:
        The visualisation produced by ``draw_ocr``, or the plain RGB image
        when nothing was recognised.
    """
    ocr = model_managers[lang]
    result = ocr.infer(img, cls=True)[0]
    # Load the pixels and release the file handle right away instead of
    # leaving it open until garbage collection.
    with Image.open(img) as source:
        image = source.convert("RGB")
    # NOTE(review): PaddleOCR appears to return None for an image in which no
    # text was detected -- confirm against the pinned version. Iterating that
    # would raise TypeError, so hand back the unannotated image instead.
    if not result:
        return image
    # Each entry is indexed as line[0] -> box, line[1] -> (text, score).
    boxes = [line[0] for line in result]
    txts = [line[1][0] for line in result]
    scores = [line[1][1] for line in result]
    return draw_ocr(image, boxes, txts, scores,
                    font_path="./simfang.ttf")
# Static text and styling handed to gr.Interface.
title = "PaddleOCR"

# Markdown bullet list; the leading and trailing newlines are part of the value.
description = (
    "\n"
    "- Gradio demo for PaddleOCR. PaddleOCR demo supports Chinese, English, French, German, Korean and Japanese.\n"
    "- To use it, simply upload your image and choose a language from the dropdown menu, or click one of the examples to load them. Read more at the links below.\n"
    "- [Docs](https://paddlepaddle.github.io/PaddleOCR/), [Github Repository](https://github.com/PaddlePaddle/PaddleOCR).\n"
)

# Each example row is [image file, language key].
examples = [
    ["en_example.jpg", "en"],
    ["cn_example.jpg", "ch"],
    ["jp_example.jpg", "japan"],
]

css = (
    ".output_image, .input_image "
    "{height: 40rem !important; width: 100% !important;}"
)
# Build the demo UI: an image file path plus a language choice go in, the
# annotated image comes out. launch() starts serving the app.
input_components = [
    gr.Image(type='filepath', label='Input'),
    gr.Dropdown(choices=list(LANG_CONFIG), value='en', label='language'),
]
output_component = gr.Image(type='pil', label='Output')

demo = gr.Interface(
    fn=inference,
    inputs=input_components,
    outputs=output_component,
    title=title,
    description=description,
    examples=examples,
    cache_examples=False,
    css=css,
    concurrency_limit=CONCURRENCY_LIMIT,
)
demo.launch(debug=False)