# NOTE(review): the three lines that originally stood here (file size, commit
# hashes, and a line-number gutter) were viewer/extraction metadata, not code;
# they have been converted to this comment so the module parses.
import os
import gradio as gr
import omegaconf
import torch
import numpy
import easyocr
from PIL import Image
from vietocr.model.transformerocr import VietOCR
from vietocr.model.vocab import Vocab
from vietocr.translate import translate, process_input
# --- one-time model / OCR setup (runs at import time) ---

# EasyOCR performs text *detection*; the VietOCR model below performs
# *recognition* on each detected region.
reader = easyocr.Reader(['vi'])

# Example images shipped alongside the app (paths relative to the CWD).
examples_data = os.listdir('examples')
examples_data = [os.path.join('examples', entry.split('\t')[0]) for entry in examples_data]

# Load the VietOCR config and build the vocabulary + model from it.
config = omegaconf.OmegaConf.load("vgg-seq2seq.yaml")
config = omegaconf.OmegaConf.to_container(config, resolve=True)
vocab = Vocab(config['vocab'])
model = VietOCR(
    len(vocab),
    config['backbone'],
    config['cnn'],
    config['transformer'],
    config['seq_modeling'],
)
# CPU-only checkpoint load. NOTE(review): torch.load unpickles arbitrary
# objects — only load checkpoints from a trusted source.
model.load_state_dict(torch.load('train_old.pth', map_location=torch.device('cpu')))
def viet_ocr_predict(inp):
    """Recognize the text in a single cropped line image with VietOCR.

    ``inp`` is an image accepted by ``process_input`` (a PIL image here —
    see the caller). Returns the decoded text as a string.
    """
    dataset_cfg = config['dataset']
    tensor = process_input(
        inp,
        dataset_cfg['image_height'],
        dataset_cfg['image_min_width'],
        dataset_cfg['image_max_width'],
    )
    token_ids = translate(tensor, model)[0].tolist()
    return vocab.decode(token_ids)
def predict(filepath):
    """Detect text regions with EasyOCR, then recognize each with VietOCR.

    filepath: path to an image file on disk.
    Returns a single string: every recognized fragment preceded by a tab
    (i.e. the fragments joined by '\t' with a leading '\t'), preserving the
    original output format.
    """
    bounds = reader.readtext(filepath)
    im = Image.open(filepath)
    inp = numpy.asarray(im)
    # numpy image arrays are indexed (row, col) -> (height, width).
    # The original code unpacked the shape as (width, height, _), which
    # clamped crops against the wrong axis for landscape images; using
    # shape[:2] directly also tolerates grayscale (2-D) arrays.
    height, width = inp.shape[0], inp.shape[1]
    texts = ''
    for (bbox, text, prob) in bounds:
        # bbox is four (x, y) corner points; take the axis-aligned
        # bounding box and clamp it to the image bounds.
        xs = [int(point[0]) for point in bbox]
        ys = [int(point[1]) for point in bbox]
        min_x = max(0, min(xs))
        max_x = min(width - 1, max(xs))
        min_y = max(0, min(ys))
        max_y = min(height - 1, max(ys))
        try:
            # Crop the region of interest and run VietOCR on it.
            cropped_image = Image.fromarray(inp[min_y:max_y, min_x:max_x, :])
            out = viet_ocr_predict(cropped_image)
        except Exception:
            # Best-effort fallback: keep EasyOCR's own transcription if
            # the crop or VietOCR recognition fails (e.g. empty/degenerate
            # crop, grayscale input without a channel axis).
            out = text
        print(out)
        texts = texts + '\t' + out
    return texts
# Build and serve the Gradio UI. `type='filepath'` makes Gradio hand
# predict() a path to a temp file holding the uploaded image.
# (A stray '|' extraction artifact after .launch() was removed — it was a
# SyntaxError.)
gr.Interface(fn=predict,
             title='Vietnamese Handwriting Recognition',
             inputs=gr.Image(type='filepath'),
             outputs=gr.Text(),
             #examples=examples_data,
             ).launch()