Update app.py
Browse files
app.py
CHANGED
@@ -1,35 +1,69 @@
|
|
1 |
-
import os
|
2 |
-
import gradio as gr
|
3 |
-
import omegaconf
|
4 |
-
import torch
|
5 |
-
|
6 |
-
from
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
model
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
).launch()
|
|
|
import os
import gradio as gr
import omegaconf
import torch
import easyocr
from PIL import Image

from vietocr.model.transformerocr import VietOCR
from vietocr.model.vocab import Vocab
from vietocr.translate import translate, process_input

# Bug fix: the original called easyocr.Reader(lang) with `lang` undefined
# (NameError at import time). Vietnamese ('vi') is presumably the target
# language given the app title — TODO confirm against the demo's intent.
lang = ['vi']
reader = easyocr.Reader(lang)

# Example gallery for the Gradio UI: every file bundled under examples/.
examples_data = os.listdir('examples')
examples_data = [os.path.join('examples', line.split('\t')[0]) for line in examples_data]

# Load the VietOCR configuration as a plain dict (resolve interpolations).
config = omegaconf.OmegaConf.load("vgg-seq2seq.yaml")
config = omegaconf.OmegaConf.to_container(config, resolve=True)

# Build the recognition model and load pretrained weights for CPU inference.
vocab = Vocab(config['vocab'])
model = VietOCR(len(vocab),
                config['backbone'],
                config['cnn'],
                config['transformer'],
                config['seq_modeling'])
model.load_state_dict(torch.load('train_old.pth', map_location=torch.device('cpu')))
def viet_ocr_predict(inp):
    """Transcribe a single cropped PIL image with the VietOCR model.

    Preprocesses the image per the dataset config, runs the model, and
    decodes the predicted token ids back to a string.
    """
    dataset_cfg = config['dataset']
    tensor = process_input(
        inp,
        dataset_cfg['image_height'],
        dataset_cfg['image_min_width'],
        dataset_cfg['image_max_width'],
    )
    token_ids = translate(tensor, model)[0].tolist()
    return vocab.decode(token_ids)
def predict(filepath):
    """Detect text regions with EasyOCR and transcribe each with VietOCR.

    Args:
        filepath: path to the input image on disk (Gradio passes this
            because the input component uses type='filepath').

    Returns:
        All recognized text fragments concatenated, each preceded by a tab.
    """
    bounds = reader.readtext(filepath)
    inp = Image.open(filepath)
    # Bug fix: `width`, `height`, and the array `img` were undefined in the
    # original (NameError on first detection). Derive the bounds from the
    # opened image instead.
    width, height = inp.size
    texts = ''
    for (bbox, text, prob) in bounds:
        (tl, tr, br, bl) = bbox
        tl = (int(tl[0]), int(tl[1]))
        tr = (int(tr[0]), int(tr[1]))
        br = (int(br[0]), int(br[1]))
        bl = (int(bl[0]), int(bl[1]))

        # Collapse the detected quadrilateral into an axis-aligned box,
        # clamped to the image bounds.
        min_x = max(0, min(tl[0], tr[0], br[0], bl[0]))
        max_x = min(width - 1, max(tl[0], tr[0], br[0], bl[0]))
        min_y = max(0, min(tl[1], tr[1], br[1], bl[1]))
        max_y = min(height - 1, max(tl[1], tr[1], br[1], bl[1]))

        # Crop the region of interest (ROI). PIL's crop replaces the
        # original's numpy slice of an undefined `img` array; the result is
        # the same axis-aligned sub-image handed to the recognizer.
        cropped_image = inp.crop((min_x, min_y, max_x, max_y))
        out = viet_ocr_predict(cropped_image)

        texts = texts + '\t' + out

    return texts
# Wire the prediction function into a Gradio demo: image file in, text out,
# with the bundled examples pre-listed, then start the server.
demo = gr.Interface(
    fn=predict,
    title='Vietnamese Handwriting Recognition',
    inputs=gr.Image(type='filepath'),
    outputs=gr.Text(),
    examples=examples_data,
)
demo.launch()