Spaces:
Sleeping
Sleeping
First commit
Browse files- .gitattributes +7 -0
- app.py +41 -0
- examples/acn1.png +3 -0
- examples/acn2.png +3 -0
- examples/cn2.png +3 -0
- examples/cn5.png +3 -0
- examples/cn7.png +3 -0
- examples/p1.png +3 -0
- examples/p3.png +3 -0
- utils.py +151 -0
.gitattributes
CHANGED
@@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
examples/acn1.png filter=lfs diff=lfs merge=lfs -text
|
37 |
+
examples/acn2.png filter=lfs diff=lfs merge=lfs -text
|
38 |
+
examples/cn2.png filter=lfs diff=lfs merge=lfs -text
|
39 |
+
examples/cn5.png filter=lfs diff=lfs merge=lfs -text
|
40 |
+
examples/cn7.png filter=lfs diff=lfs merge=lfs -text
|
41 |
+
examples/p1.png filter=lfs diff=lfs merge=lfs -text
|
42 |
+
examples/p3.png filter=lfs diff=lfs merge=lfs -text
|
app.py
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import os
|
3 |
+
from utils import extraire_informations_carte
|
4 |
+
|
5 |
+
def predict(img, type_doc):
    """Run the OCR extraction for an image and a document-type label.

    Args:
        img: Path of the image file to analyse. ``None`` is treated as
            "no image supplied" (presumably what the UI sends when nothing
            has been uploaded -- confirm against the Gradio version in use).
        type_doc: One of ``"Nouvelle_CNI"``, ``"ANCIENNE_CNI"`` or
            ``"PERMIS_DE_CONDUITE"``.

    Returns:
        The value returned by ``extraire_informations_carte`` for a known
        document type, otherwise a short explanatory message (string).
    """
    # Label -> numeric code expected by extraire_informations_carte.
    codes = {"Nouvelle_CNI": 1, "ANCIENNE_CNI": 2, "PERMIS_DE_CONDUITE": 3}

    # Guard clauses: answer without starting an OCR request when an input
    # is missing, instead of failing with KeyError / TypeError.
    if img is None:
        return "Aucune image fournie"

    type_document = codes.get(type_doc)
    if type_document is None:
        return "Le traitement de ce type de document n'est pas encore pris en charge"

    return extraire_informations_carte(img, type_document)
|
10 |
+
|
11 |
+
|
12 |
+
|
13 |
+
|
14 |
+
|
15 |
+
# Input and output widgets of the interface. The image widget hands the
# callback a file path rather than pixel data (type="filepath").
image = gr.components.Image(type="filepath")
type_document = gr.components.Dropdown(["Nouvelle_CNI", "ANCIENNE_CNI", "PERMIS_DE_CONDUITE"])
out_lab = gr.components.Textbox()

### 4. Gradio app ###
# Title, description and article strings shown on the page.
title = "OCR FOR IMAGES ANALYSIS"
description = "WE USE OCR TO EXTRACT INFORMATIONS FROM DIFFERENT TYPES OF DOCUMENTS AND FORMALIZE THE RESULT INTO JSON."
article = "Created by data354."

# File-name prefix -> document type for the bundled examples. "acn" must be
# tested before "cn" and "p" last so that the longest prefix wins.
# NOTE(review): mapping inferred from the example file names
# (acn*.png / cn*.png / p*.png) -- confirm with the dataset owner.
_EXAMPLE_PREFIXES = (
    ("acn", "ANCIENNE_CNI"),
    ("cn", "Nouvelle_CNI"),
    ("p", "PERMIS_DE_CONDUITE"),
)


def _example_doc_type(filename):
    """Return the document type matching a file-name prefix, or None."""
    for prefix, doc_type in _EXAMPLE_PREFIXES:
        if filename.startswith(prefix):
            return doc_type
    return None


# Build the examples from the "examples/" directory. os.listdir() returns
# entries in arbitrary order, so the label is derived from each file name
# instead of being paired positionally with a fixed list; files with an
# unknown prefix are skipped.
example_list = [
    ["examples/" + filename, _example_doc_type(filename)]
    for filename in sorted(os.listdir("examples"))
    if _example_doc_type(filename) is not None
]

# Create the Gradio demo.
demo = gr.Interface(
    fn=predict,  # maps (image path, document type) to the extracted data
    inputs=[image, type_document],
    outputs=out_lab,
    examples=example_list,
    title=title,
    description=description,
    article=article,
)

# Launch the demo!
demo.launch(debug=True)
|
examples/acn1.png
ADDED
![]() |
Git LFS Details
|
examples/acn2.png
ADDED
![]() |
Git LFS Details
|
examples/cn2.png
ADDED
![]() |
Git LFS Details
|
examples/cn5.png
ADDED
![]() |
Git LFS Details
|
examples/cn7.png
ADDED
![]() |
Git LFS Details
|
examples/p1.png
ADDED
![]() |
Git LFS Details
|
examples/p3.png
ADDED
![]() |
Git LFS Details
|
utils.py
ADDED
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from google.cloud import vision
|
3 |
+
import re
|
4 |
+
##
|
5 |
+
# Set the credentials-file path for the process at import time. The path is
# relative (resolved against the current working directory) and this plain
# assignment replaces any value already present in the environment.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'data/ocr_vision_token.json'
|
6 |
+
|
7 |
+
##
|
8 |
+
def info_new_cni(donnees):
    """Extract the fields of a new-format identity card from OCR lines.

    Args:
        donnees: Sequence of text lines produced by the OCR step. The lines
            are joined with single spaces before the patterns are applied.

    Returns:
        dict mapping a field label (``'Numéro de carte'``, ``'Nom'``,
        ``'Prénom'``, ``'Date de Naissance'``, ``'Lieu de Naissance'``,
        ``'Taille'``, ``'Nationalité'``, ``"Date d'expiration"``, ``'sexe'``)
        to the captured text. A field whose pattern does not match is
        simply absent from the result.
    """
    # Join once; every pattern is searched in the same flattened text.
    texte = ' '.join(donnees)

    # (label, pattern) pairs in output order; group 1 is the value kept.
    # The "skip ahead to the value" parts use a plain lazy `.*?`. Wrapping it
    # in another quantifier, as in `(.*?)+`, finds the same match but
    # backtracks exponentially when the value is missing from the text.
    motifs = (
        ('Numéro de carte', r'n° (C\d+)'),
        ('Nom', r'Nom\s+(.*?)\s'),
        ('Prénom', r'Prénom\(s\)\s+(.*?)\s+Nom\s+'),
        ('Date de Naissance', r'Date de Naissance\s+.*?(\d{2}/\d{2}/\d{4})'),
        ('Lieu de Naissance', r'Lieu de Naissance\s+(.*?)\s'),
        ('Taille', r'Sexe Taille\s+.*?(\d+,\d+)'),
        ('Nationalité', r'Nationalité\s+(.*?)\s+\d+'),
        ("Date d'expiration", r"Date d'expiration\s+(\d+/\d+/\d+)"),
        # Everything that follows the birth date; trimmed below.
        ('sexe', r'Date de Naissance\s+.*?(?:\d{2}/\d{2}/\d{4})+(.*)'),
    )

    informations = {}
    for champ, motif in motifs:
        trouve = re.search(motif, texte)
        if trouve is None:
            continue
        valeur = trouve.group(1)
        if champ == 'sexe':
            # Keep only the first two characters after the date
            # (presumably a separator plus the sex letter -- TODO confirm).
            valeur = valeur[:2]
        informations[champ] = valeur

    return informations
|
47 |
+
|
48 |
+
##
|
49 |
+
|
50 |
+
def info_ancien_cni(infos):
    """Extract the fields of an old-format identity card from OCR lines.

    Most values are located relative to a printed label: the pattern for a
    field anchors on the label found just before the value in the OCR
    output (for example the first-name value is the line after ``Nom``).
    The family name is taken positionally from the fifth line.

    Args:
        infos: Sequence of text lines produced by the OCR step.

    Returns:
        dict with any of the keys ``'Immatriculation'``, ``'Nom'``,
        ``'Prénom'``, ``'Date de Naissance'``, ``'Lieu de Naissance'``,
        ``'Taille'``, ``"Date d'expiration"`` and ``'sexe'``. Fields that
        cannot be found are left out.
    """
    # The three joined views used by the patterns, each built once.
    colle = ''.join(infos)
    lignes = '\n'.join(infos)
    texte = ' '.join(infos)

    def chercher(motif, source):
        """Return group 1 of the first match of ``motif``, or None."""
        trouve = re.search(motif, source)
        return trouve.group(1) if trouve else None

    # The family name is the fifth line; short OCR output yields no name
    # instead of raising IndexError.
    nom = infos[4] if len(infos) > 4 else ''

    # Output order is kept stable. The date patterns use a plain lazy `.*?`
    # rather than `(.*?)+`, which matches the same text but backtracks
    # exponentially when no date follows the label.
    candidats = (
        ('Immatriculation', chercher(r'Immatriculation:\s+(C \d{4} \d{4} \d{2})', colle)),
        ('Nom', nom or None),
        ('Prénom', chercher(r'Nom\n(.*?)\n', lignes)),
        ('Date de Naissance', chercher(r'Taille\s+.*?(\d+/\d+/\d+)', texte)),
        ('Lieu de Naissance', chercher(r'Date de Naissance\n(.*?)\n', lignes)),
        ('Taille', chercher(r'Sexe\n(.*?)\n', lignes)),
        ("Date d'expiration", chercher(r"Valide jusqu'au+.*?(\d+/\d+/\d+)", texte)),
        ('sexe', chercher(r'Prénoms\n(.*?)\n', lignes)),
    )

    return {champ: valeur for champ, valeur in candidats if valeur is not None}
|
93 |
+
|
94 |
+
##
|
95 |
+
def filtrer_elements(liste):
    """Return the items of ``liste`` minus a fixed set of header lines.

    Items equal to ``'\\r'`` or to one of the three printed header strings
    below are dropped; every other item is kept, in its original order.

    Args:
        liste: Iterable of OCR text lines.

    Returns:
        A new list containing the remaining items.
    """
    a_exclure = (
        '\r',
        "RÉPUBLIQUE DE CÔTE D'IVOIRE",
        "MINISTÈRE DES TRANSPORTS",
        "PERMIS DE CONDUIRE",
    )
    return [element for element in liste if element not in a_exclure]
|
101 |
+
|
102 |
+
def permis_de_conduite(donnees):
    """Extract the fields of a driving licence from OCR lines.

    The header lines are removed with ``filtrer_elements`` and each field is
    then read from a fixed position in the remaining list.

    Args:
        donnees: Sequence of text lines produced by the OCR step.

    Returns:
        dict with the keys ``'Nom'``, ``'Prenoms'``,
        ``'Date_et_lieu_de_naissance'``, ``'Date_et_lieu_de_délivrance'``,
        ``'Categorie'``, ``'Numéro_du_permis_de_conduire'`` and
        ``'Restriction(s)'``. A positional field that is missing from the
        OCR output is returned as ``''``; ``'Restriction(s)'`` is the list
        of lines from index 12 onwards, or ``''`` when there are none.
    """
    tab = filtrer_elements(donnees)

    def champ(index):
        """Return the line at ``index`` or '' when the OCR output is shorter."""
        return tab[index] if index < len(tab) else ''

    return {
        'Nom': champ(2),
        'Prenoms': champ(4),
        'Date_et_lieu_de_naissance': champ(6),
        'Date_et_lieu_de_délivrance': champ(8),
        'Categorie': champ(0),
        'Numéro_du_permis_de_conduire': champ(10),
        # Index 12 must exist for the slice to be non-empty, hence "> 12".
        'Restriction(s)': tab[12:] if len(tab) > 12 else '',
    }
|
116 |
+
|
117 |
+
|
118 |
+
# Fonction pour extraire les informations individuelles
|
119 |
+
def extraire_informations_carte(path, type_de_piece=1):
    """Run OCR on an image file and parse the text for a given document type.

    Args:
        path: Path of the image file to read.
        type_de_piece: Document type code: 1 = new identity card
            (``info_new_cni``), 2 = old identity card (``info_ancien_cni``),
            3 = driving licence (``permis_de_conduite``).

    Returns:
        The dict built by the matching parser; an empty dict when the OCR
        response contains no text annotation; or an explanatory string when
        ``type_de_piece`` is not one of the supported codes.

    Raises:
        Exception: when the OCR response carries an error message.
    """
    # Reject unsupported codes first so no file is read and no OCR request
    # is sent for a document type that cannot be parsed anyway.
    if type_de_piece not in (1, 2, 3):
        return "Le traitement de ce type de document n'est pas encore pris en charge"

    client = vision.ImageAnnotatorClient()

    with open(path, 'rb') as image_file:
        content = image_file.read()

    image = vision.Image(content=content)

    # document_text_detection is the variant meant for dense text;
    # client.text_detection would be the alternative for sparse text.
    response = client.document_text_detection(image=image)

    # Check the error before touching the annotations.
    if response.error.message:
        raise Exception("{}\n For more informations check : https://cloud.google.com/apis/design/errors".format(response.error.message))

    texts = response.text_annotations
    if not texts:
        # Nothing was recognised: no field can be extracted.
        return {}

    # Only the first annotation is used. The text is prefixed with "\r\n"
    # before splitting on "\n", so the first element of the list is "\r";
    # the parsers depend on that offset.
    donnees = f"\r\n{texts[0].description}".split('\n')

    if type_de_piece == 1:
        return info_new_cni(donnees)
    if type_de_piece == 2:
        return info_ancien_cni(donnees)
    return permis_de_conduite(donnees)
|