youl committed on
Commit
8e212da
·
verified ·
1 Parent(s): 053812d

First commit

Browse files
.gitattributes CHANGED
@@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ examples/acn1.png filter=lfs diff=lfs merge=lfs -text
37
+ examples/acn2.png filter=lfs diff=lfs merge=lfs -text
38
+ examples/cn2.png filter=lfs diff=lfs merge=lfs -text
39
+ examples/cn5.png filter=lfs diff=lfs merge=lfs -text
40
+ examples/cn7.png filter=lfs diff=lfs merge=lfs -text
41
+ examples/p1.png filter=lfs diff=lfs merge=lfs -text
42
+ examples/p3.png filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from utils import extraire_informations_carte
4
+
5
def predict(img, type_doc):
    """Run the OCR extraction for one uploaded image.

    Args:
        img: Path of the image file, passed through unchanged to
            ``extraire_informations_carte``.
        type_doc: Document label; one of ``"Nouvelle_CNI"``,
            ``"ANCIENNE_CNI"`` or ``"PERMIS_DE_CONDUITE"``.

    Returns:
        Whatever ``extraire_informations_carte`` returns for that document.

    Raises:
        KeyError: If ``type_doc`` is not one of the three known labels.
    """
    # Numeric codes expected by extraire_informations_carte.
    codes_par_libelle = {"Nouvelle_CNI": 1, "ANCIENNE_CNI": 2, "PERMIS_DE_CONDUITE": 3}
    return extraire_informations_carte(img, codes_par_libelle[type_doc])
10
+
11
+
12
+
13
+
14
+
15
# Gradio input/output components.
image = gr.components.Image(type="filepath")
type_document = gr.components.Dropdown(["Nouvelle_CNI", "ANCIENNE_CNI", "PERMIS_DE_CONDUITE"])
out_lab = gr.components.Textbox()

# Title, description and article strings shown on the demo page.
title = "OCR FOR IMAGES ANALYSIS"
description = "WE USE OCR TO EXTRACT INFORMATIONS FROM DIFFERENT TYPES OF DOCUMENTS AND FORMALIZE THE RESULT INTO JSON."
article = "Created by data354."

# Filename prefix -> document type. "acn" must be tested before "cn" and "p".
# NOTE(review): mapping inferred from the bundled example names
# (acn*.png, cn*.png, p*.png) -- confirm against the actual images.
_EXAMPLE_TYPES = (
    ("acn", "ANCIENNE_CNI"),
    ("cn", "Nouvelle_CNI"),
    ("p", "PERMIS_DE_CONDUITE"),
)


def _type_for_example(filename):
    """Return the document type for an example file name, or None if unknown."""
    for prefix, doc_type in _EXAMPLE_TYPES:
        if filename.startswith(prefix):
            return doc_type
    return None


# Build the examples from the "examples/" directory. os.listdir() returns
# entries in arbitrary order, so the listing is sorted and each label is
# derived from the file name instead of relying on a positional label list.
example_list = [
    ["examples/" + name, _type_for_example(name)]
    for name in sorted(os.listdir("examples"))
    if _type_for_example(name) is not None
]
print(example_list)

# Create the Gradio demo.
demo = gr.Interface(
    fn=predict,                      # maps (image path, document type) to the extracted data
    inputs=[image, type_document],
    outputs=out_lab,
    examples=example_list,
    title=title,
    description=description,
    article=article,
)

# Launch the demo.
demo.launch(debug=True)
examples/acn1.png ADDED

Git LFS Details

  • SHA256: 34ec501fb1da8d8f677face5d2b85a4549906354f79e467970e684d7715749b0
  • Pointer size: 132 Bytes
  • Size of remote file: 2.31 MB
examples/acn2.png ADDED

Git LFS Details

  • SHA256: 33505a5a48ffc2a7010c197428fc8f4a10a1b85a474a1c0612e5131fcd876f9b
  • Pointer size: 132 Bytes
  • Size of remote file: 1.62 MB
examples/cn2.png ADDED

Git LFS Details

  • SHA256: edb0f3a35f723b43a54a2d65327f5f004d05ca6c4c5ee6f937722f07634e79ac
  • Pointer size: 132 Bytes
  • Size of remote file: 1.92 MB
examples/cn5.png ADDED

Git LFS Details

  • SHA256: 9239cfc4d684fdf4c1d18fc487bc05aca6cf43bdaaebbb590e25631020a25f58
  • Pointer size: 132 Bytes
  • Size of remote file: 1.62 MB
examples/cn7.png ADDED

Git LFS Details

  • SHA256: 2e83a9c48bca70be85f20eb02e68dd77a9b47ef70ac8f8e8e74d337bcf4645ea
  • Pointer size: 132 Bytes
  • Size of remote file: 1.84 MB
examples/p1.png ADDED

Git LFS Details

  • SHA256: 8fd7a4ba002ee1cdde7556b6a51a31b8ba1404ce483b03b62098766b3311ff51
  • Pointer size: 132 Bytes
  • Size of remote file: 2.54 MB
examples/p3.png ADDED

Git LFS Details

  • SHA256: 8ae97c13700ddb16258fa3f88d30b8a7d4d1e794e2027ab49a63ab983b93736d
  • Pointer size: 132 Bytes
  • Size of remote file: 1.69 MB
utils.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from google.cloud import vision
3
+ import re
4
+ ##
5
# Set the GOOGLE_APPLICATION_CREDENTIALS environment variable to the key file
# path at import time, overwriting any value already present in the environment.
# NOTE(review): presumably read by the Google Cloud Vision client for
# authentication -- confirm the file exists in the deployment.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'data/ocr_vision_token.json'
6
+
7
+ ##
8
def info_new_cni(donnees):
    """Extract the fields of a new-format national ID card from OCR lines.

    The lines are joined with single spaces and each field is located with a
    regular expression anchored on its printed label.

    Args:
        donnees: Sequence of text lines produced by the OCR step.

    Returns:
        dict mapping field names ('Numéro de carte', 'Nom', 'Prénom',
        'Date de Naissance', 'Lieu de Naissance', 'Taille', 'Nationalité',
        "Date d'expiration", 'sexe') to the extracted strings. Fields whose
        pattern does not match are omitted.
    """
    # Join once; every pattern searches the same flattened text.
    texte = ' '.join(donnees)

    # (output key, pattern) pairs, each pattern having exactly one capturing
    # group. The lazy ".*?" replaces the former "(.*?)+" construct: it selects
    # the same match but cannot backtrack exponentially when the expected
    # value is absent from the OCR text.
    champs = (
        ('Numéro de carte', r'n° (C\d+)'),
        ('Nom', r'Nom\s+(.*?)\s'),
        ('Prénom', r'Prénom\(s\)\s+(.*?)\s+Nom\s+'),
        ('Date de Naissance', r'Date de Naissance\s+.*?(\d{2}/\d{2}/\d{4})'),
        ('Lieu de Naissance', r'Lieu de Naissance\s+(.*?)\s'),
        ('Taille', r'Sexe Taille\s+.*?(\d+,\d+)'),
        ('Nationalité', r'Nationalité\s+(.*?)\s+\d+'),
        ("Date d'expiration", r"Date d'expiration\s+(\d+/\d+/\d+)"),
    )

    informations = {}
    for cle, motif in champs:
        trouve = re.search(motif, texte)
        if trouve:
            informations[cle] = trouve.group(1)

    # The sex is taken as the two characters that follow the birth date.
    # NOTE(review): this keeps any leading space (e.g. ' M') -- confirm
    # whether callers expect the value stripped.
    sexe = re.search(r'Date de Naissance\s+.*?(?:\d{2}/\d{2}/\d{4})+(.*)', texte)
    if sexe:
        informations['sexe'] = sexe.group(1)[:2]

    return informations
47
+
48
+ ##
49
+
50
def info_ancien_cni(infos):
    """Extract the fields of an old-format national ID card from OCR lines.

    Most values are read as the line that follows a given printed label; the
    surname is read from a fixed position (the fifth line).

    Args:
        infos: Sequence of text lines produced by the OCR step.

    Returns:
        dict mapping field names ('Immatriculation', 'Nom', 'Prénom',
        'Date de Naissance', 'Lieu de Naissance', 'Taille',
        "Date d'expiration", 'sexe') to the extracted strings. Fields that
        cannot be found are omitted.
    """
    lignes = '\n'.join(infos)   # for patterns that work line by line
    a_plat = ' '.join(infos)    # for patterns that may span several lines

    immatriculation = re.search(r'Immatriculation:\s+(C \d{4} \d{4} \d{2})', ''.join(infos))
    # The surname is positional; guard against OCR output with fewer lines.
    nom = infos[4] if len(infos) > 4 else None
    # NOTE(review): each value below is taken from the line following the
    # *previous* field's label (e.g. first name after 'Nom') -- presumably
    # this reflects the OCR reading order; confirm on real samples.
    prenom = re.search(r'Nom\n(.*?)\n', lignes)
    sexe = re.search(r'Prénoms\n(.*?)\n', lignes)
    taille = re.search(r'Sexe\n(.*?)\n', lignes)
    lieu_naissance = re.search(r'Date de Naissance\n(.*?)\n', lignes)
    # Lazy ".*?" instead of "(.*?)+": same match, without exponential
    # backtracking when no date follows the label.
    date_naissance = re.search(r'Taille\s+.*?(\d+/\d+/\d+)', a_plat)
    validite = re.search(r"Valide jusqu'au.*?(\d+/\d+/\d+)", a_plat)

    # Store the extracted values, keeping the original key order.
    informations = {}
    if immatriculation:
        informations['Immatriculation'] = immatriculation.group(1)
    if nom:
        informations['Nom'] = nom
    if prenom:
        informations['Prénom'] = prenom.group(1)
    if date_naissance:
        informations['Date de Naissance'] = date_naissance.group(1)
    if lieu_naissance:
        informations['Lieu de Naissance'] = lieu_naissance.group(1)
    if taille:
        informations['Taille'] = taille.group(1)
    if validite:
        informations["Date d'expiration"] = validite.group(1)
    if sexe:
        informations['sexe'] = sexe.group(1)

    return informations
93
+
94
+ ##
95
def filtrer_elements(liste):
    """Return the items of ``liste`` minus the fixed header lines.

    Removed items are the bare carriage return and the three header strings
    printed on the driving licence; comparison is by exact equality.
    """
    a_ignorer = ('\r', "RÉPUBLIQUE DE CÔTE D'IVOIRE", "MINISTÈRE DES TRANSPORTS", "PERMIS DE CONDUIRE")
    return [element for element in liste if element not in a_ignorer]


def permis_de_conduite(donnees):
    """Extract the fields of a driving licence from OCR lines.

    After the header lines are filtered out, each value is read from a fixed
    position in the remaining lines.

    Args:
        donnees: Sequence of text lines produced by the OCR step.

    Returns:
        dict with the keys 'Nom', 'Prenoms', 'Date_et_lieu_de_naissance',
        'Date_et_lieu_de_délivrance', 'Categorie',
        'Numéro_du_permis_de_conduire' and 'Restriction(s)'. A positional
        field that is missing from the OCR output is returned as ''.
        'Restriction(s)' is the list of lines from index 12 onward when more
        than 11 lines remain, otherwise ''.
    """
    tab = filtrer_elements(donnees)

    def champ(position):
        # Tolerate truncated OCR output instead of raising IndexError.
        return tab[position] if position < len(tab) else ''

    informations = {}
    informations['Nom'] = champ(2)
    informations['Prenoms'] = champ(4)
    informations['Date_et_lieu_de_naissance'] = champ(6)
    informations['Date_et_lieu_de_délivrance'] = champ(8)
    informations['Categorie'] = champ(0)
    informations['Numéro_du_permis_de_conduire'] = champ(10)
    informations['Restriction(s)'] = tab[12:] if len(tab) > 11 else ''

    return informations
116
+
117
+
118
+ # Fonction pour extraire les informations individuelles
119
def extraire_informations_carte(path, type_de_piece=1):
    """Run OCR on an identity document image and extract its fields.

    Args:
        path: Path of the image file to read.
        type_de_piece: Document type code: 1 for the new national ID card,
            2 for the old national ID card, 3 for the driving licence.

    Returns:
        The dict produced by the parser matching ``type_de_piece``; an empty
        dict when the OCR detects no text; or an explanatory message string
        when ``type_de_piece`` is not a supported code.

    Raises:
        Exception: If the Vision API response carries an error message.
    """
    client = vision.ImageAnnotatorClient()

    with open(path, 'rb') as image_file:
        content = image_file.read()

    image = vision.Image(content=content)

    # document_text_detection is used for dense text
    # (text_detection would be the variant for sparse text).
    response = client.document_text_detection(image=image)

    # Surface API errors before touching the annotations.
    if response.error.message:
        raise Exception("{}\n For more informations check : https://cloud.google.com/apis/design/errors".format(response.error.message))

    if type_de_piece not in (1, 2, 3):
        return "Le traitement de ce type de document n'est pas encore pris en charge"

    texts = response.text_annotations
    if not texts:
        # Nothing was detected: report "no field extracted" rather than
        # failing with an IndexError.
        return {}

    # Only the first annotation is used. The "\r\n" prefix is kept so that the
    # resulting list starts with a '\r' element: info_ancien_cni indexes the
    # list positionally and filtrer_elements drops that element.
    donnees = f"\r\n{texts[0].description}".split('\n')

    if type_de_piece == 1:
        return info_new_cni(donnees)
    if type_de_piece == 2:
        return info_ancien_cni(donnees)
    return permis_de_conduite(donnees)