Spaces:

data354
/

OCR_Project

Sleeping

App Files Files Community

youl committed on May 13, 2024

Commit

8e212da

verified ·

1 Parent(s): 053812d

First commit

Browse files

Files changed (10) hide show

.gitattributes +7 -0
app.py +41 -0
examples/acn1.png +3 -0
examples/acn2.png +3 -0
examples/cn2.png +3 -0
examples/cn5.png +3 -0
examples/cn7.png +3 -0
examples/p1.png +3 -0
examples/p3.png +3 -0
utils.py +151 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+examples/acn1.png filter=lfs diff=lfs merge=lfs -text
+examples/acn2.png filter=lfs diff=lfs merge=lfs -text
+examples/cn2.png filter=lfs diff=lfs merge=lfs -text
+examples/cn5.png filter=lfs diff=lfs merge=lfs -text
+examples/cn7.png filter=lfs diff=lfs merge=lfs -text
+examples/p1.png filter=lfs diff=lfs merge=lfs -text
+examples/p3.png filter=lfs diff=lfs merge=lfs -text

app.py ADDED Viewed

	@@ -0,0 +1,41 @@

+import gradio as gr
+import os
+from utils import extraire_informations_carte
def predict(img, type_doc):
    """Run the OCR extraction for an uploaded image and a document type.

    Args:
        img: Path of the image file to analyse (the image input component
            is created with ``type="filepath"``).
        type_doc: Label selected in the dropdown: ``"Nouvelle_CNI"``,
            ``"ANCIENNE_CNI"`` or ``"PERMIS_DE_CONDUITE"``.

    Returns:
        Whatever ``extraire_informations_carte`` returns for the image.
    """
    type_codes = {"Nouvelle_CNI": 1, "ANCIENNE_CNI": 2, "PERMIS_DE_CONDUITE": 3}
    # A missing or unknown label (e.g. nothing selected in the dropdown) maps
    # to code 0 instead of raising KeyError; the extractor answers any code
    # other than 1, 2 or 3 with its "unsupported document" message.
    code = type_codes.get(type_doc, 0)
    return extraire_informations_carte(img, code)
# Input and output widgets of the interface.
image = gr.components.Image(type="filepath")
type_document = gr.components.Dropdown(["Nouvelle_CNI", "ANCIENNE_CNI", "PERMIS_DE_CONDUITE"])
out_lab = gr.components.Textbox()

### 4. Gradio app ###
# Title, description and article strings displayed on the page.
title = "OCR FOR IMAGES ANALYSIS"
description = "WE USE OCR TO EXTRACT INFORMATIONS FROM DIFFERENT TYPES OF DOCUMENTS AND FORMALIZE THE RESULT INTO JSON."
article = "Created by data354."

# File-name prefix -> dropdown label for the bundled example images.
# NOTE(review): the prefix convention (acn* = old ID card, cn* = new ID card,
# p* = driving licence) is inferred from the example file names; confirm.
_EXAMPLE_PREFIX_TO_TYPE = (
    ("acn", "ANCIENNE_CNI"),
    ("cn", "Nouvelle_CNI"),
    ("p", "PERMIS_DE_CONDUITE"),
)


def _example_document_type(filename):
    """Return the dropdown label for an example file name, or None if unknown."""
    for prefix, label in _EXAMPLE_PREFIX_TO_TYPE:
        if filename.startswith(prefix):
            return label
    return None


# Build the examples list from the "examples/" directory. The label is derived
# from each file name rather than from the position in the listing, because
# os.listdir() returns entries in arbitrary order. Unrecognised files are
# skipped.
example_list = []
for example in sorted(os.listdir("examples")):
    example_type = _example_document_type(example)
    if example_type is not None:
        example_list.append(["examples/" + example, example_type])
print(example_list)

# Create the Gradio demo.
demo = gr.Interface(
    fn=predict,  # mapping function from inputs to output
    inputs=[image, type_document],
    outputs=out_lab,
    examples=example_list or None,  # no examples panel when none were found
    title=title,
    description=description,
    article=article,
)

# Launch the demo.
demo.launch(debug=True)

examples/acn1.png ADDED Viewed

Git LFS Details

SHA256: 34ec501fb1da8d8f677face5d2b85a4549906354f79e467970e684d7715749b0
Pointer size: 132 Bytes
Size of remote file: 2.31 MB

examples/acn2.png ADDED Viewed

Git LFS Details

SHA256: 33505a5a48ffc2a7010c197428fc8f4a10a1b85a474a1c0612e5131fcd876f9b
Pointer size: 132 Bytes
Size of remote file: 1.62 MB

examples/cn2.png ADDED Viewed

Git LFS Details

SHA256: edb0f3a35f723b43a54a2d65327f5f004d05ca6c4c5ee6f937722f07634e79ac
Pointer size: 132 Bytes
Size of remote file: 1.92 MB

examples/cn5.png ADDED Viewed

Git LFS Details

SHA256: 9239cfc4d684fdf4c1d18fc487bc05aca6cf43bdaaebbb590e25631020a25f58
Pointer size: 132 Bytes
Size of remote file: 1.62 MB

examples/cn7.png ADDED Viewed

Git LFS Details

SHA256: 2e83a9c48bca70be85f20eb02e68dd77a9b47ef70ac8f8e8e74d337bcf4645ea
Pointer size: 132 Bytes
Size of remote file: 1.84 MB

examples/p1.png ADDED Viewed

Git LFS Details

SHA256: 8fd7a4ba002ee1cdde7556b6a51a31b8ba1404ce483b03b62098766b3311ff51
Pointer size: 132 Bytes
Size of remote file: 2.54 MB

examples/p3.png ADDED Viewed

Git LFS Details

SHA256: 8ae97c13700ddb16258fa3f88d30b8a7d4d1e794e2027ab49a63ab983b93736d
Pointer size: 132 Bytes
Size of remote file: 1.69 MB

utils.py ADDED Viewed

	@@ -0,0 +1,151 @@

+import os
+from google.cloud import vision
+import re
+##
# Path of the credentials file exported through GOOGLE_APPLICATION_CREDENTIALS
# (relative to the working directory of the process).
_CREDENTIALS_PATH = 'data/ocr_vision_token.json'
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = _CREDENTIALS_PATH
+##
def info_new_cni(donnees):
    """Extract the fields of a new-format national ID card from OCR lines.

    The lines are joined with single spaces and each field is located with a
    regular expression anchored on the label printed on the card.

    Args:
        donnees: List of text lines produced by the OCR step.

    Returns:
        A dict mapping field labels to the matched text. A field whose
        pattern does not match is left out, so the dict may be empty.
    """
    texte = ' '.join(donnees)

    numero_carte = re.search(r'n° (C\d+)', texte)
    nom = re.search(r'Nom\s+(.*?)\s', texte)
    prenom = re.search(r'Prénom\(s\)\s+(.*?)\s+Nom\s+(.*?)', texte)
    # One pattern yields both the birth date (group 1) and the text following
    # it (group 2). The gap before the date is skipped with a single lazy
    # ``.*?``: a nested quantifier there would backtrack exponentially when
    # the OCR text contains the label but no date.
    naissance = re.search(
        r'Date de Naissance\s+.*?(\d{2}/\d{2}/\d{4})(?:\d{2}/\d{2}/\d{4})*(.*)',
        texte,
    )
    lieu_naissance = re.search(r'Lieu de Naissance\s+(.*?)\s', texte)
    taille = re.search(r'Sexe Taille\s+.*?(\d+,\d+)', texte)
    nationalite = re.search(r'Nationalité\s+(.*?)\s+\d+', texte)
    date_expiration = re.search(r'Date d\'expiration\s+(\d+/\d+/\d+)', texte)

    # Store only the fields that were found.
    informations = {}
    if numero_carte:
        informations['Numéro de carte'] = numero_carte.group(1)
    if nom:
        informations['Nom'] = nom.group(1)
    if prenom:
        informations['Prénom'] = prenom.group(1)
    if naissance:
        informations['Date de Naissance'] = naissance.group(1)
    if lieu_naissance:
        informations['Lieu de Naissance'] = lieu_naissance.group(1)
    if taille:
        informations['Taille'] = taille.group(1)
    if nationalite:
        informations['Nationalité'] = nationalite.group(1)
    if date_expiration:
        informations['Date d\'expiration'] = date_expiration.group(1)
    if naissance:
        # The sex is taken as the two characters that follow the birth date;
        # they are kept verbatim (a leading space included) so the output
        # format stays the same.
        informations['sexe'] = naissance.group(2)[:2]
    return informations
+##
def info_ancien_cni(infos):
    """Extract the fields of an old-format national ID card from OCR lines.

    Most fields are read as "the line that follows a given label line".
    Which label precedes which value is kept exactly as before.
    NOTE(review): presumably tuned to the order in which the OCR emits the
    card's labels and values; confirm against real samples.

    Args:
        infos: List of text lines produced by the OCR step.

    Returns:
        A dict mapping field labels to the extracted text. A field that
        cannot be found is left out, so the dict may be empty.
    """
    texte_colle = ''.join(infos)
    texte_lignes = '\n'.join(infos)
    texte_espaces = ' '.join(infos)

    immatriculation = re.search(
        r'Immatriculation:\s+(C \d{4} \d{4} \d{2})', texte_colle
    )
    # The surname is read by position (fifth element); OCR output that is too
    # short yields no surname instead of raising IndexError.
    nom = infos[4] if len(infos) > 4 else ''
    prenom = re.search(r'Nom\n(.*?)\n', texte_lignes)
    sexe = re.search(r'Prénoms\n(.*?)\n', texte_lignes)
    taille = re.search(r'Sexe\n(.*?)\n', texte_lignes)
    # The gap before each date is skipped with a single lazy ``.*?``: a nested
    # quantifier there would backtrack exponentially when no date follows the
    # label.
    date_naissance = re.search(r'Taille\s+.*?(\d+/\d+/\d+)', texte_espaces)
    lieu_naissance = re.search(r'Date de Naissance\n(.*?)\n', texte_lignes)
    validite = re.search(r"Valide jusqu'au+.*?(\d+/\d+/\d+)", texte_espaces)

    # Store only the fields that were found.
    informations = {}
    if immatriculation:
        informations['Immatriculation'] = immatriculation.group(1)
    if nom:
        informations['Nom'] = nom
    if prenom:
        informations['Prénom'] = prenom.group(1)
    if date_naissance:
        informations['Date de Naissance'] = date_naissance.group(1)
    if lieu_naissance:
        informations['Lieu de Naissance'] = lieu_naissance.group(1)
    if taille:
        informations['Taille'] = taille.group(1)
    if validite:
        informations['Date d\'expiration'] = validite.group(1)
    if sexe:
        informations['sexe'] = sexe.group(1)
    return informations
+  ##
def filtrer_elements(liste):
    """Return the items of ``liste`` that are not boilerplate lines.

    An item is dropped only when it is exactly equal to a lone carriage
    return or to one of the three fixed header strings listed below. The
    order of the remaining items is preserved.
    """
    a_ignorer = (
        '\r',
        "RÉPUBLIQUE DE CÔTE D'IVOIRE",
        "MINISTÈRE DES TRANSPORTS",
        "PERMIS DE CONDUIRE",
    )
    return [ligne for ligne in liste if ligne not in a_ignorer]
def permis_de_conduite(donnees):
    """Extract the fields of a driving licence from OCR lines.

    The lines are first passed through ``filtrer_elements``; the fields are
    then read at fixed positions of the filtered list.

    Args:
        donnees: List of text lines produced by the OCR step.

    Returns:
        A dict with the keys ``Nom``, ``Prenoms``,
        ``Date_et_lieu_de_naissance``, ``Date_et_lieu_de_délivrance``,
        ``Categorie``, ``Numéro_du_permis_de_conduire`` and
        ``Restriction(s)``. A field whose position lies beyond the end of
        the filtered list is an empty string. ``Restriction(s)`` is the list
        of elements from position 12 onwards, or ``''`` when there are none.
    """
    tab = filtrer_elements(donnees)

    def champ(position):
        # Short OCR output yields '' for the missing field instead of raising
        # IndexError, matching the '' fallback used for the restrictions.
        return tab[position] if position < len(tab) else ''

    return {
        'Nom': champ(2),
        'Prenoms': champ(4),
        'Date_et_lieu_de_naissance': champ(6),
        'Date_et_lieu_de_délivrance': champ(8),
        'Categorie': champ(0),
        'Numéro_du_permis_de_conduire': champ(10),
        # Restrictions start at position 12, so at least 13 elements are
        # needed for the slice to be non-empty.
        'Restriction(s)': tab[12:] if len(tab) > 12 else '',
    }
+# Fonction pour extraire les informations individuelles
def extraire_informations_carte(path, type_de_piece=1):
    """Run Google Cloud Vision OCR on an image and extract the document fields.

    Args:
        path: Path of the image file to read.
        type_de_piece: Document type code: 1 for the new ID card, 2 for the
            old ID card, 3 for the driving licence.

    Returns:
        The dict built by the extractor matching ``type_de_piece``; an empty
        dict when the OCR detects no text; or a message string when the
        document type is not supported.

    Raises:
        Exception: If the Vision API response carries an error message.
    """
    client = vision.ImageAnnotatorClient()
    with open(path, 'rb') as image_file:
        content = image_file.read()
    image = vision.Image(content=content)

    # document_text_detection is the dense-text variant; text_detection would
    # be the one for non-dense text.
    response = client.document_text_detection(image=image)

    # Check the API error before looking at the annotations.
    if response.error.message:
        raise Exception("{}\n For more informations check : https://cloud.google.com/apis/design/errors".format(response.error.message))

    if type_de_piece == 1:
        extracteur = info_new_cni
    elif type_de_piece == 2:
        extracteur = info_ancien_cni
    elif type_de_piece == 3:
        extracteur = permis_de_conduite
    else:
        return "Le traitement de ce type de document n'est pas encore pris en charge"

    texts = response.text_annotations
    if not texts:
        # Nothing was detected: report "no field extracted" instead of
        # failing with IndexError on the first annotation.
        return {}

    # Only the first annotation is used. The "\r\n" prefix is kept because the
    # extractors read some fields by line position, so the leading '\r'
    # element must stay in place.
    donnees = f"\r\n{texts[0].description}".split('\n')
    return extracteur(donnees)