Segizu committed
Commit 8baf080 · 1 parent: 9c5866b

Files changed (2):
1. README.md +23 -5
2. app.py +31 -67
README.md CHANGED
@@ -1,12 +1,30 @@
 ---
-title: Face Recognition
-emoji: ⚡
-colorFrom: red
-colorTo: blue
+title: Facial Recognition App
+emoji: 🔍
+colorFrom: blue
+colorTo: purple
 sdk: gradio
-sdk_version: 5.23.0
+sdk_version: 3.50.2
 app_file: app.py
 pinned: false
 ---
 
+# Facial Recognition App
+
+This application uses DeepFace and Facenet for facial recognition and similarity matching.
+
+## Hardware Requirements
+- GPU: Required
+- CPU: 4+ cores recommended
+- RAM: 8GB+ recommended
+
+## Environment Setup
+The application requires the following key dependencies:
+- deepface
+- gradio
+- huggingface_hub
+- datasets
+- Pillow
+- numpy
+
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
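For context on the README's DeepFace + Facenet claim, here is a minimal sketch (not part of this commit) of computing one embedding. The image path `face.jpg` is a hypothetical stand-in, and the exact return shape of `DeepFace.represent` varies between deepface releases, so treat this as illustrative:

```python
import numpy as np
from deepface import DeepFace

# Compute a Facenet embedding for one image.
# Recent deepface versions return a list of dicts, one per detected face.
result = DeepFace.represent(
    img_path="face.jpg",       # hypothetical local image
    model_name="Facenet",      # the model named in the README
    enforce_detection=False,   # don't raise if no face is detected
)
embedding = np.array(result[0]["embedding"])
print(embedding.shape)  # Facenet produces a 128-dimensional vector
```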
app.py CHANGED
@@ -1,9 +1,9 @@
+import os
 import numpy as np
 from PIL import Image
 import gradio as gr
 from deepface import DeepFace
 from datasets import load_dataset
-import os
 import pickle
 from io import BytesIO
 from huggingface_hub import upload_file, hf_hub_download, list_repo_files
@@ -13,12 +13,15 @@ import requests
 import time
 import shutil
 import tarfile
+import tensorflow as tf
+from spaces import GPU
 
+# 🔍 Show available devices
+print("🔍 Available devices:", tf.config.list_physical_devices())
 
 # 🔍 Clean temporary storage if it exists
 def clean_temp_dirs():
     print("🧹 Cleaning temporary folders...")
-
     for folder in ["embeddings", "batches"]:
         path = Path(folder)
         if path.exists() and path.is_dir():
@@ -36,10 +39,9 @@ LOCAL_EMB_DIR.mkdir(exist_ok=True)
 HF_TOKEN = os.getenv("HF_TOKEN")
 headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
 
-# 💾 Storage control configuration
+# 💾 Configuration
 MAX_TEMP_STORAGE_GB = 40
 UPLOAD_EVERY = 50
-embeddings_to_upload = []
 
 def get_folder_size(path):
     total = 0
@@ -47,30 +49,12 @@ def get_folder_size(path):
         for f in filenames:
             fp = os.path.join(dirpath, f)
             total += os.path.getsize(fp)
-    return total / (1024 ** 3)  # in GB
-
-def flush_embeddings():
-    global embeddings_to_upload
-    print("🚀 Uploading embedding batch to Hugging Face...")
-
-    for emb_file in embeddings_to_upload:
-        try:
-            filename = emb_file.name
-            upload_file(
-                path_or_fileobj=str(emb_file),
-                path_in_repo=f"{EMBEDDINGS_SUBFOLDER}/{filename}",
-                repo_id=DATASET_ID,
-                repo_type="dataset",
-                token=HF_TOKEN
-            )
-            os.remove(emb_file)
-            print(f"✅ Uploaded and deleted: {filename}")
-            time.sleep(1.2)  # avoid 429s
-        except Exception as e:
-            print(f"❌ Error uploading {filename}: {e}")
-            continue
-
-    embeddings_to_upload = []
+    return total / (1024 ** 3)
 
+def preprocess_image(img: Image.Image) -> np.ndarray:
+    img_rgb = img.convert("RGB")
+    img_resized = img_rgb.resize((160, 160), Image.Resampling.LANCZOS)
+    return np.array(img_resized)
+
 # ✅ Load the CSV from the dataset
 dataset = load_dataset(
@@ -81,19 +65,9 @@ dataset = load_dataset(
     header=0
 )
 
-print("✅ Post-load validation")
-print(dataset[0])
-print("Columns:", dataset.column_names)
-
-# 🔄 Preprocessing
-def preprocess_image(img: Image.Image) -> np.ndarray:
-    img_rgb = img.convert("RGB")
-    img_resized = img_rgb.resize((160, 160), Image.Resampling.LANCZOS)
-    return np.array(img_resized)
-
-
+@GPU
 def build_database():
-    print(f"📊 Current temporary storage use _ START_: {get_folder_size('.'):.2f} GB")
+    print(f"📊 Current temporary storage use START: {get_folder_size('.'):.2f} GB")
     print("🔄 Generating embeddings...")
     batch_size = 10
     archive_batch_size = 50
@@ -107,8 +81,7 @@ def build_database():
         print(f"📦 Batch {i // batch_size + 1}/{(len(dataset) + batch_size - 1) // batch_size}")
 
         for j in range(len(batch["image"])):
-            item = {"image": batch["image"][j]}
-            image_url = item["image"]
+            image_url = batch["image"][j]
 
             if not isinstance(image_url, str) or not image_url.startswith("http") or image_url.strip().lower() == "image":
                 print(f"⚠️ Skipping {i + j} - invalid URL: {image_url}")
@@ -117,7 +90,7 @@ def build_database():
             name = f"image_{i + j}"
             filename = LOCAL_EMB_DIR / f"{name}.pkl"
 
-            # Check whether it already exists on the Hugging Face Hub
+            # Check whether it has already been uploaded
             try:
                 hf_hub_download(
                     repo_id=DATASET_ID,
@@ -149,8 +122,7 @@ def build_database():
                 del img_processed
                 gc.collect()
 
-                # If we hit the per-archive batch size or storage is critical
-                if len(batch_files) >= archive_batch_size or get_folder_size(".") > 40:
+                if len(batch_files) >= archive_batch_size or get_folder_size(".") > MAX_TEMP_STORAGE_GB:
                     archive_path = ARCHIVE_DIR / f"batch_{batch_index:03}.tar.gz"
                     with tarfile.open(archive_path, "w:gz") as tar:
                         for file in batch_files:
@@ -158,7 +130,6 @@ def build_database():
 
                     print(f"📦 Packaged: {archive_path}")
 
-                    # Upload to the Hub
                     upload_file(
                         path_or_fileobj=str(archive_path),
                         path_in_repo=f"{EMBEDDINGS_SUBFOLDER}/{archive_path.name}",
@@ -168,24 +139,20 @@ def build_database():
                     )
                     print(f"✅ Uploaded: {archive_path.name}")
 
-                    # Delete the local .pkl files and the .tar.gz
                    for f in batch_files:
                         f.unlink()
                     archive_path.unlink()
-
                     print("🧹 Cleanup complete after upload")
 
                     batch_files = []
                     batch_index += 1
-                    time.sleep(2)  # pause to avoid 429s
-                    print(f"📊 Current temporary storage use _ FINAL_: {get_folder_size('.'):.2f} GB")
-
+                    time.sleep(2)
+                    print(f"📊 Current use FINAL: {get_folder_size('.'):.2f} GB")
 
             except Exception as e:
                 print(f"❌ Error on {name}: {e}")
                 continue
 
-    # Last batch, if anything remains
     if batch_files:
         archive_path = ARCHIVE_DIR / f"batch_{batch_index:03}.tar.gz"
         with tarfile.open(archive_path, "w:gz") as tar:
@@ -207,8 +174,7 @@ def build_database():
         archive_path.unlink()
         print("✅ Final upload and cleanup")
 
-
-# 🔍 Find similar faces in the remote archives
+# 🔍 Find similar faces
 def find_similar_faces(uploaded_image: Image.Image):
     try:
         img_processed = preprocess_image(uploaded_image)
@@ -259,20 +225,18 @@ def find_similar_faces(uploaded_image: Image.Image):
         summary = "\n".join([f"{name} - Similarity: {sim:.2f}" for sim, name, _ in top])
         return gallery, summary
 
-# 🚀 Initialize
-print("🚀 Starting app...")
-build_database()
-
 # 🎛️ Gradio interface
-demo = gr.Interface(
-    fn=find_similar_faces,
-    inputs=gr.Image(label="📤 Upload an image", type="pil"),
-    outputs=[
-        gr.Gallery(label="📸 Similar faces"),
-        gr.Textbox(label="🧠 Details", lines=6)
-    ],
-    title="🔍 Facial recognition with DeepFace",
-    description="Upload an image and find matches in the private Hugging Face dataset using Facenet embeddings."
-)
+with gr.Blocks() as demo:
+    gr.Markdown("## 🔍 Facial recognition with DeepFace + ZeroGPU")
+    with gr.Row():
+        image_input = gr.Image(label="📤 Upload an image", type="pil")
+        find_btn = gr.Button("🔎 Find similar faces")
+    gallery = gr.Gallery(label="📸 Similar faces")
+    summary = gr.Textbox(label="🧠 Details", lines=6)
+    find_btn.click(fn=find_similar_faces, inputs=image_input, outputs=[gallery, summary])
+
+    with gr.Row():
+        build_btn = gr.Button("⚙️ Build embeddings database (uses GPU)")
+        build_btn.click(fn=build_database, inputs=[], outputs=[])
 
 demo.launch()
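For context on the batching scheme in `build_database`: each `.pkl` embedding is packed into a `batch_NNN.tar.gz` archive and pushed to the dataset repo under `EMBEDDINGS_SUBFOLDER`. A minimal sketch of reading one archive back, assuming a hypothetical repo id and that each `.pkl` holds one pickled embedding (the payload format is not shown in this diff):

```python
import pickle
import tarfile
from huggingface_hub import hf_hub_download

# Download one uploaded batch archive from the dataset repo.
archive = hf_hub_download(
    repo_id="user/face-dataset",             # stands in for DATASET_ID
    repo_type="dataset",
    filename="embeddings/batch_000.tar.gz",  # mirrors the app's naming pattern
)

embeddings = {}
with tarfile.open(archive, "r:gz") as tar:
    for member in tar.getmembers():
        fh = tar.extractfile(member)
        if fh is not None:
            embeddings[member.name] = pickle.load(fh)  # assumed: one embedding per .pkl

print(f"Loaded {len(embeddings)} embeddings")
```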
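The diff also does not show how `find_similar_faces` scores candidates; its summary prints `Similarity: {sim:.2f}`, and a common metric for comparing Facenet embeddings is cosine similarity. A self-contained sketch (the helper name is ours, not the app's):

```python
import numpy as np

def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    # 1.0 means the vectors point the same way; values near 0 mean unrelated.
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

# Toy usage with random 128-d vectors standing in for Facenet embeddings.
rng = np.random.default_rng(seed=0)
query = rng.normal(size=128)
candidate = rng.normal(size=128)
print(f"Similarity: {cosine_similarity(query, candidate):.2f}")
```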