README.md
CHANGED
@@ -1,12 +1,30 @@
 ---
-title:
-emoji:
-colorFrom:
-colorTo:
+title: Facial Recognition App
+emoji: π
+colorFrom: blue
+colorTo: purple
 sdk: gradio
-sdk_version:
+sdk_version: 3.50.2
 app_file: app.py
 pinned: false
 ---
 
+# Facial Recognition App
+
+This application uses DeepFace and Facenet for facial recognition and similarity matching.
+
+## Hardware Requirements
+- GPU: Required
+- CPU: 4+ cores recommended
+- RAM: 8GB+ recommended
+
+## Environment Setup
+The application requires the following key dependencies:
+- deepface
+- gradio
+- huggingface_hub
+- datasets
+- Pillow
+- numpy
+
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
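The README pairs DeepFace with the Facenet embedding model. As a minimal sketch of what that pairing looks like in isolation (the image filenames are hypothetical placeholders, not files from this Space):

```python
from deepface import DeepFace

# Compare two face images with the Facenet model; DeepFace.verify returns a
# dict with a boolean "verified" flag and the embedding distance.
result = DeepFace.verify(
    img1_path="face_a.jpg",   # hypothetical example file
    img2_path="face_b.jpg",   # hypothetical example file
    model_name="Facenet",
)
print(result["verified"], result["distance"])
```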
app.py
CHANGED
@@ -1,9 +1,9 @@
+import os
 import numpy as np
 from PIL import Image
 import gradio as gr
 from deepface import DeepFace
 from datasets import load_dataset
-import os
 import pickle
 from io import BytesIO
 from huggingface_hub import upload_file, hf_hub_download, list_repo_files
@@ -13,12 +13,15 @@ import requests
 import time
 import shutil
 import tarfile
+import tensorflow as tf
+from spaces import GPU
 
+# Show available devices
+print("Available devices:", tf.config.list_physical_devices())
 
 # Clean up temporary storage if it exists
 def clean_temp_dirs():
     print("Cleaning temporary folders...")
-
     for folder in ["embeddings", "batches"]:
         path = Path(folder)
         if path.exists() and path.is_dir():
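The new startup print makes TensorFlow's device list visible in the Space logs. A small sketch of the same check narrowed to GPUs, with an optional memory-growth tweak; the tweak is an assumption for illustration, not something this app does:

```python
import tensorflow as tf

# List only GPU devices; on ZeroGPU this may be empty until a GPU is attached.
gpus = tf.config.list_physical_devices("GPU")
print("GPUs visible to TensorFlow:", gpus)

for gpu in gpus:
    # Optional: let TensorFlow allocate GPU memory on demand instead of all at once.
    tf.config.experimental.set_memory_growth(gpu, True)
```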
@@ -36,10 +39,9 @@ LOCAL_EMB_DIR.mkdir(exist_ok=True)
 HF_TOKEN = os.getenv("HF_TOKEN")
 headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
 
-# Configuration
+# Configuration
 MAX_TEMP_STORAGE_GB = 40
 UPLOAD_EVERY = 50
-embeddings_to_upload = []
 
 def get_folder_size(path):
     total = 0
@@ -47,30 +49,12 @@ def get_folder_size(path):
     for f in filenames:
         fp = os.path.join(dirpath, f)
         total += os.path.getsize(fp)
-    return total / (1024 ** 3)
+    return total / (1024 ** 3)
-
-def flush_embeddings():
-    global embeddings_to_upload
-    print("Uploading embeddings batch to Hugging Face...")
-
-    for emb_file in embeddings_to_upload:
-        try:
-            filename = emb_file.name
-            upload_file(
-                path_or_fileobj=str(emb_file),
-                path_in_repo=f"{EMBEDDINGS_SUBFOLDER}/{filename}",
-                repo_id=DATASET_ID,
-                repo_type="dataset",
-                token=HF_TOKEN
-            )
-            os.remove(emb_file)
-            print(f"Uploaded and removed: {filename}")
-            time.sleep(1.2)  # Avoid 429 rate limiting
-        except Exception as e:
-            print(f"Error uploading {filename}: {e}")
-            continue
 
-
+def preprocess_image(img: Image.Image) -> np.ndarray:
+    img_rgb = img.convert("RGB")
+    img_resized = img_rgb.resize((160, 160), Image.Resampling.LANCZOS)
+    return np.array(img_resized)
 
 # Load the CSV from the dataset
 dataset = load_dataset(
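preprocess_image now sits before the dataset load and produces the 160x160 RGB array that Facenet expects. A hedged sketch of how such an array could become an embedding: deepface accepts numpy arrays for img_path, and in recent releases represent returns a list of dicts (older releases return raw vectors); the input filename is hypothetical:

```python
from PIL import Image
from deepface import DeepFace

img = Image.open("example.jpg")      # hypothetical input file
arr = preprocess_image(img)          # (160, 160, 3) uint8 array

# enforce_detection=False because the array is assumed to already be a face crop.
reps = DeepFace.represent(img_path=arr, model_name="Facenet", enforce_detection=False)
embedding = reps[0]["embedding"]     # 128-dimensional vector for Facenet
```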
@@ -81,19 +65,9 @@ dataset = load_dataset(
     header=0
 )
 
-
-print(dataset[0])
-print("Columns:", dataset.column_names)
-
-# Preprocessing
-def preprocess_image(img: Image.Image) -> np.ndarray:
-    img_rgb = img.convert("RGB")
-    img_resized = img_rgb.resize((160, 160), Image.Resampling.LANCZOS)
-    return np.array(img_resized)
-
-
+@GPU
 def build_database():
-    print(f"Current temporary storage usage
+    print(f"Current temporary storage usage at START: {get_folder_size('.'):.2f} GB")
     print("Generating embeddings...")
     batch_size = 10
     archive_batch_size = 50
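@GPU comes from the spaces package: on a ZeroGPU Space, the decorated function holds a GPU only while each call runs. A sketch of the same pattern with an explicit time budget; the duration argument and its value are assumptions for illustration, the diff uses the bare decorator:

```python
import spaces

@spaces.GPU(duration=120)  # request up to ~120 s of GPU time per call (illustrative)
def build_database():
    ...
```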
@@ -107,8 +81,7 @@ def build_database():
         print(f"Batch {i // batch_size + 1}/{(len(dataset) + batch_size - 1) // batch_size}")
 
         for j in range(len(batch["image"])):
-
-            image_url = item["image"]
+            image_url = batch["image"][j]
 
             if not isinstance(image_url, str) or not image_url.startswith("http") or image_url.strip().lower() == "image":
                 print(f"Skipping {i + j} - invalid URL: {image_url}")
@@ -117,7 +90,7 @@ def build_database():
             name = f"image_{i + j}"
             filename = LOCAL_EMB_DIR / f"{name}.pkl"
 
-            # Check if already
+            # Check whether it was already uploaded
             try:
                 hf_hub_download(
                     repo_id=DATASET_ID,
@@ -149,8 +122,7 @@ def build_database():
                 del img_processed
                 gc.collect()
 
-
-                if len(batch_files) >= archive_batch_size or get_folder_size(".") > 40:
+                if len(batch_files) >= archive_batch_size or get_folder_size(".") > MAX_TEMP_STORAGE_GB:
                     archive_path = ARCHIVE_DIR / f"batch_{batch_index:03}.tar.gz"
                     with tarfile.open(archive_path, "w:gz") as tar:
                         for file in batch_files:
@@ -158,7 +130,6 @@ def build_database():
 
                     print(f"Packaged: {archive_path}")
 
-                    # Upload to the Hub
                     upload_file(
                         path_or_fileobj=str(archive_path),
                         path_in_repo=f"{EMBEDDINGS_SUBFOLDER}/{archive_path.name}",
@@ -168,24 +139,20 @@ def build_database():
                     )
                     print(f"Uploaded: {archive_path.name}")
 
-                    # Delete the local .pkl files and the .tar.gz
                     for f in batch_files:
                         f.unlink()
                     archive_path.unlink()
-
                     print("Cleanup complete after upload")
 
                     batch_files = []
                     batch_index += 1
-                    time.sleep(2)
-                    print(f"Current usage
-
+                    time.sleep(2)
+                    print(f"Current usage at END: {get_folder_size('.'):.2f} GB")
 
             except Exception as e:
                 print(f"Error on {name}: {e}")
                 continue
 
-    # Last batch, if anything is left
     if batch_files:
         archive_path = ARCHIVE_DIR / f"batch_{batch_index:03}.tar.gz"
         with tarfile.open(archive_path, "w:gz") as tar:
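The loop packs finished .pkl embeddings into batch_NNN.tar.gz archives and pushes them to the dataset repo. The read side is not shown in these hunks; a minimal sketch of the inverse, assuming the DATASET_ID, EMBEDDINGS_SUBFOLDER, and HF_TOKEN values defined earlier in app.py:

```python
import pickle
import tarfile
from pathlib import Path
from huggingface_hub import hf_hub_download

# Fetch one archive from the dataset repo and unpack the pickled embeddings.
archive = hf_hub_download(
    repo_id=DATASET_ID,
    repo_type="dataset",
    filename=f"{EMBEDDINGS_SUBFOLDER}/batch_000.tar.gz",  # illustrative batch name
    token=HF_TOKEN,
)
embeddings = {}
with tarfile.open(archive, "r:gz") as tar:
    tar.extractall("unpacked")
for pkl in Path("unpacked").rglob("*.pkl"):
    with open(pkl, "rb") as f:
        embeddings[pkl.stem] = pickle.load(f)
```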
@@ -207,8 +174,7 @@ def build_database():
         archive_path.unlink()
         print("Final upload and cleanup done")
 
-
-# Search for similarities in remote files
+# Search for similarities
 def find_similar_faces(uploaded_image: Image.Image):
     try:
         img_processed = preprocess_image(uploaded_image)
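find_similar_faces reports scores like "Similarity: 0.87", but the comparison itself falls outside the visible hunks. A plain cosine-similarity helper is the usual choice for Facenet vectors; this is a sketch of an assumed implementation, not the app's hidden code:

```python
import numpy as np

def cosine_similarity(a, b) -> float:
    # Cosine similarity between two embedding vectors, in [-1, 1].
    a = np.asarray(a, dtype=np.float64)
    b = np.asarray(b, dtype=np.float64)
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))
```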
@@ -259,20 +225,18 @@ def find_similar_faces(uploaded_image: Image.Image):
|
|
259 |
summary = "\n".join([f"{name} - Similitud: {sim:.2f}" for sim, name, _ in top])
|
260 |
return gallery, summary
|
261 |
|
262 |
-
# π Inicializar
|
263 |
-
print("π Iniciando app...")
|
264 |
-
build_database()
|
265 |
-
|
266 |
# ποΈ Interfaz Gradio
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
gr.
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
)
|
|
|
|
|
277 |
|
278 |
demo.launch()
|
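One design note on the new interface: build_database is now wired to a button instead of running at import time, so the Space can start without holding a GPU. On a busy ZeroGPU Space a request queue is commonly added before launch; the max_size value here is illustrative and not part of the diff:

```python
demo.queue(max_size=20)  # serialize concurrent GPU requests (illustrative)
demo.launch()
```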