Files changed (3)
  1. app.py +47 -176
  2. metadata.csv +0 -0
  3. metadata.py +0 -23
app.py CHANGED
@@ -2,148 +2,41 @@ import numpy as np
 from PIL import Image
 import gradio as gr
 from deepface import DeepFace
-from datasets import load_dataset
+from datasets import load_dataset, DownloadConfig
 import os
-import pickle
-from io import BytesIO
-from huggingface_hub import upload_file, hf_hub_download, list_repo_files
-from pathlib import Path
-import gc
-import requests
-import time
-import shutil
+os.system("rm -rf ~/.cache/huggingface/hub/datasets--Segizu--dataset_faces")
 
-# 📁 Parameters
-DATASET_ID = "Segizu/facial-recognition"
-EMBEDDINGS_SUBFOLDER = "embeddings"
-LOCAL_EMB_DIR = Path("embeddings")
-LOCAL_EMB_DIR.mkdir(exist_ok=True)
-HF_TOKEN = os.getenv("HF_TOKEN")
-headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
+# Load the Hugging Face dataset, forcing a clean download
+download_config = DownloadConfig(force_download=True)
+dataset = load_dataset("Segizu/dataset_faces", download_config=download_config)
+if "train" in dataset:
+    dataset = dataset["train"]
 
-# 💾 Storage control settings
-MAX_TEMP_STORAGE_GB = 40
-UPLOAD_EVERY = 50
-embeddings_to_upload = []
-
-def get_folder_size(path):
-    total = 0
-    for dirpath, _, filenames in os.walk(path):
-        for f in filenames:
-            fp = os.path.join(dirpath, f)
-            total += os.path.getsize(fp)
-    return total / (1024 ** 3)  # in GB
-
-def flush_embeddings():
-    global embeddings_to_upload
-    print("🚀 Uploading batch of embeddings to Hugging Face...")
-
-    for emb_file in embeddings_to_upload:
-        try:
-            filename = emb_file.name
-            upload_file(
-                path_or_fileobj=str(emb_file),
-                path_in_repo=f"{EMBEDDINGS_SUBFOLDER}/{filename}",
-                repo_id=DATASET_ID,
-                repo_type="dataset",
-                token=HF_TOKEN
-            )
-            os.remove(emb_file)
-            print(f"✅ Uploaded and deleted: {filename}")
-            time.sleep(1.2)  # avoid 429s
-        except Exception as e:
-            print(f"❌ Error uploading {filename}: {e}")
-            continue
-
-    embeddings_to_upload = []
-
-# ✅ Load the CSV from the dataset
-dataset = load_dataset(
-    "csv",
-    data_files="metadata.csv",
-    split="train",
-    column_names=["image"],
-    header=0
-)
-
-print("✅ Post-load validation")
-print(dataset[0])
-print("Columns:", dataset.column_names)
-
-# 🔄 Preprocessing
-def preprocess_image(img: Image.Image) -> np.ndarray:
+# 🔄 Preprocess the image for Facenet
+def preprocess_image(img):
     img_rgb = img.convert("RGB")
     img_resized = img_rgb.resize((160, 160), Image.Resampling.LANCZOS)
     return np.array(img_resized)
 
-# 📦 Generate and upload embeddings
+# 📦 Build the embeddings database
 def build_database():
-    print("🔄 Generating embeddings...")
-    batch_size = 10
-
-    for i in range(0, len(dataset), batch_size):
-        batch = dataset[i:i + batch_size]
-        print(f"📦 Batch {i // batch_size + 1}/{(len(dataset) + batch_size - 1) // batch_size}")
-
-        for j in range(len(batch["image"])):
-            item = {"image": batch["image"][j]}
-            image_url = item["image"]
-
-            if not isinstance(image_url, str) or not image_url.startswith("http") or image_url.strip().lower() == "image":
-                print(f"⚠️ Skipping {i + j} - invalid URL: {image_url}")
-                continue
-
-            name = f"image_{i + j}"
-            filename = LOCAL_EMB_DIR / f"{name}.pkl"
-
-            # Check whether it already exists on HF
-            try:
-                hf_hub_download(
-                    repo_id=DATASET_ID,
-                    repo_type="dataset",
-                    filename=f"{EMBEDDINGS_SUBFOLDER}/{name}.pkl",
-                    token=HF_TOKEN
-                )
-                print(f"⏩ Already exists remotely: {name}.pkl")
-                continue
-            except:
-                pass
-
-            try:
-                response = requests.get(image_url, headers=headers, timeout=10)
-                response.raise_for_status()
-                img = Image.open(BytesIO(response.content)).convert("RGB")
-
-                img_processed = preprocess_image(img)
-                embedding = DeepFace.represent(
-                    img_path=img_processed,
-                    model_name="Facenet",
-                    enforce_detection=False
-                )[0]["embedding"]
-
-                # Save locally for now
-                with open(filename, "wb") as f:
-                    pickle.dump({"name": name, "img": img, "embedding": embedding}, f)
-
-                embeddings_to_upload.append(filename)
-
-                # If limits are exceeded, upload the batch
-                if get_folder_size(LOCAL_EMB_DIR) >= MAX_TEMP_STORAGE_GB or len(embeddings_to_upload) >= UPLOAD_EVERY:
-                    flush_embeddings()
-
-                del img_processed
-                gc.collect()
-
-            except Exception as e:
-                print(f"❌ Error on {name}: {e}")
-                continue
-
-    # Upload whatever is left
-    if embeddings_to_upload:
-        flush_embeddings()
+    database = []
+    for i, item in enumerate(dataset):
+        try:
+            img = item["image"]
+            img_processed = preprocess_image(img)
+            embedding = DeepFace.represent(
+                img_path=img_processed,
+                model_name="Facenet",
+                enforce_detection=False
+            )[0]["embedding"]
+            database.append((f"image_{i}", img, embedding))
+        except Exception as e:
+            print(f"Could not process image {i}: {e}")
+    return database
 
-# 🔍 Search for similar faces in remote files
-def find_similar_faces(uploaded_image: Image.Image):
+# 🔍 Find similar faces
+def find_similar_faces(uploaded_image):
     try:
         img_processed = preprocess_image(uploaded_image)
         query_embedding = DeepFace.represent(
@@ -151,62 +44,40 @@ def find_similar_faces(uploaded_image: Image.Image):
             model_name="Facenet",
             enforce_detection=False
         )[0]["embedding"]
-        del img_processed
-        gc.collect()
-    except Exception as e:
-        return [], f"⚠ Error processing image: {str(e)}"
+    except:
+        return [], "⚠ No valid face was detected in the image."
 
     similarities = []
-
-    try:
-        embedding_files = [
-            f for f in list_repo_files(DATASET_ID, repo_type="dataset", token=HF_TOKEN)
-            if f.startswith(f"{EMBEDDINGS_SUBFOLDER}/") and f.endswith(".pkl")
-        ]
-    except Exception as e:
-        return [], f"⚠ Error fetching files: {str(e)}"
-
-    for file_path in embedding_files:
-        try:
-            file_bytes = requests.get(
-                f"https://huggingface.co/datasets/{DATASET_ID}/resolve/main/{file_path}",
-                headers=headers,
-                timeout=10
-            ).content
-            record = pickle.loads(file_bytes)
-
-            name = record["name"]
-            img = record["img"]
-            emb = record["embedding"]
-
-            dist = np.linalg.norm(np.array(query_embedding) - np.array(emb))
-            sim_score = 1 / (1 + dist)
-            similarities.append((sim_score, name, np.array(img)))
+    for name, db_img, embedding in database:
+        dist = np.linalg.norm(np.array(query_embedding) - np.array(embedding))
+        sim_score = 1 / (1 + dist)
+        similarities.append((sim_score, name, db_img))
 
-        except Exception as e:
-            print(f"⚠ Error with {file_path}: {e}")
-            continue
+    similarities.sort(reverse=True)
+    top_matches = similarities[:]
 
-    similarities.sort(reverse=True)
-    top = similarities[:5]
-    gallery = [(img, f"{name} - Similarity: {sim:.2f}") for sim, name, img in top]
-    summary = "\n".join([f"{name} - Similarity: {sim:.2f}" for sim, name, _ in top])
-    return gallery, summary
+    gallery_items = []
+    text_summary = ""
+    for sim, name, img in top_matches:
+        caption = f"{name} - Similarity: {sim:.2f}"
+        gallery_items.append((img, caption))
+        text_summary += caption + "\n"
+
+    return gallery_items, text_summary
 
-# 🚀 Initialize
-print("🚀 Starting app...")
-build_database()
+# ⚙️ Initialize the database
+database = build_database()
 
 # 🎛️ Gradio interface
 demo = gr.Interface(
     fn=find_similar_faces,
     inputs=gr.Image(label="📤 Upload an image", type="pil"),
     outputs=[
-        gr.Gallery(label="📸 Similar faces"),
-        gr.Textbox(label="🧠 Details", lines=6)
+        gr.Gallery(label="📸 Most similar faces"),
+        gr.Textbox(label="🧠 Similarity", lines=6)
     ],
-    title="🔍 Facial recognition with DeepFace",
-    description="Upload an image and find matches in the private Hugging Face dataset using Facenet embeddings."
+    title="🔍 Face Finder with DeepFace",
+    description="Upload an image and it will be compared against the faces in the dataset hosted on Hugging Face (`Segizu/dataset_faces`)."
 )
 
 demo.launch()
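Note on the new startup path: it clears the cached copy with a shell `rm -rf` and then re-downloads via `DownloadConfig(force_download=True)`. A minimal sketch of an equivalent that stays inside the `datasets` API, assuming the same repo id; `download_mode="force_redownload"` is a standard `load_dataset` argument, not something this commit uses:

from datasets import load_dataset

# Refresh the cached dataset without shelling out to rm -rf:
# "force_redownload" re-fetches and rebuilds the cached copy.
dataset = load_dataset("Segizu/dataset_faces", download_mode="force_redownload")
if "train" in dataset:
    dataset = dataset["train"]

Either way the app re-downloads every image, and `build_database()` then recomputes every Facenet embedding, so startup cost grows linearly with the dataset size.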
 
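The ranking step added in `find_similar_faces` is worth reading in isolation. Below is a self-contained sketch of the same metric, with illustrative names (`rank_matches`, `query`) that are not part of the commit: the Euclidean distance between two Facenet embeddings is mapped to a similarity in (0, 1], so identical embeddings score 1.0 and the score decays toward 0 as the distance grows.

import numpy as np

def rank_matches(query, database):
    # database holds (name, image, embedding) tuples, as built by build_database()
    scored = []
    for name, img, emb in database:
        dist = np.linalg.norm(np.asarray(query) - np.asarray(emb))  # L2 distance
        scored.append((1 / (1 + dist), name, img))                  # sim = 1 / (1 + dist)
    # Sort on the score alone; sorting whole tuples would fall back to
    # comparing names (and, on a full tie, images) to break ties.
    scored.sort(key=lambda t: t[0], reverse=True)
    return scored

Also note that `top_matches = similarities[:]` copies the entire list, so the new gallery shows every match; the removed version truncated to `similarities[:5]`.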
metadata.csv DELETED
The diff for this file is too large to render.
 
metadata.py DELETED
@@ -1,23 +0,0 @@
-from huggingface_hub import HfApi
-import csv
-import os
-
-HF_TOKEN = os.getenv("HF_TOKEN") or ""
-repo_id = "Segizu/facial-recognition"
-
-api = HfApi()
-files = api.list_repo_files(repo_id=repo_id, repo_type="dataset", token=HF_TOKEN)
-
-# Generate full URLs
-base_url = f"https://huggingface.co/datasets/{repo_id}/resolve/main/"
-image_urls = [base_url + f for f in files if f.lower().endswith(".jpg")]
-
-# Write the new metadata.csv
-with open("metadata.csv", "w", newline="") as f:
-    writer = csv.writer(f)
-    writer.writerow(["image"])
-    for url in image_urls:
-        writer.writerow([url])
-
-print(f"✅ metadata.csv regenerated with absolute URLs ({len(image_urls)} images)")
-
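metadata.csv is deleted without a rendered diff, but its shape follows from the code above: a single `image` header row followed by absolute `resolve/main` URLs, which the removed app.py consumed through the `datasets` CSV builder. A hedged sketch for sanity-checking such a file before pointing an app at it; the checks are illustrative, not part of this repository:

import csv

# Expect one "image" column whose rows are absolute URLs into the dataset repo.
with open("metadata.csv", newline="") as f:
    rows = list(csv.DictReader(f))

bad = [r["image"] for r in rows if not r["image"].startswith("https://")]
print(f"{len(rows)} rows, {len(bad)} entries that are not absolute URLs")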