metadata v12
app.py CHANGED
```diff
@@ -22,12 +22,6 @@ EMBEDDINGS_FILE = EMBEDDINGS_DIR / "embeddings.pkl"
 
 # ✅ Load dataset from metadata.csv (with absolute URLs)
 dataset = load_dataset("csv", data_files="metadata.csv")
-dataset = dataset["train"].cast_column("image", HfImage(decode=True))
-
-# Debug dataset loading
-print("Dataset columns:", dataset.column_names)
-print("Dataset features:", dataset.features)
-print("First item:", dataset[0])
 
 # Preprocess image for Facenet
 def preprocess_image(img: Image.Image) -> np.ndarray:
@@ -48,36 +42,30 @@ def build_database():
 
     # Debug: Print dataset structure
     print("Dataset structure:", dataset.features)
-    print("First item structure:", dataset[0])
+    print("First item structure:", dataset["train"][0])
     print("Dataset type:", type(dataset))
-    print("Dataset item type:", type(dataset[0]))
+    print("Dataset item type:", type(dataset["train"][0]))
 
-    for i in range(0, len(dataset), batch_size):
-        batch = dataset[i:i + batch_size]
-        print(f"📦 Processing batch {i // batch_size + 1}/{(len(dataset) + batch_size - 1) // batch_size}")
+    for i in range(0, len(dataset["train"]), batch_size):
+        batch = dataset["train"][i:i + batch_size]
+        print(f"📦 Processing batch {i // batch_size + 1}/{(len(dataset['train']) + batch_size - 1) // batch_size}")
 
         for j, item in enumerate(batch):
             try:
                 print(f"Debug - Processing item {i+j}")
                 print(f"Debug - Item type: {type(item)}")
-                print(f"Debug - Item
-
-                # Skip if item is not in the expected format
-                if not isinstance(item, dict):
-                    print(f"⚠️ Skipping item {i+j} - Not a dictionary")
-                    continue
+                print(f"Debug - Item content: {item}")
 
-
-
+                # Get the image URL
+                image_url = item["image"]
+                if not isinstance(image_url, str) or not image_url.startswith("http"):
+                    print(f"⚠️ Skipping item {i+j} - Invalid URL format")
                     continue
 
-
-
-
-
-                if not isinstance(img, Image.Image):
-                    print(f"⚠️ Skipping item {i+j} - Not a PIL Image")
-                    continue
+                # Download and process the image
+                response = requests.get(image_url, timeout=10)
+                response.raise_for_status()
+                img = Image.open(BytesIO(response.content))
 
                 # Ensure image is in RGB mode
                 img = img.convert("RGB")
@@ -89,7 +77,7 @@ def build_database():
                 )[0]["embedding"]
 
                 database.append((f"image_{i+j}", img, embedding))
-                print(f"✅ Processed image {i+j+1}/{len(dataset)}")
+                print(f"✅ Processed image {i+j+1}/{len(dataset['train'])}")
 
                 del img_processed
                 gc.collect()
```
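
For reference, the sketch below isolates the URL-based loading path this commit switches to: `metadata.csv` is read with `datasets.load_dataset`, and each image is fetched over HTTP with `requests` and decoded with Pillow. It is a minimal, standalone approximation rather than the Space's exact code; `batch_size` is an assumed value, and since slicing a `datasets.Dataset` returns a dict of column lists, the sketch indexes the `"image"` column directly.

```python
# Minimal sketch of the URL-based loading introduced by this commit.
# Assumptions: metadata.csv exists locally and its "image" column holds
# absolute HTTP(S) URLs; batch_size is a placeholder value.
from io import BytesIO

import requests
from datasets import load_dataset
from PIL import Image

dataset = load_dataset("csv", data_files="metadata.csv")
train = dataset["train"]
batch_size = 8  # assumed; the app defines its own batch_size

for i in range(0, len(train), batch_size):
    # Slicing a datasets.Dataset yields a dict of column lists, so take the URL column.
    batch_urls = train[i:i + batch_size]["image"]
    for j, image_url in enumerate(batch_urls):
        if not isinstance(image_url, str) or not image_url.startswith("http"):
            print(f"⚠️ Skipping item {i + j} - Invalid URL format")
            continue
        response = requests.get(image_url, timeout=10)
        response.raise_for_status()
        # Decode and normalize to RGB, as the app does before embedding.
        img = Image.open(BytesIO(response.content)).convert("RGB")
        print(f"Fetched image {i + j + 1}/{len(train)}: {img.size}")
```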
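
The embedding call itself sits outside the changed hunks; only its tail, `)[0]["embedding"]`, is visible. As an illustration only, assuming the Space uses the `deepface` package's Facenet model (consistent with that indexing and with the "Preprocess image for Facenet" comment), the call might look like the sketch below.

```python
# Illustration only - the actual embedding call is not shown in this diff.
# Assumes the deepface package with the Facenet model; the input array is
# whatever preprocess_image in the app produces.
import numpy as np
from deepface import DeepFace

def embed(img_array: np.ndarray) -> list[float]:
    # DeepFace.represent returns a list with one dict per detected face;
    # each dict carries an "embedding" vector.
    result = DeepFace.represent(
        img_path=img_array,
        model_name="Facenet",
        enforce_detection=False,  # don't fail on images without a detectable face
    )
    return result[0]["embedding"]
```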