Update app.py
app.py
CHANGED
```diff
@@ -72,7 +72,7 @@ _model_cache: dict[str, YOLO] = {}
 autoinc = 0  # helper for tmp-dir names
 
 # ────────────────────────────────────────────────────────────────────────────
-# Data-class & helpers
+# Data-class & basic helpers
 # ────────────────────────────────────────────────────────────────────────────
 @dataclass
 class QCConfig:
@@ -88,6 +88,13 @@ def load_yaml(path: Path) -> Dict:
     with path.open('r', encoding='utf-8') as f:
         return yaml.safe_load(f)
 
+def load_class_names(yaml_path: Path) -> List[str]:
+    data = load_yaml(yaml_path)
+    names = data.get("names", [])
+    if isinstance(names, dict):
+        return [names[k] for k in sorted(names, key=lambda x: int(x))]
+    return list(names)
+
 def parse_label_file(path: Path) -> list[tuple[int, float, float, float, float]]:
     if not path or not path.exists() or path.stat().st_size == 0:
         return []
@@ -134,7 +141,7 @@ def get_model(weights: str) -> YOLO | None:
         _model_cache[weights] = YOLO(weights)
     return _model_cache[weights]
 
-# ─────────
+# ───────── Concurrency helpers & QC functions ───────────────────────────────
 def _quality_stat_args(args: Tuple[Path, float]) -> Tuple[Path, bool, bool, bool]:
     path, thr = args
     if cv2 is None:
@@ -155,7 +162,6 @@ def _is_corrupt(path: Path) -> bool:
     except Exception:
         return True
 
-# ───────────────── Quality Checks ──────────────────────────────────────────
 def qc_integrity(imgs: List[Path], lbls: List[Path], cfg: QCConfig) -> Dict:
     missing = [i for i, l in zip(imgs, lbls) if l is None]
     corrupt = []
@@ -171,7 +177,7 @@ def qc_integrity(imgs: List[Path], lbls: List[Path], cfg: QCConfig) -> Dict:
         "score": max(score, 0),
         "details": {
             "missing_label_files": [str(p) for p in missing],
-            "corrupt_images":
+            "corrupt_images": [str(p) for p in corrupt],
         }
     }
 
@@ -182,12 +188,12 @@ def qc_class_balance(lbls: List[Path], cfg: QCConfig) -> Dict:
         boxes.append(len(bs))
         counts.update(b[0] for b in bs)
     if not counts:
-        return {"name":"Class balance","score":0,"details":"No labels"}
+        return {"name": "Class balance", "score": 0, "details": "No labels"}
     bal = min(counts.values()) / max(counts.values()) * 100
     return {
-        "name":"Class balance",
-        "score":bal,
-        "details":{
+        "name": "Class balance",
+        "score": bal,
+        "details": {
             "class_counts": dict(counts),
             "boxes_per_image": {
                 "min": min(boxes),
@@ -199,7 +205,7 @@ def qc_class_balance(lbls: List[Path], cfg: QCConfig) -> Dict:
 
 def qc_image_quality(imgs: List[Path], cfg: QCConfig) -> Dict:
     if cv2 is None:
-        return {"name":"Image quality","score":100,"details":"cv2 missing"}
+        return {"name": "Image quality", "score": 100, "details": "cv2 missing"}
     blurry, dark, bright = [], [], []
     sample = imgs[:cfg.sample_limit]
     with ThreadPoolExecutor(max_workers=cfg.cpu_count) as ex:
@@ -211,11 +217,11 @@ def qc_image_quality(imgs: List[Path], cfg: QCConfig) -> Dict:
     bad = len({*blurry, *dark, *bright})
     score = 100 - bad / max(len(sample), 1) * 100
     return {
-        "name":"Image quality",
-        "score":score,
-        "details":{
+        "name": "Image quality",
+        "score": score,
+        "details": {
             "blurry": [str(p) for p in blurry],
-            "dark":
+            "dark": [str(p) for p in dark],
             "bright": [str(p) for p in bright]
         }
     }
@@ -230,33 +236,34 @@ def qc_duplicates(imgs: List[Path], cfg: QCConfig) -> Dict:
         fd.run()
         try:
             cc = fd.connected_components_grouped(sort_by="comp_size", ascending=False)
-            clusters = cc["files"].tolist() if "files" in cc.columns
+            clusters = (cc["files"].tolist() if "files" in cc.columns
+                        else cc.groupby("component")["filename"].apply(list).tolist())
         except Exception:
             clusters = fd.connected_components()
         dup = sum(len(c) - 1 for c in clusters)
         score = max(0.0, 100 - dup / len(imgs) * 100)
-        return {"name":"Duplicates","score":score,"details":{"groups":clusters[:50]}}
+        return {"name": "Duplicates", "score": score, "details": {"groups": clusters[:50]}}
     except Exception as e:
-        return {"name":"Duplicates","score":100.0,"details":{"fastdup_error":str(e)}}
-    return {"name":"Duplicates","score":100.0,"details":{"note":"skipped"}}
+        return {"name": "Duplicates", "score": 100.0, "details": {"fastdup_error": str(e)}}
+    return {"name": "Duplicates", "score": 100.0, "details": {"note": "skipped"}}
 
 def _rel_iou(b1, b2):
     x1, y1, w1, h1 = b1
     x2, y2, w2, h2 = b2
-    xa1, ya1 = x1-w1/2, y1-h1/2
-    xa2, ya2 = x1+w1/2, y1+h1/2
-    xb1, yb1 = x2-w2/2, y2-h2/2
-    xb2, yb2 = x2+w2/2, y2+h2/2
+    xa1, ya1 = x1 - w1/2, y1 - h1/2
+    xa2, ya2 = x1 + w1/2, y1 + h1/2
+    xb1, yb1 = x2 - w2/2, y2 - h2/2
+    xb2, yb2 = x2 + w2/2, y2 + h2/2
     ix1 = max(xa1, xb1); iy1 = max(ya1, yb1)
     ix2 = min(xa2, xb2); iy2 = min(ya2, yb2)
-    inter = max(ix2-ix1,0)*max(iy2-iy1,0)
+    inter = max(ix2 - ix1, 0) * max(iy2 - iy1, 0)
     union = w1*h1 + w2*h2 - inter
     return inter/union if union else 0.0
 
 def qc_model_qa(imgs: List[Path], lbls: List[Path], cfg: QCConfig) -> Dict:
     model = get_model(cfg.weights)
     if model is None:
-        return {"name":"Model QA","score":100,"details":"skipped"}
+        return {"name": "Model QA", "score": 100, "details": "skipped"}
     ious, mism = [], []
     sample = imgs[:cfg.sample_limit]
     for i in range(0, len(sample), cfg.batch_size):
@@ -266,21 +273,23 @@ def qc_model_qa(imgs: List[Path], lbls: List[Path], cfg: QCConfig) -> Dict:
             gt = parse_label_file(Path(p).parent.parent/'labels'/f"{Path(p).stem}.txt")
             for cls, x, y, w, h in gt:
                 best = 0.0
-                for b, c, conf in zip(
-
-
+                for b, c, conf in zip(
+                    res.boxes.xywh.cpu().numpy(),
+                    res.boxes.cls.cpu().numpy(),
+                    res.boxes.conf.cpu().numpy()
+                ):
                     if conf < cfg.conf_thr or int(c) != cls:
                         continue
-                    best = max(best, _rel_iou((x,y,w,h), tuple(b)))
+                    best = max(best, _rel_iou((x, y, w, h), tuple(b)))
                 ious.append(best)
                 if best < cfg.iou_thr:
                     mism.append(str(p))
     miou = float(np.mean(ious)) if ious else 1.0
-    return {"name":"Model QA","score":miou*100,"details":{"mean_iou":miou,"mismatches":mism[:50]}}
+    return {"name": "Model QA", "score": miou*100, "details": {"mean_iou": miou, "mismatches": mism[:50]}}
 
 def qc_label_issues(imgs: List[Path], lbls: List[Path], cfg: QCConfig) -> Dict:
     if get_noise_indices is None:
-        return {"name":"Label issues","score":100,"details":"skipped"}
+        return {"name": "Label issues", "score": 100, "details": "skipped"}
     labels, idxs = [], []
     sample = imgs[:cfg.sample_limit]
     for i, p in enumerate(sample):
@@ -288,20 +297,20 @@ def qc_label_issues(imgs: List[Path], lbls: List[Path], cfg: QCConfig) -> Dict:
         for cls, *_ in bs:
             labels.append(int(cls)); idxs.append(i)
     if not labels:
-        return {"name":"Label issues","score":100,"details":"no GT"}
+        return {"name": "Label issues", "score": 100, "details": "no GT"}
     labels_arr = np.array(labels)
-    uniq
-    probs
-    noise
-    flags
-    files
-    score
-    return {"name":"Label issues","score":score,"details":{"files":files[:50]}}
+    uniq = sorted(set(labels_arr))
+    probs = np.eye(len(uniq))[np.searchsorted(uniq, labels_arr)]
+    noise = get_noise_indices(labels=labels_arr, probabilities=probs)
+    flags = sorted({idxs[n] for n in noise})
+    files = [str(sample[i]) for i in flags]
+    score = 100 - len(flags)/len(labels)*100
+    return {"name": "Label issues", "score": score, "details": {"files": files[:50]}}
 
 def aggregate(results: List[Dict]) -> float:
     return sum(DEFAULT_W[r["name"]]*r["score"] for r in results)
 
-# ───────────────── Roboflow TXT-loading logic
+# ───────────────── Roboflow TXT-loading logic for both tabs ─────────────────
 RF_RE = re.compile(r"https?://universe\.roboflow\.com/([^/]+)/([^/]+)/dataset/(\d+)")
 
 def download_rf_dataset(url: str, rf_api: Roboflow, dest: Path) -> Path:
@@ -347,6 +356,102 @@ def run_quality(
     df.index.name = "class"
     return "\n".join(md), df
 
+def merge_datasets(
+    dataset_info_list: List[Tuple[str, List[str], List[str], str]],
+    class_map_df: pd.DataFrame,
+    out_dir: Path = Path("merged_dataset"),
+    seed: int = 1234,
+) -> Path:
+    random.seed(seed)
+    if out_dir.exists():
+        shutil.rmtree(out_dir, onerror=lambda f, p, _: (os.chmod(p, stat.S_IWRITE), f(p)))
+    for sub in ("train/images","train/labels","valid/images","valid/labels"):
+        (out_dir / sub).mkdir(parents=True, exist_ok=True)
+
+    class_name_mapping = {
+        row["original_class"]: row["new_name"] if not row["remove"] else "__REMOVED__"
+        for _, row in class_map_df.iterrows()
+    }
+    limits_per_merged = {
+        row["new_name"]: int(row["max_images"])
+        for _, row in class_map_df.iterrows()
+        if not row["remove"]
+    }
+    active_classes = [c for c in sorted(set(class_name_mapping.values())) if c != "__REMOVED__"]
+    id_map = {cls: idx for idx, cls in enumerate(active_classes)}
+
+    image_to_classes: dict[str, set[str]] = {}
+    image_to_label: dict[str, Path] = {}
+    class_to_images: dict[str, set[str]] = {c: set() for c in active_classes}
+
+    for dloc, class_names_dataset, splits, _ in dataset_info_list:
+        for split in splits:
+            labels_root = Path(dloc) / split / "labels"
+            if not labels_root.exists():
+                continue
+            for lp in labels_root.rglob("*.txt"):
+                im_name, cls_set = lp.stem + ".jpg", set()
+                for cls_id, *rest in parse_label_file(lp):
+                    orig = class_names_dataset[int(cls_id)] if int(cls_id) < len(class_names_dataset) else None
+                    if orig:
+                        new = class_name_mapping.get(orig, orig)
+                        if new in active_classes:
+                            cls_set.add(new)
+                if not cls_set:
+                    continue
+                img_path = str(lp.parent.parent / "images" / f"{lp.stem}.jpg")
+                image_to_classes[img_path] = cls_set
+                image_to_label[img_path] = lp
+                for c in cls_set:
+                    class_to_images[c].add(img_path)
+
+    selected_images = set()
+    counters = {c: 0 for c in active_classes}
+    pool = [img for imgs in class_to_images.values() for img in imgs]
+    random.shuffle(pool)
+
+    for img in pool:
+        cs = image_to_classes[img]
+        if any(counters[c] >= limits_per_merged.get(c, 0) for c in cs):
+            continue
+        selected_images.add(img)
+        for c in cs:
+            counters[c] += 1
+
+    for img in selected_images:
+        split = "train" if random.random() < 0.9 else "valid"
+        dst_img = out_dir / split / "images" / Path(img).name
+        dst_img.parent.mkdir(parents=True, exist_ok=True)
+        shutil.copy(img, dst_img)
+
+        lp_src = image_to_label[img]
+        dst_lbl = out_dir / split / "labels" / lp_src.name
+        dst_lbl.parent.mkdir(parents=True, exist_ok=True)
+        lines = lp_src.read_text().splitlines()
+        new_lines = []
+        for line in lines:
+            parts = line.split()
+            cid = int(parts[0])
+            orig = class_names_dataset[cid] if cid < len(class_names_dataset) else None
+            merged = class_name_mapping.get(orig, orig) if orig else None
+            if merged and merged in active_classes:
+                new_id = id_map[merged]
+                new_lines.append(" ".join([str(new_id)] + parts[1:]))
+        if new_lines:
+            dst_lbl.write_text("\n".join(new_lines))
+        else:
+            dst_img.unlink(missing_ok=True)
+
+    data_yaml = {
+        "path": str(out_dir.resolve()),
+        "train": "train/images",
+        "val": "valid/images",
+        "nc": len(active_classes),
+        "names": active_classes,
+    }
+    (out_dir / "data.yaml").write_text(yaml.safe_dump(data_yaml))
+    return out_dir
+
 # ────────────────────────────────────────────────────────────────────────────
 # UI LAYER
 # ────────────────────────────────────────────────────────────────────────────
@@ -357,20 +462,20 @@ with gr.Blocks(css="#classdf td{min-width:120px}") as demo:
     """)
 
     with gr.Tab("Evaluate"):
-
-
-
-
-
-
-
-        yaml_in = gr.File(label="Custom YAML", file_types=['.yaml'])
-        weights_in = gr.File(label="YOLO weights (.pt)")
+        api_in = gr.Textbox(label="Roboflow API key", type="password")
+        url_txt = gr.File(label=".txt of RF dataset URLs", file_types=['.txt'])
+        zip_in = gr.File(label="Dataset ZIP")
+        path_in = gr.Textbox(label="Server path")
+        yaml_in = gr.File(label="Custom YAML", file_types=['.yaml'])
+        weights_in = gr.File(label="YOLO weights (.pt)")
+
         blur_sl = gr.Slider(0.0, 500.0, value=100.0, label="Blur threshold")
         iou_sl = gr.Slider(0.0, 1.0, value=0.5, label="IOU threshold")
        conf_sl = gr.Slider(0.0, 1.0, value=0.25, label="Min detection confidence")
+
         run_dup = gr.Checkbox(label="Check duplicates (fastdup)", value=False)
         run_modelqa= gr.Checkbox(label="Run Model QA & cleanlab", value=False)
+
         run_eval = gr.Button("Run Evaluation")
         out_md = gr.Markdown()
         out_df = gr.Dataframe()
@@ -383,11 +488,9 @@ with gr.Blocks(css="#classdf td{min-width:120px}") as demo:
         cfg = QCConfig(blur_thr, iou_thr, conf_thr, weights.name if weights else None)
         rf = Roboflow(api_key) if api_key and Roboflow else None
 
-
-        if url_txt:
+        if url_txt and rf:
             for line in Path(url_txt.name).read_text().splitlines():
-                if not line.strip():
-                    continue
+                if not line.strip(): continue
                 try:
                     ds = download_rf_dataset(line, rf, TMP_ROOT)
                     md, df = run_quality(
@@ -399,7 +502,6 @@ with gr.Blocks(css="#classdf td{min-width:120px}") as demo:
                 except Exception as e:
                     reports.append(f"### {line}\n⚠️ {e}")
 
-        # ZIP upload
         if zip_file:
             tmp = Path(tempfile.mkdtemp())
             shutil.unpack_archive(zip_file.name, tmp)
@@ -412,7 +514,6 @@ with gr.Blocks(css="#classdf td{min-width:120px}") as demo:
             reports.append(md); dfs.append(df)
             shutil.rmtree(tmp, ignore_errors=True)
 
-        # Server path
         if server_path:
             ds = Path(server_path)
             md, df = run_quality(
@@ -446,14 +547,16 @@ with gr.Blocks(css="#classdf td{min-width:120px}") as demo:
     def _load_cb(rf_key, rf_urls_file, zip_files):
         global autoinc
        info_list, log_lines = [], []
+        rf = Roboflow(rf_key) if rf_key and Roboflow else None
 
-        if rf_urls_file:
+        if rf_urls_file and rf:
             for url in Path(rf_urls_file.name).read_text().splitlines():
-
-
+                url = url.strip()
+                if not url: continue
                 try:
-                    ds = download_rf_dataset(url,
-                    names
+                    ds = download_rf_dataset(url, rf, TMP_ROOT)
+                    names = load_class_names(ds/"data.yaml")
+                    splits = [s for s in ("train","valid","test") if (ds/s).exists()]
                     info_list.append((str(ds), names, splits, Path(ds).name))
                     log_lines.append(f"✔️ RF dataset **{Path(ds).name}** loaded ({len(names)} classes)")
                 except Exception as e:
@@ -465,14 +568,13 @@ with gr.Blocks(css="#classdf td{min-width:120px}") as demo:
             tmp.mkdir(parents=True, exist_ok=True)
             shutil.unpack_archive(f.name, tmp)
             yaml_p = next(tmp.rglob("*.yaml"), None)
-            if
-
-
-
-
-            log_lines.append(f"✔️ ZIP **{tmp.name}** loaded")
+            if yaml_p:
+                names = load_class_names(yaml_p)
+                splits = [s for s in ("train","valid","test") if (tmp/s).exists()]
+                info_list.append((str(tmp), names, splits, tmp.name))
+                log_lines.append(f"✔️ ZIP **{tmp.name}** loaded")
 
-        return info_list, "\n".join(log_lines)
+        return info_list, "\n".join(log_lines) or "No datasets loaded."
 
     load_btn.click(_load_cb, [rf_key, rf_urls, zips_in], [ds_state, load_log])
 
@@ -504,7 +606,8 @@ with gr.Blocks(css="#classdf td{min-width:120px}") as demo:
             return None, "⚠️ Load datasets first."
         out_dir = merge_datasets(ds_info, class_df)
         zip_path = shutil.make_archive(str(out_dir), "zip", out_dir)
-
+        count = len(list(Path(out_dir).rglob("*.jpg")))
+        return zip_path, f"✅ Merged dataset at **{out_dir}** with {count} images."
 
     merge_btn.click(_merge_cb, [ds_state, class_df], [zip_out, merge_log])
 
```
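A few of the new pieces are worth a closer look.

The new `load_class_names` helper normalizes the two shapes that `names` can take in an Ultralytics-style `data.yaml`: a plain list, or a dict keyed by class id. A minimal standalone sketch of that behavior (the class names below are invented):

```python
import yaml

# The two YAML shapes the helper accepts; both decode to the same class list.
list_style = yaml.safe_load("names: [person, car, dog]")
dict_style = yaml.safe_load("names: {0: person, 1: car, 2: dog}")

def normalize(names):
    # Mirrors load_class_names: dict keys are class ids, so sort numerically.
    if isinstance(names, dict):
        return [names[k] for k in sorted(names, key=lambda x: int(x))]
    return list(names)

assert normalize(list_style["names"]) == ["person", "car", "dog"]
assert normalize(dict_style["names"]) == ["person", "car", "dog"]
```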
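The reformatted `_rel_iou` computes IoU directly on YOLO-normalized `(cx, cy, w, h)` boxes by converting centers to corner coordinates. A quick sanity check with made-up boxes:

```python
def _rel_iou(b1, b2):
    # Same code as in the diff: center/size to corners, then intersect and union.
    x1, y1, w1, h1 = b1
    x2, y2, w2, h2 = b2
    xa1, ya1 = x1 - w1/2, y1 - h1/2
    xa2, ya2 = x1 + w1/2, y1 + h1/2
    xb1, yb1 = x2 - w2/2, y2 - h2/2
    xb2, yb2 = x2 + w2/2, y2 + h2/2
    ix1 = max(xa1, xb1); iy1 = max(ya1, yb1)
    ix2 = min(xa2, xb2); iy2 = min(ya2, yb2)
    inter = max(ix2 - ix1, 0) * max(iy2 - iy1, 0)
    union = w1*h1 + w2*h2 - inter
    return inter/union if union else 0.0

assert abs(_rel_iou((0.5, 0.5, 0.2, 0.2), (0.5, 0.5, 0.2, 0.2)) - 1.0) < 1e-9  # identical boxes
assert _rel_iou((0.2, 0.2, 0.1, 0.1), (0.8, 0.8, 0.1, 0.1)) == 0.0             # disjoint boxes
```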
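Finally, `merge_datasets` reads its class mapping from a DataFrame with `original_class`, `new_name`, `remove`, and `max_images` columns, and takes one `(path, class_names, splits, name)` tuple per source dataset. A hypothetical call, with invented paths and classes (the commented-out call needs real datasets on disk):

```python
import pandas as pd

# Mapping two source classes onto one merged class; "sign" is dropped entirely.
class_map_df = pd.DataFrame([
    {"original_class": "car",   "new_name": "vehicle", "remove": False, "max_images": 500},
    {"original_class": "truck", "new_name": "vehicle", "remove": False, "max_images": 500},
    {"original_class": "sign",  "new_name": "sign",    "remove": True,  "max_images": 0},
])

# One tuple per loaded dataset: (root path, class names, available splits, display name).
dataset_info_list = [
    ("/tmp/ds_a", ["car", "sign"], ["train", "valid"], "ds_a"),
    ("/tmp/ds_b", ["truck"],       ["train"],          "ds_b"),
]

# out_dir = merge_datasets(dataset_info_list, class_map_df)
```

Renaming both `car` and `truck` to `vehicle` collapses them into a single class id in the merged `data.yaml`; rows with `remove=True` are mapped to `__REMOVED__` and filtered out of the copied label files, and `max_images` caps how many selected images each merged class contributes.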