evaltest2

Sleeping

App Files Files Community

wuhp committed on Apr 17

Commit

e09a48c

verified ·

1 Parent(s): aa01fdb

Update app.py

Browse files

Files changed (1) hide show

app.py +151 -118

app.py CHANGED Viewed

@@ -1,18 +1,16 @@
-# app.py – Roboflow‑aware YOLOv8 Dataset Quality Evaluator for Hugging Face Spaces
-#
-# ▸ Prompts for a Roboflow **API key** and a `.txt` list of Universe dataset URLs (one per line)
-# ▸ Downloads each dataset automatically in YOLOv8 format to a temp directory
-# ▸ Runs a battery of quality checks:
-#     – integrity / corruption
-#     – class‑balance stats
-#     – blur / brightness image‑quality flags
-#     – exact / near‑duplicate detection
-#     – optional model‑assisted label QA (needs a YOLO .pt weights file)
-# ▸ Still supports manual ZIP / server‑path evaluation
-# ▸ Outputs a Markdown report + class‑distribution dataframe
-#
-# Hugging Face Spaces picks up `app.py` automatically.  Dependencies go in `requirements.txt`.
-# Spaces injects the port as $PORT – we pass it to demo.launch().
 from __future__ import annotations
@@ -23,7 +21,7 @@ import re
 import shutil
 import tempfile
 from collections import Counter, defaultdict
-from concurrent.futures import ThreadPoolExecutor, as_completed
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Dict, List, Tuple
@@ -35,9 +33,7 @@ import yaml
 from PIL import Image
 from tqdm import tqdm
-# --------------------------------------------------------------------------- #
-# Optional heavy deps – present locally, but fine‑grained to keep Spaces slim #
-# --------------------------------------------------------------------------- #
 try:
     import cv2  # type: ignore
 except ImportError:
@@ -48,6 +44,11 @@ try:
 except ImportError:
     imagehash = None
 try:
     from ultralytics import YOLO  # type: ignore
 except ImportError:
@@ -58,36 +59,44 @@ try:
 except ImportError:
     Roboflow = None  # type: ignore
-# --------------------------------------------------------------------------- #
 TMP_ROOT = Path(tempfile.gettempdir()) / "rf_datasets"
 TMP_ROOT.mkdir(parents=True, exist_ok=True)
 @dataclass
 class DuplicateGroup:
     hash_val: str
     paths: List[Path]
-# --------------------------------------------------------------------------- #
-# Generic helpers                                                             #
-# --------------------------------------------------------------------------- #
 def load_yaml(path: Path) -> Dict:
     with path.open(encoding="utf-8") as f:
         return yaml.safe_load(f)
-def parse_label_file(path: Path) -> List[Tuple[int, float, float, float, float]]:
-    out: List[Tuple[int, float, float, float, float]] = []
-    if not path.exists():
-        return out
-    with path.open(encoding="utf-8") as f:
-        for ln in f:
-            parts = ln.strip().split()
-            if len(parts) == 5:
-                cid, *coords = parts
-                out.append((int(cid), *map(float, coords)))
-    return out
 def guess_image_dirs(root: Path) -> List[Path]:
@@ -114,13 +123,13 @@ def gather_dataset(root: Path, yaml_path: Path | None = None):
         raise FileNotFoundError("images/ directory hierarchy missing")
     imgs = [p for d in img_dirs for p in d.rglob("*.*") if imghdr.what(p) is not None]
-    lbls = [p.parent.parent / "labels" / f"{p.stem}.txt" for p in imgs]
     return imgs, lbls, meta
-# --------------------------------------------------------------------------- #
-# Quality‑check stages                                                        #
-# --------------------------------------------------------------------------- #
 def _is_corrupt(path: Path) -> bool:
     try:
         with Image.open(path) as im:
@@ -130,40 +139,38 @@ def _is_corrupt(path: Path) -> bool:
         return True
-def qc_integrity(imgs: List[Path], lbls: List[Path]) -> Dict:
-    miss_lbl = [i for i, l in zip(imgs, lbls) if not l.exists()]
-    miss_img = [l for l in lbls if l.exists() and not (l.parent.parent / "images" / f"{l.stem}{l.suffix}").exists()]
     corrupt: List[Path] = []
-    with ThreadPoolExecutor(max_workers=os.cpu_count() or 4) as ex:
         fut = {ex.submit(_is_corrupt, p): p for p in imgs}
         for f in tqdm(as_completed(fut), total=len(fut), desc="integrity", leave=False):
             if f.result():
                 corrupt.append(fut[f])
-    score = 100 - (len(miss_lbl) + len(miss_img) + len(corrupt)) / max(len(imgs), 1) * 100
     return {
         "name": "Integrity",
         "score": max(score, 0),
         "details": {
             "missing_label_files": [str(p) for p in miss_lbl],
-            "missing_image_files": [str(p) for p in miss_img],
             "corrupt_images": [str(p) for p in corrupt],
         },
     }
-def qc_class_balance(lbls: List[Path]) -> Dict:
     cls_counts = Counter()
     boxes_per_img = []
     for l in lbls:
-        bs = parse_label_file(l)
         boxes_per_img.append(len(bs))
         cls_counts.update(b[0] for b in bs)
     if not cls_counts:
         return {"name": "Class balance", "score": 0, "details": "No labels"}
-    bal = min(cls_counts.values()) / max(cls_counts.values()) * 100
     return {
         "name": "Class balance",
         "score": bal,
@@ -177,24 +184,39 @@ def qc_class_balance(lbls: List[Path]) -> Dict:
         },
     }
-def qc_image_quality(imgs: List[Path], blur_thr: float = 100.0) -> Dict:
     if cv2 is None:
         return {"name": "Image quality", "score": 100, "details": "cv2 not installed"}
-    blurry, dark, bright = [], [], []
-    for p in tqdm(imgs, desc="img‑quality", leave=False):
-        im = cv2.imread(str(p))
-        if im is None:
-            continue
-        gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
-        lap = cv2.Laplacian(gray, cv2.CV_64F).var()
-        br = np.mean(gray)
-        if lap < blur_thr:
-            blurry.append(p)
-        if br < 25:
-            dark.append(p)
-        if br > 230:
-            bright.append(p)
     bad = len(set(blurry + dark + bright))
     score = 100 - bad / max(len(imgs), 1) * 100
@@ -208,15 +230,40 @@ def qc_image_quality(imgs: List[Path], blur_thr: float = 100.0) -> Dict:
         },
     }
-def qc_duplicates(imgs: List[Path]) -> Dict:
     if imagehash is None:
-        return {"name": "Duplicates", "score": 100, "details": "imagehash not installed"}
     hashes: Dict[str, List[Path]] = defaultdict(list)
-    for p in tqdm(imgs, desc="hashing", leave=False):
-        h = str(imagehash.average_hash(Image.open(p)))
-        hashes[h].append(p)
     groups = [g for g in hashes.values() if len(g) > 1]
     dup = sum(len(g) - 1 for g in groups)
@@ -224,9 +271,10 @@ def qc_duplicates(imgs: List[Path]) -> Dict:
     return {
         "name": "Duplicates",
         "score": score,
-        "details": {"groups": [[str(p) for p in g] for g in groups]},
     }
 def _rel_iou(b1, b2):
     x1, y1, w1, h1 = b1
@@ -234,58 +282,48 @@ def _rel_iou(b1, b2):
     xa1, ya1, xa2, ya2 = x1 - w1 / 2, y1 - h1 / 2, x1 + w1 / 2, y1 + h1 / 2
     xb1, yb1, xb2, yb2 = x2 - w2 / 2, y2 - h2 / 2, x2 + w2 / 2, y2 + h2 / 2
     ix1, iy1, ix2, iy2 = max(xa1, xb1), max(ya1, yb1), min(xa2, xb2), min(ya2, yb2)
-    iw, ih = max(ix2 - ix1, 0), max(iy2 - iy1, 0)
-    inter = iw * ih
     union = w1 * h1 + w2 * h2 - inter
-    return inter / union if union else 0
-def qc_model_qa(imgs: List[Path], weights: str | None, lbls: List[Path], iou_thr: float = 0.5) -> Dict:
     if weights is None or YOLO is None:
-        return {"name": "Model QA", "score": 100, "details": "weights or YOLO unavailable"}
     model = YOLO(weights)
     ious, mism = [], []
-    for p in tqdm(imgs, desc="model‑QA", leave=False):
-        gtb = parse_label_file(p.parent.parent / "labels" / f"{p.stem}.txt")
-        if not gtb:
-            continue
-        res = model.predict(p, verbose=False)[0]
-        for cls, x, y, w, h in gtb:
-            best = 0.0
-            for b, c in zip(res.boxes.xywh, res.boxes.cls):
-                if int(c) != cls:
-                    continue
-                best = max(best, _rel_iou((x, y, w, h), tuple(b.tolist())))
-            ious.append(best)
-            if best < iou_thr:
-                mism.append(p)
     miou = float(np.mean(ious)) if ious else 1.0
     return {
         "name": "Model QA",
         "score": miou * 100,
-        "details": {"mean_iou": miou, "mismatched_images": [str(p) for p in mism[:50]]},
     }
-# --------------------------------------------------------------------------- #
-DEFAULT_W = {
-    "Integrity": 0.30,
-    "Class balance": 0.15,
-    "Image quality": 0.15,
-    "Duplicates": 0.10,
-    "Model QA": 0.30,
-}
 def aggregate(scores):
     return sum(DEFAULT_W.get(r["name"], 0) * r["score"] for r in scores)
-# --------------------------------------------------------------------------- #
-# Roboflow helpers                                                             #
-# --------------------------------------------------------------------------- #
 RF_RE = re.compile(r"https://universe\.roboflow\.com/([^/]+)/([^/]+)/dataset/(\d+)")
 def download_rf_dataset(url: str, rf_api: "Roboflow", dest: Path) -> Path:
@@ -302,10 +340,8 @@ def download_rf_dataset(url: str, rf_api: "Roboflow", dest: Path) -> Path:
     project.version(int(ver)).download("yolov8", location=str(ds_dir))
     return ds_dir
-# --------------------------------------------------------------------------- #
-# Main evaluation logic                                                        #
-# --------------------------------------------------------------------------- #
 def run_quality(root: Path, yaml_override: Path | None, weights: Path | None):
     imgs, lbls, meta = gather_dataset(root, yaml_override)
     res = [
@@ -316,8 +352,8 @@ def run_quality(root: Path, yaml_override: Path | None, weights: Path | None):
         qc_model_qa(imgs, str(weights) if weights else None, lbls),
     ]
     final = aggregate(res)
-    # markdown
-    md = [f"## **{meta.get('name', root.name)}**  —  Score {final:.1f}/100"]
     for r in res:
         md.append(f"### {r['name']}  {r['score']:.1f}")
         md.append("<details><summary>details</summary>\n\n```json")
@@ -330,10 +366,8 @@ def run_quality(root: Path, yaml_override: Path | None, weights: Path | None):
     df.index.name = "class"
     return md_str, df
-# --------------------------------------------------------------------------- #
-# Gradio interface                                                             #
-# --------------------------------------------------------------------------- #
 def evaluate(
     api_key: str,
     url_txt: gr.File | None,
@@ -347,7 +381,7 @@ def evaluate(
     reports, dfs = [], []
-    # ---- Roboflow batch mode ----
     if url_txt:
         if Roboflow is None:
             return "`roboflow` not installed", pd.DataFrame()
@@ -355,8 +389,7 @@ def evaluate(
             return "Enter Roboflow API key", pd.DataFrame()
         rf = Roboflow(api_key=api_key.strip())
-        txt_lines = Path(url_txt.name).read_text().splitlines()
-        for line in txt_lines:
             if not line.strip():
                 continue
             try:
@@ -367,7 +400,7 @@ def evaluate(
             except Exception as e:
                 reports.append(f"### {line}\n\n⚠️ {e}")
-    # ---- Manual ZIP ----
     if zip_file:
         tmp_dir = Path(tempfile.mkdtemp())
         shutil.unpack_archive(zip_file.name, tmp_dir)
@@ -376,7 +409,7 @@ def evaluate(
         dfs.append(df)
         shutil.rmtree(tmp_dir, ignore_errors=True)
-    # ---- Manual path ----
     if server_path:
         md, df = run_quality(Path(server_path), Path(yaml_file.name) if yaml_file else None, Path(weights.name) if weights else None)
         reports.append(md)
@@ -386,7 +419,7 @@ def evaluate(
     combined_df = pd.concat(dfs).groupby(level=0).sum() if dfs else pd.DataFrame()
     return summary_md, combined_df
 with gr.Blocks(title="YOLO Dataset Quality Evaluator") as demo:
     gr.Markdown(
         """
@@ -396,7 +429,7 @@ with gr.Blocks(title="YOLO Dataset Quality Evaluator") as demo:
 1. Paste your **Roboflow API key**
 2. Upload a **.txt** file – one `https://universe.roboflow.com/.../dataset/x` per line
-### Manual
 * Upload a dataset **ZIP** or type a dataset **path** on the server
 * Optionally supply a custom **data.yaml** and/or a **YOLO .pt** weights file for model‑assisted QA
 """

+"""
+app.py – Roboflow‑aware YOLOv8 Dataset Quality Evaluator (v2)
+Changelog (2025‑04‑17)
+──────────────────────
+• **CPU‑bound loops parallelised** with `concurrent.futures.ProcessPoolExecutor`.
+• **Batch inference** in `qc_model_qa()` (GPU util ↑, latency ↓).
+• Optional **fastdup** path for duplicate detection (≈ 10× faster on large sets).
+• Faster NumPy‑based `parse_label_file()`.
+• Small refactors → clearer separation of stages & fewer globals.
+• Graceful degradation if heavy deps unavailable (cv2, imagehash, fastdup).
+• Tunable `CPU_COUNT` + env‑var guard for HF Spaces quota.
+"""
 from __future__ import annotations
 import shutil
 import tempfile
 from collections import Counter, defaultdict
+from concurrent.futures import ProcessPoolExecutor, as_completed
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Dict, List, Tuple
 from PIL import Image
 from tqdm import tqdm
+# ───────────────────────────────────────── Heavy optional deps ──
 try:
     import cv2  # type: ignore
 except ImportError:
 except ImportError:
     imagehash = None
+try:
+    import fastdup  # type: ignore
+except ImportError:
+    fastdup = None
 try:
     from ultralytics import YOLO  # type: ignore
 except ImportError:
 except ImportError:
     Roboflow = None  # type: ignore
+# ───────────────────────────────────────── Config & constants ──
 TMP_ROOT = Path(tempfile.gettempdir()) / "rf_datasets"
 TMP_ROOT.mkdir(parents=True, exist_ok=True)
# Limit CPU workers on HF Spaces (feel free to raise locally).
# Both knobs are clamped to >= 1: ProcessPoolExecutor rejects max_workers <= 0,
# and a batch size of 0 would be an invalid range() step in qc_model_qa().
CPU_COUNT = max(1, int(os.getenv("QC_CPU", (os.cpu_count() or 4) // 2)))
BATCH = max(1, int(os.getenv("QC_BATCH", 16)))
# Per-stage weights applied by aggregate(); keys must match each stage's "name".
# They sum to 1.0, so the weighted total stays on the same 0-100 scale as the
# individual stage scores.
DEFAULT_W = {
    "Integrity": 0.30,
    "Class balance": 0.15,
    "Image quality": 0.15,
    "Duplicates": 0.10,
    "Model QA": 0.30,
}
 @dataclass
 class DuplicateGroup:
     """Bundle of image paths that share one hash string.

     NOTE(review): nothing in the visible code constructs this class; the
     duplicate check builds plain lists instead. Confirm it is still needed.
     """
     # Hash string common to every path in the group.
     hash_val: str
     # Paths of the images that produced ``hash_val``.
     paths: List[Path]
+# ───────────────────────────────────────── Generic helpers ─────
 def load_yaml(path: Path) -> Dict:
     """Read the UTF-8 YAML file at ``path`` and return its parsed content.

     Parsing goes through ``yaml.safe_load``, so only plain YAML types are
     constructed.
     """
     with path.open(encoding="utf-8") as stream:
         parsed = yaml.safe_load(stream)
     return parsed
def parse_label_file(path: Path) -> list[tuple[int, float, float, float, float]]:
    """Parse a YOLO detection label file into ``(class_id, x, y, w, h)`` rows.

    The file is read line by line so that one malformed row does not discard
    the rest of the file.

    Args:
        path: Location of the ``.txt`` label file.

    Returns:
        One tuple per valid row, with the class id as ``int`` and the four box
        values as ``float``. Rows that do not have exactly five fields, or
        whose fields are not numeric, are skipped. A missing, unreadable,
        empty or whitespace-only file yields ``[]``.
    """
    try:
        text = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        # Missing or unreadable file: treat it as "no labels" rather than failing.
        return []

    boxes: list[tuple[int, float, float, float, float]] = []
    for line in text.splitlines():
        fields = line.split()
        if len(fields) != 5:
            # Blank lines and non-box rows (e.g. polygons) are ignored.
            continue
        try:
            # int(float(...)) accepts both "3" and "3.0" as a class id.
            class_id = int(float(fields[0]))
            x, y, w, h = (float(v) for v in fields[1:])
        except (ValueError, OverflowError):
            continue
        boxes.append((class_id, x, y, w, h))
    return boxes
 def guess_image_dirs(root: Path) -> List[Path]:
         raise FileNotFoundError("images/ directory hierarchy missing")
     imgs = [p for d in img_dirs for p in d.rglob("*.*") if imghdr.what(p) is not None]
+    labels_root = {d.parent / "labels" for d in img_dirs}
+    lbls = [next((lr / f"{p.stem}.txt" for lr in labels_root if (lr / f"{p.stem}.txt").exists()), None) for p in imgs]
     return imgs, lbls, meta
+# ───────────────────────────────────────── Quality checks ─────
+# Integrity -----------------------------------------------------
 def _is_corrupt(path: Path) -> bool:
     try:
         with Image.open(path) as im:
         return True
def qc_integrity(imgs: List[Path], lbls: List[Path]):
    """Score the dataset on missing label files and unreadable images.

    Args:
        imgs: Image paths to verify.
        lbls: Label entry for each image, position-aligned with ``imgs``;
            ``None`` marks an image that has no label file.

    Returns:
        A result dict with ``name``, ``score`` (0-100) and ``details`` listing
        the offending image paths as strings.
    """
    unlabeled = [img for img, lbl in zip(imgs, lbls) if lbl is None]

    # Fan the per-image corruption check out over worker processes.
    broken: List[Path] = []
    with ProcessPoolExecutor(max_workers=CPU_COUNT) as pool:
        pending = {pool.submit(_is_corrupt, img): img for img in imgs}
        finished = tqdm(as_completed(pending), total=len(pending), desc="integrity", leave=False)
        for job in finished:
            if job.result():
                broken.append(pending[job])

    # An image that is both unlabeled and corrupt counts twice, hence the clamp at 0.
    problems = len(unlabeled) + len(broken)
    raw_score = 100 - problems / max(len(imgs), 1) * 100
    return {
        "name": "Integrity",
        "score": max(raw_score, 0),
        "details": {
            "missing_label_files": [str(p) for p in unlabeled],
            "corrupt_images": [str(p) for p in broken],
        },
    }
+# Class balance -------------------------------------------------
+def qc_class_balance(lbls: List[Path]):
     cls_counts = Counter()
     boxes_per_img = []
     for l in lbls:
+        bs = parse_label_file(l) if l else []
         boxes_per_img.append(len(bs))
         cls_counts.update(b[0] for b in bs)
     if not cls_counts:
         return {"name": "Class balance", "score": 0, "details": "No labels"}
+    bal = (min(cls_counts.values()) / max(cls_counts.values())) * 100
     return {
         "name": "Class balance",
         "score": bal,
         },
     }
+# Image quality -------------------------------------------------
def _quality_stat(path: Path, blur_thr: float):
    """Compute blur and exposure flags for a single image.

    Args:
        path: Image file to inspect.
        blur_thr: Variance-of-Laplacian value below which the image is
            flagged as blurry.

    Returns:
        ``(path, is_blurry, is_dark, is_bright)``. All three flags are
        ``False`` when OpenCV is unavailable or the image cannot be read.
    """
    if not cv2:
        return path, False, False, False
    image = cv2.imread(str(path))
    if image is None:
        return path, False, False, False

    grey = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    sharpness = cv2.Laplacian(grey, cv2.CV_64F).var()
    brightness = grey.mean()
    # Mean grey level below 25 counts as too dark, above 230 as too bright.
    return path, sharpness < blur_thr, brightness < 25, brightness > 230
+def qc_image_quality(imgs: List[Path], blur_thr: float = 100.0):
     if cv2 is None:
         return {"name": "Image quality", "score": 100, "details": "cv2 not installed"}
+    blurry: list[Path] = []
+    dark: list[Path] = []
+    bright: list[Path] = []
+    with ProcessPoolExecutor(max_workers=CPU_COUNT) as ex:
+        for p, is_blur, is_dark, is_bright in tqdm(
+            ex.map(lambda x: _quality_stat(x, blur_thr), imgs),
+            total=len(imgs),
+            desc="img‑quality",
+            leave=False,
+        ):
+            if is_blur:
+                blurry.append(p)
+            if is_dark:
+                dark.append(p)
+            if is_bright:
+                bright.append(p)
     bad = len(set(blurry + dark + bright))
     score = 100 - bad / max(len(imgs), 1) * 100
         },
     }
+# Duplicate images ---------------------------------------------
+def qc_duplicates(imgs: List[Path]):
+    # Fast path – use fastdup if installed & enough images
+    if fastdup is not None and len(imgs) > 50:
+        try:
+            fd = fastdup.create(input_dir=str(Path(imgs[0]).parent.parent), work_dir=str(TMP_ROOT / "fastdup"))
+            fd.run()
+            clusters = fd.get_clusters()
+            dup = sum(len(c) - 1 for c in clusters)
+            score = 100 - dup / max(len(imgs), 1) * 100
+            return {
+                "name": "Duplicates",
+                "score": score,
+                "details": {"groups": clusters[:50]},
+            }
+        except Exception:
+            pass  # fallback to hash
     if imagehash is None:
+        return {"name": "Duplicates", "score": 100, "details": "skipped (deps)"}
+    def _hash(p):
+        return str(imagehash.average_hash(Image.open(p)))
     hashes: Dict[str, List[Path]] = defaultdict(list)
+    with ProcessPoolExecutor(max_workers=CPU_COUNT) as ex:
+        for h, p in tqdm(
+            zip(ex.map(_hash, imgs), imgs),
+            total=len(imgs),
+            desc="hashing",
+            leave=False,
+        ):
+            hashes[h].append(p)
     groups = [g for g in hashes.values() if len(g) > 1]
     dup = sum(len(g) - 1 for g in groups)
     return {
         "name": "Duplicates",
         "score": score,
+        "details": {"groups": [[str(p) for p in g] for g in groups[:50]]},
     }
+# Model‑assisted QA --------------------------------------------
 def _rel_iou(b1, b2):
     x1, y1, w1, h1 = b1
     xa1, ya1, xa2, ya2 = x1 - w1 / 2, y1 - h1 / 2, x1 + w1 / 2, y1 + h1 / 2
     xb1, yb1, xb2, yb2 = x2 - w2 / 2, y2 - h2 / 2, x2 + w2 / 2, y2 + h2 / 2
     ix1, iy1, ix2, iy2 = max(xa1, xb1), max(ya1, yb1), min(xa2, xb2), min(ya2, yb2)
+    inter = max(ix2 - ix1, 0) * max(iy2 - iy1, 0)
     union = w1 * h1 + w2 * h2 - inter
+    return inter / union if union else 0.0
def qc_model_qa(imgs: List[Path], weights: str | None, lbls: List[Path], iou_thr: float = 0.5):
    """Compare ground-truth boxes with a YOLO model's predictions.

    For every labelled box the best IoU against same-class predictions is
    recorded; the stage score is the mean of those IoUs times 100.

    Args:
        imgs: Image paths.
        weights: Path to a YOLO weights file, or ``None`` to skip the stage.
        lbls: Label entry for each image, position-aligned with ``imgs``;
            ``None`` entries (no label file) are skipped.
        iou_thr: A box whose best IoU is below this value marks its image as
            mismatched.

    Returns:
        A result dict with ``name``, ``score`` and ``details``. ``details``
        holds the mean IoU and up to 50 mismatched image paths, each listed
        once.
    """
    if weights is None or YOLO is None:
        return {"name": "Model QA", "score": 100, "details": "skipped (no weights)"}

    # Only labelled images can be scored, so filter before running inference.
    labelled = []
    for img, lbl in zip(imgs, lbls):
        if lbl is None:
            continue
        gt_boxes = [row for row in parse_label_file(Path(lbl)) if len(row) == 5]
        if gt_boxes:
            labelled.append((img, gt_boxes))

    model = YOLO(weights)
    ious: list[float] = []
    mism: list[str] = []
    for start in range(0, len(labelled), BATCH):
        chunk = labelled[start : start + BATCH]
        results = model.predict([str(img) for img, _ in chunk], verbose=False)
        for (img, gt_boxes), res in zip(chunk, results):
            # YOLO label files store coordinates normalised to the image size,
            # so compare against the normalised predictions (xywhn); the
            # pixel-space xywh boxes would give an IoU of ~0 for every box.
            preds = list(zip(res.boxes.xywhn.cpu().numpy(), res.boxes.cls.cpu().numpy()))
            flagged = False
            for cls, x, y, w, h in gt_boxes:
                best = 0.0
                for pred_box, pred_cls in preds:
                    if int(pred_cls) != int(cls):
                        continue
                    best = max(best, _rel_iou((x, y, w, h), tuple(pred_box)))
                ious.append(best)
                if best < iou_thr:
                    flagged = True
            if flagged:
                # Record the image once, however many of its boxes mismatched.
                mism.append(str(img))

    miou = float(np.mean(ious)) if ious else 1.0
    return {
        "name": "Model QA",
        "score": miou * 100,
        "details": {"mean_iou": miou, "mismatched_images": mism[:50]},
    }
+# Aggregate -----------------------------------------------------
 def aggregate(scores):
     """Combine per-stage results into one weighted total.

     Each entry of ``scores`` is a dict with ``"name"`` and ``"score"`` keys.
     A stage whose name is not in ``DEFAULT_W`` gets weight 0.
     """
     total = 0
     for result in scores:
         weight = DEFAULT_W.get(result["name"], 0)
         total += weight * result["score"]
     return total
+# ───────────────────────────────────────── Roboflow helpers ────
 RF_RE = re.compile(r"https://universe\.roboflow\.com/([^/]+)/([^/]+)/dataset/(\d+)")
 def download_rf_dataset(url: str, rf_api: "Roboflow", dest: Path) -> Path:
     project.version(int(ver)).download("yolov8", location=str(ds_dir))
     return ds_dir
+# ───────────────────────────────────────── Main logic ──────────
 def run_quality(root: Path, yaml_override: Path | None, weights: Path | None):
     imgs, lbls, meta = gather_dataset(root, yaml_override)
     res = [
         qc_model_qa(imgs, str(weights) if weights else None, lbls),
     ]
     final = aggregate(res)
+    md = [f"## **{meta.get('name', root.name)}** — Score {final:.1f}/100"]
     for r in res:
         md.append(f"### {r['name']}  {r['score']:.1f}")
         md.append("<details><summary>details</summary>\n\n```json")
     df.index.name = "class"
     return md_str, df
+# ───────────────────────────────────────── Gradio UI ───────────
 def evaluate(
     api_key: str,
     url_txt: gr.File | None,
     reports, dfs = [], []
+    # Roboflow batch ------------------------------------------
     if url_txt:
         if Roboflow is None:
             return "`roboflow` not installed", pd.DataFrame()
             return "Enter Roboflow API key", pd.DataFrame()
         rf = Roboflow(api_key=api_key.strip())
+        for line in Path(url_txt.name).read_text().splitlines():
             if not line.strip():
                 continue
             try:
             except Exception as e:
                 reports.append(f"### {line}\n\n⚠️ {e}")
+    # Manual ZIP ----------------------------------------------
     if zip_file:
         tmp_dir = Path(tempfile.mkdtemp())
         shutil.unpack_archive(zip_file.name, tmp_dir)
         dfs.append(df)
         shutil.rmtree(tmp_dir, ignore_errors=True)
+    # Manual path ---------------------------------------------
     if server_path:
         md, df = run_quality(Path(server_path), Path(yaml_file.name) if yaml_file else None, Path(weights.name) if weights else None)
         reports.append(md)
     combined_df = pd.concat(dfs).groupby(level=0).sum() if dfs else pd.DataFrame()
     return summary_md, combined_df
+# ───────────────────────────────────────── Launch  ────────────
 with gr.Blocks(title="YOLO Dataset Quality Evaluator") as demo:
     gr.Markdown(
         """
 1. Paste your **Roboflow API key**
 2. Upload a **.txt** file – one `https://universe.roboflow.com/.../dataset/x` per line
+### Manual
 * Upload a dataset **ZIP** or type a dataset **path** on the server
 * Optionally supply a custom **data.yaml** and/or a **YOLO .pt** weights file for model‑assisted QA
 """