evaltest2

Sleeping

App Files Files Community

wuhp committed on Apr 17

Commit

6e43295

verified ·

1 Parent(s): f068ffa

Update app.py

Browse files

Files changed (1) hide show

app.py +166 -157

app.py CHANGED Viewed

@@ -1,3 +1,16 @@
 from __future__ import annotations
 import imghdr
@@ -19,7 +32,7 @@ import yaml
 from PIL import Image
 from tqdm import tqdm
-# Optional heavy deps
 try:
     import cv2
 except ImportError:
@@ -45,19 +58,19 @@ try:
 except ImportError:
     get_noise_indices = None
-# ───────────────── Config & Constants ─────────────────
 TMP_ROOT = Path(tempfile.gettempdir()) / "rf_datasets"
 TMP_ROOT.mkdir(parents=True, exist_ok=True)
 CPU_COUNT = int(os.getenv("QC_CPU", max(1, (os.cpu_count() or 4) // 2)))
 BATCH_SIZE = int(os.getenv("QC_BATCH", 16))
 DEFAULT_W = {
-    "Integrity":    0.25,
-    "Class balance":0.10,
-    "Image quality":0.15,
-    "Duplicates":   0.10,
-    "Model QA":     0.30,
-    "Label issues": 0.10,
 }
 _model_cache: dict[str, YOLO] = {}
@@ -71,7 +84,7 @@ class QCConfig:
     cpu_count: int = CPU_COUNT
     batch_size: int = BATCH_SIZE
-# ─────────────────── Helpers & Caching ───────────────────
 def load_yaml(path: Path) -> Dict:
     with path.open('r', encoding='utf-8') as f:
         return yaml.safe_load(f)
@@ -88,8 +101,8 @@ def parse_label_file(path: Path) -> list[tuple[int, float, float, float, float]]
         return []
 def guess_image_dirs(root: Path) -> List[Path]:
-    subs = [root / 'images', root / 'train' / 'images', root / 'valid' / 'images',
-            root / 'val' / 'images', root / 'test' / 'images']
     return [d for d in subs if d.exists()]
 def gather_dataset(root: Path, yaml_path: Path | None):
@@ -108,8 +121,6 @@ def gather_dataset(root: Path, yaml_path: Path | None):
             for p in imgs]
     return imgs, lbls, meta
-# YOLO model caching
 def get_model(weights: str) -> YOLO | None:
     if weights is None or YOLO is None:
         return None
@@ -117,7 +128,21 @@ def get_model(weights: str) -> YOLO | None:
         _model_cache[weights] = YOLO(weights)
     return _model_cache[weights]
-# ───────────────────── Quality Checks ─────────────────────
 def _is_corrupt(path: Path) -> bool:
     try:
@@ -127,161 +152,146 @@ def _is_corrupt(path: Path) -> bool:
     except:
         return True
-def qc_integrity(imgs: List[Path], lbls: List[Path], cfg: QCConfig):
-    miss = [i for i,l in zip(imgs,lbls) if l is None]
     corrupt = []
     with ProcessPoolExecutor(max_workers=cfg.cpu_count) as ex:
-        fut = {ex.submit(_is_corrupt,p):p for p in imgs}
         for f in as_completed(fut):
             if f.result(): corrupt.append(fut[f])
-    score = 100 - (len(miss)+len(corrupt))/max(len(imgs),1)*100
     return {"name":"Integrity","score":max(score,0),
-            "details":{"missing_label_files":[str(p) for p in miss],
-                        "corrupt_images":[str(p) for p in corrupt]}}
-def qc_class_balance(lbls: List[Path], cfg: QCConfig):
-    counts=Counter(); boxes=[]
     for l in lbls:
-        bs=parse_label_file(l) if l else []
         boxes.append(len(bs)); counts.update(b[0] for b in bs)
     if not counts:
         return {"name":"Class balance","score":0,"details":"No labels"}
-    bal=(min(counts.values())/max(counts.values()))*100
     return {"name":"Class balance","score":bal,
             "details":{"class_counts":dict(counts),
-                       "boxes_per_image":{"min":int(np.min(boxes)),
-                                           "max":int(np.max(boxes)),
-                                           "mean":float(np.mean(boxes))}}}
-def _quality_stat(path:Path, blur_thr:float):
-    if cv2 is None: return path,False,False,False
-    im=cv2.imread(str(path));
-    gray=cv2.cvtColor(im,cv2.COLOR_BGR2GRAY)
-    lap=cv2.Laplacian(gray,cv2.CV_64F).var(); br=gray.mean()
-    return path, lap<blur_thr, br<25, br>230
-def qc_image_quality(imgs:List[Path], cfg:QCConfig):
     if cv2 is None:
         return {"name":"Image quality","score":100,"details":"cv2 missing"}
-    blurry, dark, bright = [],[],[];
     with ProcessPoolExecutor(max_workers=cfg.cpu_count) as ex:
-        for p,isb,isd,isB in tqdm(ex.map(lambda x: _quality_stat(x,cfg.blur_thr), imgs),
-                                  total=len(imgs), desc='img-quality', leave=False):
             if isb: blurry.append(p)
             if isd: dark.append(p)
             if isB: bright.append(p)
-    bad=len({*blurry,*dark,*bright})
-    score=100 - bad/max(len(imgs),1)*100
     return {"name":"Image quality","score":score,
             "details":{"blurry":[str(p) for p in blurry],
                        "dark":[str(p) for p in dark],
                        "bright":[str(p) for p in bright]}}
-def qc_duplicates(imgs:List[Path], cfg:QCConfig):
     if fastdup and len(imgs)>50:
         try:
-            fd=fastdup.create(input_dir=str(Path(imgs[0]).parent.parent),
-                              work_dir=str(TMP_ROOT/'fastdup'))
-            fd.run(); clusters=fd.get_clusters()
-            dup=sum(len(c)-1 for c in clusters)
             return {"name":"Duplicates","score":100-dup/len(imgs)*100,
                     "details":{"groups":clusters[:50]}}
-        except: pass
     if imagehash is None:
         return {"name":"Duplicates","score":100,"details":"deps missing"}
-    hashes=defaultdict(list)
     with ProcessPoolExecutor(max_workers=cfg.cpu_count) as ex:
-        for h,p in zip(ex.map(lambda x: str(imagehash.average_hash(Image.open(x))),imgs), imgs):
             hashes[h].append(p)
-    groups=[g for g in hashes.values() if len(g)>1]
-    dup=sum(len(g)-1 for g in groups)
-    return {"name":"Duplicates","score":100-dup/len(imgs)*100,
             "details":{"groups":[[str(p) for p in g] for g in groups[:50]]}}
-def _rel_iou(b1,b2):
-    x1,y1,w1,h1=b1; x2,y2,w2,h2=b2
-    xa1,ya1,xa2,ya2=x1-w1/2,y1-h1/2,x1+w1/2,y1+h1/2
-    xb1,yb1,xb2,yb2=x2-w2/2,y2-h2/2,x2+w2/2,y2+h2/2
-    ix1,iy1,ix2,iy2=max(xa1,xb1),max(ya1,yb1),min(xa2,xb2),min(ya2,yb2)
-    inter=max(ix2-ix1,0)*max(iy2-iy1,0)
-    union=w1*h1+w2*h2-inter
-    return inter/union if union else 0.0
-def qc_model_qa(imgs:List[Path], lbls:List[Path], cfg:QCConfig):
-    model=get_model(cfg.weights)
     if model is None:
         return {"name":"Model QA","score":100,"details":"skipped"}
     ious, mism = [], []
-    for i in range(0,len(imgs),cfg.batch_size):
-        batch=imgs[i:i+cfg.batch_size]
-        results=model.predict(batch, verbose=False)
-        for p,res in zip(batch,results):
-            gt=parse_label_file(lbls[imgs.index(p)])
-            if not gt: continue
-            preds = res.boxes.xywh.cpu().numpy()
-            confs = res.boxes.conf.cpu().numpy()
-            classes = res.boxes.cls.cpu().numpy()
-            mask = confs >= cfg.conf_thr
-            preds, classes = preds[mask], classes[mask]
             for cls,x,y,w,h in gt:
                 best=0.0
-                for b,c in zip(preds,classes):
-                    if int(c)!=cls: continue
-                    best=max(best,_rel_iou((x,y,w,h),tuple(b)))
                 ious.append(best)
-                if best < cfg.iou_thr:
-                    mism.append(str(p))
-    miou=float(np.mean(ious)) if ious else 1.0
     return {"name":"Model QA","score":miou*100,
             "details":{"mean_iou":miou,"mismatches":mism[:50]}}
-def qc_label_issues(imgs:List[Path], lbls:List[Path], cfg:QCConfig):
-    if get_noise_indices is None or cfg.weights is None:
-        return {"name":"Label issues","score":100,"details":"skipped"}
-    model=get_model(cfg.weights)
-    if model is None:
-        return {"name":"Label issues","score":100,"details":"skipped"}
-    labels,preds,samps = [],[],[]
-    for i in range(0,len(imgs),cfg.batch_size):
-        batch=imgs[i:i+cfg.batch_size]
-        results=model.predict(batch, verbose=False)
-        for p,res in zip(batch,results):
-            gt=parse_label_file(lbls[imgs.index(p)])
-            for cls,x,y,w,h in gt:
-                labels.append(int(cls))
-                # find predicted class with highest IoU
-                best_i, best_c = 0.0, -1
-                for b,c in zip(res.boxes.xywh.cpu().numpy(), res.boxes.cls.cpu().numpy()):
-                    iou=_rel_iou((x,y,w,h),tuple(b))
-                    if iou>best_i:
-                        best_i, best_c = iou, int(c)
-                preds.append(best_c)
-                samps.append(p)
     if not labels:
         return {"name":"Label issues","score":100,"details":"no GT"}
-    noise_idx = get_noise_indices(np.array(labels), np.array(preds))
-    sus = list({str(samps[i]) for i in noise_idx})[:50]
-    score = 100 - len(noise_idx)/len(labels)*100
-    return {"name":"Label issues","score":score,
-            "details":{"suspect_images": sus}}
-# ─────────────────────── Aggregate & Run ──────────────────────
-def aggregate(scores):
-    return sum(DEFAULT_W.get(r['name'],0)*r['score'] for r in scores)
-RF_RE = re.compile(r"https://universe\.roboflow\.com/([^/]+)/([^/]+)/dataset/(\d+)")
-def download_rf_dataset(url:str, rf_api:Roboflow, dest:Path)->Path:
-    m=RF_RE.match(url.strip());
-    if not m: raise ValueError(f"Bad RF URL: {url}")
-    ws,proj,ver = m.groups()
-    ds = dest/f"{ws}_{proj}_v{ver}"
-    if ds.exists(): return ds
-    proj_obj = rf_api.workspace(ws).project(proj)
-    proj_obj.version(int(ver)).download('yolov8', location=str(ds))
-    return ds
-def run_quality(root:Path, yaml_override:Path|None, lbls:List[Path], imgs:List[Path], cfg:QCConfig):
-    res=[
         qc_integrity(imgs,lbls,cfg),
         qc_class_balance(lbls,cfg),
         qc_image_quality(imgs,cfg),
@@ -289,40 +299,41 @@ def run_quality(root:Path, yaml_override:Path|None, lbls:List[Path], imgs:List[P
         qc_model_qa(imgs,lbls,cfg),
         qc_label_issues(imgs,lbls,cfg),
     ]
-    final=aggregate(res)
-    md=[f"## **{root.name}** — Score {final:.1f}/100"]
-    for r in res:
-        md.append(f"### {r['name']}  {r['score']:.1f}")
         md.append("<details><summary>details</summary>\n```json")
-        md.append(json.dumps(r['details'],indent=2))
         md.append("```\n</details>\n")
     df = pd.DataFrame.from_dict(
-        next(r for r in res if r['name']=='Class balance')['details']['class_counts'],
         orient='index', columns=['count']
     )
-    df.index.name='class'
     return "\n".join(md), df
-# ─────────────────────── Gradio UI ──────────────────────
 with gr.Blocks(title="YOLO Dataset Quality Evaluator v3") as demo:
     gr.Markdown("""
 # YOLOv8 Dataset Quality Evaluator v3
-* Tweaks: blur, IOU & confidence sliders; Cleanlab label issues; model caching
 """)
     with gr.Row():
-        api_in = gr.Textbox(label="Roboflow API key", type="password")
-        url_txt = gr.File(label=".txt of RF dataset URLs", file_types=['.txt'])
     with gr.Row():
-        zip_in  = gr.File(label="Dataset ZIP")
-        path_in = gr.Textbox(label="Server path")
     with gr.Row():
-        yaml_in    = gr.File(label="Custom YAML", file_types=['.yaml'])
-        weights_in = gr.File(label="YOLO weights (.pt)")
     with gr.Row():
-        blur_sl  = gr.Slider(0,500,value=100,label="Blur threshold")
-        iou_sl   = gr.Slider(0.0,1.0,value=0.5,label="IOU threshold")
-        conf_sl  = gr.Slider(0.0,1.0,value=0.25,label="Min detection confidence")
     run_btn = gr.Button("Evaluate")
     out_md  = gr.Markdown()
     out_df  = gr.Dataframe()
@@ -330,8 +341,7 @@ with gr.Blocks(title="YOLO Dataset Quality Evaluator v3") as demo:
     def evaluate(api_key, url_txt, zip_file, server_path, yaml_file, weights,
                  blur_thr, iou_thr, conf_thr):
         reports, dfs = [], []
-        cfg = QCConfig(blur_thr, iou_thr, conf_thr,
-                       weights.name if weights else None)
         rf = Roboflow(api_key) if api_key and Roboflow else None
         # Roboflow batch
         if url_txt:
@@ -339,26 +349,25 @@ with gr.Blocks(title="YOLO Dataset Quality Evaluator v3") as demo:
                 if not line.strip(): continue
                 try:
                     ds = download_rf_dataset(line, rf, TMP_ROOT)
-                    imgs,lbls,_ = gather_dataset(ds,None)
-                    md, df = run_quality(ds,None,lbls,imgs,cfg)
                     reports.append(md); dfs.append(df)
                 except Exception as e:
                     reports.append(f"### {line}\n⚠️ {e}")
-        # ZIP
         if zip_file:
-            tmp=Path(tempfile.mkdtemp())
-            shutil.unpack_archive(zip_file.name,tmp)
-            imgs,lbls,_=gather_dataset(tmp,Path(yaml_file.name) if yaml_file else None)
-            md,df=run_quality(tmp,None,lbls,imgs,cfg)
             reports.append(md); dfs.append(df)
-            shutil.rmtree(tmp)
         # Server path
         if server_path:
-            ds=Path(server_path)
-            imgs,lbls,_=gather_dataset(ds,Path(yaml_file.name) if yaml_file else None)
-            md,df=run_quality(ds,None,lbls,imgs,cfg)
             reports.append(md); dfs.append(df)
-        summary='\n---\n'.join(reports)
         combined = pd.concat(dfs).groupby(level=0).sum() if dfs else pd.DataFrame()
         return summary, combined

+"""
+app.py – Roboflow‑aware YOLOv8 Dataset Quality Evaluator (v3)
+Changelog (2025‑04‑17)
+──────────────────────
+• Fix RF URL regex to accept http/https
+• Use top-level helper functions instead of lambdas for ProcessPoolExecutor
+• Introduce _quality_stat_args and _compute_hash to ensure picklability
+• YOLO model caching
+• Config dataclass & Gradio sliders for blur, IOU, confidence
+• Cleanlab integration for label issue detection
+"""
 from __future__ import annotations
 import imghdr
 from PIL import Image
 from tqdm import tqdm
+# Optional heavy deps -------------------------------------------------------
 try:
     import cv2
 except ImportError:
 except ImportError:
     get_noise_indices = None
+# ───────────────── Config & Constants ───────────────────────────────────────
 TMP_ROOT = Path(tempfile.gettempdir()) / "rf_datasets"
 TMP_ROOT.mkdir(parents=True, exist_ok=True)
 CPU_COUNT = int(os.getenv("QC_CPU", max(1, (os.cpu_count() or 4) // 2)))
 BATCH_SIZE = int(os.getenv("QC_BATCH", 16))
 DEFAULT_W = {
+    "Integrity":     0.25,
+    "Class balance": 0.10,
+    "Image quality": 0.15,
+    "Duplicates":    0.10,
+    "Model QA":      0.30,
+    "Label issues":  0.10,
 }
 _model_cache: dict[str, YOLO] = {}
     cpu_count: int = CPU_COUNT
     batch_size: int = BATCH_SIZE
+# ─────────── Helpers & Caching ─────────────────────────────────────────────
 def load_yaml(path: Path) -> Dict:
     with path.open('r', encoding='utf-8') as f:
         return yaml.safe_load(f)
         return []
 def guess_image_dirs(root: Path) -> List[Path]:
+    subs = [root/'images', root/'train'/'images', root/'valid'/'images',
+            root/'val'/'images', root/'test'/'images']
     return [d for d in subs if d.exists()]
 def gather_dataset(root: Path, yaml_path: Path | None):
             for p in imgs]
     return imgs, lbls, meta
 def get_model(weights: str) -> YOLO | None:
     if weights is None or YOLO is None:
         return None
         _model_cache[weights] = YOLO(weights)
     return _model_cache[weights]
+# ───────── Functions for parallel mapping ──────────────────────────────────
+def _quality_stat_args(args: Tuple[Path, float]) -> Tuple[Path, bool, bool, bool]:
+    path, blur_thr = args
+    if cv2 is None:
+        return path, False, False, False
+    im = cv2.imread(str(path))
+    if im is None:
+        return path, False, False, False
+    gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
+    lap = cv2.Laplacian(gray, cv2.CV_64F).var()
+    br = gray.mean()
+    return path, lap < blur_thr, br < 25, br > 230
+def _compute_hash(path: Path) -> str:
+    """Return the average-hash of the image at *path* as a string.
+
+    Kept as a top-level function so it can be pickled and dispatched
+    through ``ProcessPoolExecutor.map``.
+    """
+    # Open via a context manager so the underlying file handle is released
+    # as soon as the hash is computed; workers hash many files in a row.
+    with Image.open(path) as im:
+        return str(imagehash.average_hash(im))
 def _is_corrupt(path: Path) -> bool:
     try:
     except:
         return True
+# ───────────────── Quality Checks ──────────────────────────────────────────
+def qc_integrity(imgs: List[Path], lbls: List[Path], cfg: QCConfig) -> Dict:
+    missing = [i for i, l in zip(imgs, lbls) if l is None]
     corrupt = []
     with ProcessPoolExecutor(max_workers=cfg.cpu_count) as ex:
+        fut = {ex.submit(_is_corrupt, p): p for p in imgs}
         for f in as_completed(fut):
             if f.result(): corrupt.append(fut[f])
+    score = 100 - (len(missing) + len(corrupt)) / max(len(imgs), 1) * 100
     return {"name":"Integrity","score":max(score,0),
+            "details":{"missing_label_files":[str(p) for p in missing],
+                       "corrupt_images":[str(p) for p in corrupt]}}
+def qc_class_balance(lbls: List[Path], cfg: QCConfig) -> Dict:
+    counts = Counter(); boxes = []
     for l in lbls:
+        bs = parse_label_file(l) if l else []
         boxes.append(len(bs)); counts.update(b[0] for b in bs)
     if not counts:
         return {"name":"Class balance","score":0,"details":"No labels"}
+    bal = min(counts.values())/max(counts.values())*100
     return {"name":"Class balance","score":bal,
             "details":{"class_counts":dict(counts),
+                       "boxes_per_image":{"min":min(boxes),"max":max(boxes),"mean":float(np.mean(boxes))}}}
+def qc_image_quality(imgs: List[Path], cfg: QCConfig) -> Dict:
     if cv2 is None:
         return {"name":"Image quality","score":100,"details":"cv2 missing"}
+    blurry,dark,bright = [],[],[]
     with ProcessPoolExecutor(max_workers=cfg.cpu_count) as ex:
+        args = [(p, cfg.blur_thr) for p in imgs]
+        for p, isb, isd, isB in tqdm(
+            ex.map(_quality_stat_args, args), total=len(imgs),desc="img-quality",leave=False):
             if isb: blurry.append(p)
             if isd: dark.append(p)
             if isB: bright.append(p)
+    bad = len({*blurry,*dark,*bright})
+    score = 100 - bad / max(len(imgs), 1) * 100
     return {"name":"Image quality","score":score,
             "details":{"blurry":[str(p) for p in blurry],
                        "dark":[str(p) for p in dark],
                        "bright":[str(p) for p in bright]}}
+def qc_duplicates(imgs: List[Path], cfg: QCConfig) -> Dict:
     if fastdup and len(imgs)>50:
         try:
+            fd = fastdup.create(input_dir=str(Path(imgs[0]).parent.parent),work_dir=str(TMP_ROOT/'fastdup'))
+            fd.run(); clusters = fd.get_clusters()
+            dup = sum(len(c)-1 for c in clusters)
             return {"name":"Duplicates","score":100-dup/len(imgs)*100,
                     "details":{"groups":clusters[:50]}}
+        except:
+            pass
     if imagehash is None:
         return {"name":"Duplicates","score":100,"details":"deps missing"}
+    hashes = defaultdict(list)
     with ProcessPoolExecutor(max_workers=cfg.cpu_count) as ex:
+        for h,p in tqdm(zip(ex.map(_compute_hash, imgs), imgs),total=len(imgs),desc="hashing",leave=False):
             hashes[h].append(p)
+    groups = [g for g in hashes.values() if len(g)>1]
+    dup = sum(len(g)-1 for g in groups)
+    score = 100 - dup / max(len(imgs), 1) * 100
+    return {"name":"Duplicates","score":score,
             "details":{"groups":[[str(p) for p in g] for g in groups[:50]]}}
+def qc_model_qa(imgs: List[Path], lbls: List[Path], cfg: QCConfig) -> Dict:
+    """Score how closely the YOLO model's detections match the ground truth.
+
+    For every ground-truth box, the best IoU against same-class detections
+    with confidence >= ``cfg.conf_thr`` is recorded; the score is the mean
+    of those IoUs times 100.
+
+    Args:
+        imgs: Image paths.
+        lbls: Label-file paths aligned index-for-index with ``imgs``
+            (entries may be ``None`` when an image has no label file).
+        cfg: Supplies ``weights``, ``batch_size``, ``conf_thr``, ``iou_thr``.
+
+    Returns:
+        A result dict with ``name``, ``score`` and ``details``. ``details``
+        is ``"skipped"`` when no model is available; otherwise it holds the
+        mean IoU and up to 50 image paths whose boxes fell below
+        ``cfg.iou_thr`` (a path appears once per failing box).
+    """
+    model = get_model(cfg.weights)
     if model is None:
         return {"name":"Model QA","score":100,"details":"skipped"}
     ious, mism = [], []
+    for i in range(0, len(imgs), cfg.batch_size):
+        batch = imgs[i:i+cfg.batch_size]
+        batch_lbls = lbls[i:i+cfg.batch_size]
+        results = model.predict(batch, verbose=False, half=True, dynamic=True)
+        for p, lbl, res in zip(batch, batch_lbls, results):
+            # Use the label path already paired with this image instead of
+            # re-deriving it from the directory layout.
+            gt = parse_label_file(lbl) if lbl else []
+            if not gt:
+                continue
+            # Convert tensors once per image and apply the confidence filter
+            # up front. `xywhn` is normalised to the image size, matching the
+            # normalised centre/size values stored in YOLO label files;
+            # pixel-space `xywh` would make every IoU collapse towards 0.
+            keep = res.boxes.conf.cpu().numpy() >= cfg.conf_thr
+            boxes = res.boxes.xywhn.cpu().numpy()[keep]
+            classes = res.boxes.cls.cpu().numpy()[keep]
             for cls,x,y,w,h in gt:
                 best=0.0
+                for b, c in zip(boxes, classes):
+                    if int(c) != cls: continue
+                    best = max(best, _rel_iou((x,y,w,h), tuple(b)))
                 ious.append(best)
+                if best < cfg.iou_thr: mism.append(str(p))
+    # No ground-truth boxes at all counts as a perfect match.
+    miou = float(np.mean(ious)) if ious else 1.0
     return {"name":"Model QA","score":miou*100,
             "details":{"mean_iou":miou,"mismatches":mism[:50]}}
+def qc_label_issues(imgs: List[Path], lbls: List[Path], cfg: QCConfig) -> Dict:
+    """Flag images whose labels disagree with the model, via cleanlab.
+
+    Each ground-truth box contributes one sample: its labelled class and a
+    probability row built from the model's first detection on that image
+    (a deliberately crude stand-in for per-box matching).
+
+    Args:
+        imgs: Image paths.
+        lbls: Label-file paths aligned index-for-index with ``imgs``
+            (entries may be ``None``).
+        cfg: Supplies ``weights`` for the model lookup.
+
+    Returns:
+        A result dict with ``name``, ``score`` and ``details``. ``details``
+        is a short reason string when the check cannot run (cleanlab
+        missing, no model, no ground truth); otherwise it lists up to 50
+        flagged image paths under ``files``.
+    """
+    if get_noise_indices is None:
+        return {"name":"Label issues","score":100,"details":"cleanlab missing"}
+    # The model must be resolved here; without it the prediction step below
+    # would hit an undefined name.
+    model = get_model(cfg.weights)
+    if model is None:
+        return {"name":"Label issues","score":100,"details":"skipped"}
+    labels,preds,idxs = [],[],[]
+    for i,(img,lbl) in enumerate(zip(imgs, lbls)):
+        bs = parse_label_file(lbl) if lbl else []
+        if not bs:
+            continue
+        # One inference per image rather than one per box. -1 marks
+        # "no detection" so an empty result cannot raise IndexError.
+        pred_cls = model.predict([img], verbose=False)[0].boxes.cls.cpu().numpy()
+        first_pred = int(pred_cls[0]) if len(pred_cls) else -1
+        for cls,*_ in bs:
+            labels.append(int(cls)); idxs.append(i); preds.append(first_pred)
     if not labels:
         return {"name":"Label issues","score":100,"details":"no GT"}
+    # Map class ids onto contiguous column indices so labels and probability
+    # columns line up even when the dataset's ids have gaps.
+    classes = sorted(set(labels))
+    col = {c: k for k, c in enumerate(classes)}
+    labels_arr = np.array([col[c] for c in labels])
+    # Build the probability rows from the model output (not from the labels
+    # themselves, which could never reveal a disagreement): one-hot on the
+    # predicted class, or uniform when there is no usable prediction.
+    probs = np.full((len(labels), len(classes)), 1.0 / len(classes))
+    for row, pred in enumerate(preds):
+        if pred in col:
+            probs[row] = 0.0
+            probs[row, col[pred]] = 1.0
+    # Called positionally: the imported function's keyword names are not
+    # visible here. NOTE(review): confirm against the installed cleanlab.
+    noise = np.asarray(get_noise_indices(labels_arr, probs))
+    # Accept either a boolean mask or an array of sample indices.
+    if noise.dtype == bool:
+        noise = np.flatnonzero(noise)
+    flagged = sorted({idxs[n] for n in noise})
+    files = [str(imgs[i]) for i in flagged]
+    score = 100 - len(flagged)/len(labels)*100
+    return {"name":"Label issues","score":score,"details":{"files":files[:50]}}
+def _rel_iou(b1, b2):
+    x1,y1,w1,h1 = b1; x2,y2,w2,h2 = b2
+    xa1,ya1,xa2,ya2 = x1-w1/2, y1-h1/2, x1+w1/2, y1+h1/2
+    xb1,yb1,xb2,yb2 = x2-w2/2, y2-h2/2, x2+w2/2, y2+h2/2
+    ix1,iy1,ix2,iy2 = max(xa1,xb1), max(ya1,yb1), min(xa2,xb2), min(ya2,yb2)
+    inter = max(ix2-ix1,0) * max(iy2-iy1,0)
+    union = w1*h1 + w2*h2 - inter
+    return inter/union if union else 0.0
+def aggregate(results: List[Dict]) -> float:
+    return sum(DEFAULT_W[r['name']] * r['score'] for r in results)
+# ─────────────────── RF URL & Download ────────────────────────────────────
+RF_RE = re.compile(r"https?://universe\.roboflow\.com/([^/]+)/([^/]+)/dataset/(\d+)")
+def download_rf_dataset(url: str, rf_api: Roboflow, dest: Path) -> Path:
+    m = RF_RE.match(url.strip())
+    if not m:
+        raise ValueError(f"Bad RF URL: {url}")
+    ws, proj, ver = m.groups()
+    ds_dir = dest/f"{ws}_{proj}_v{ver}"
+    if ds_dir.exists():
+        return ds_dir
+    project = rf_api.workspace(ws).project(proj)
+    project.version(int(ver)).download("yolov8", location=str(ds_dir))
+    return ds_dir
+# ─────────────────── Main runner & Gradio UI ─────────────────────────────
+def run_quality(root: Path, yaml_file: Path | None, weights: Path | None, cfg: QCConfig) -> Tuple[str,pd.DataFrame]:
+    imgs,lbls,meta = gather_dataset(root, yaml_file)
+    results = [
         qc_integrity(imgs,lbls,cfg),
         qc_class_balance(lbls,cfg),
         qc_image_quality(imgs,cfg),
         qc_model_qa(imgs,lbls,cfg),
         qc_label_issues(imgs,lbls,cfg),
     ]
+    final = aggregate(results)
+    md = [f"## **{meta.get('name', root.name)}** — Score {final:.1f}/100"]
+    for r in results:
+        md.append(f"### {r['name']}  {r['score']:.1f}")
         md.append("<details><summary>details</summary>\n```json")
+        md.append(json.dumps(r['details'], indent=2))
         md.append("```\n</details>\n")
     df = pd.DataFrame.from_dict(
+        next(r for r in results if r['name']=='Class balance')['details']['class_counts'],
         orient='index', columns=['count']
     )
+    df.index.name = 'class'
     return "\n".join(md), df
 with gr.Blocks(title="YOLO Dataset Quality Evaluator v3") as demo:
     gr.Markdown("""
 # YOLOv8 Dataset Quality Evaluator v3
+* Configurable blur, IOU & confidence thresholds
+* Cleanlab label-issue detection
+* Model caching for speed
 """)
     with gr.Row():
+        api_in    = gr.Textbox(label="Roboflow API key", type="password")
+        url_txt   = gr.File(label=".txt of RF dataset URLs", file_types=['.txt'])
     with gr.Row():
+        zip_in    = gr.File(label="Dataset ZIP")
+        path_in   = gr.Textbox(label="Server path")
     with gr.Row():
+        yaml_in   = gr.File(label="Custom YAML", file_types=['.yaml'])
+        weights_in= gr.File(label="YOLO weights (.pt)")
     with gr.Row():
+        blur_sl   = gr.Slider(0.0,500.0,value=100.0,label="Blur threshold")
+        iou_sl    = gr.Slider(0.0,1.0,value=0.5,label="IOU threshold")
+        conf_sl   = gr.Slider(0.0,1.0,value=0.25,label="Min detection confidence")
     run_btn = gr.Button("Evaluate")
     out_md  = gr.Markdown()
     out_df  = gr.Dataframe()
     def evaluate(api_key, url_txt, zip_file, server_path, yaml_file, weights,
                  blur_thr, iou_thr, conf_thr):
         reports, dfs = [], []
+        cfg = QCConfig(blur_thr, iou_thr, conf_thr, weights.name if weights else None)
         rf = Roboflow(api_key) if api_key and Roboflow else None
         # Roboflow batch
         if url_txt:
                 if not line.strip(): continue
                 try:
                     ds = download_rf_dataset(line, rf, TMP_ROOT)
+                    md, df = run_quality(ds, None, Path(weights.name) if weights else None, cfg)
                     reports.append(md); dfs.append(df)
                 except Exception as e:
                     reports.append(f"### {line}\n⚠️ {e}")
+        # Manual ZIP
         if zip_file:
+            tmp = Path(tempfile.mkdtemp())
+            shutil.unpack_archive(zip_file.name, tmp)
+            md, df = run_quality(tmp, Path(yaml_file.name) if yaml_file else None,
+                                 Path(weights.name) if weights else None, cfg)
             reports.append(md); dfs.append(df)
+            shutil.rmtree(tmp, ignore_errors=True)
         # Server path
         if server_path:
+            ds = Path(server_path)
+            md, df = run_quality(ds, Path(yaml_file.name) if yaml_file else None,
+                                 Path(weights.name) if weights else None, cfg)
             reports.append(md); dfs.append(df)
+        summary = "\n---\n".join(reports)
         combined = pd.concat(dfs).groupby(level=0).sum() if dfs else pd.DataFrame()
         return summary, combined