wuhp committed on
Commit
d72206a
·
verified ·
1 Parent(s): 6e43295

Update app.py

Files changed (1)
  1. app.py +73 -75
app.py CHANGED
@@ -4,11 +4,11 @@ app.py – Roboflow‑aware YOLOv8 Dataset Quality Evaluator (v3)
 Changelog (2025‑04‑17)
 ──────────────────────
 • Fix RF URL regex to accept http/https
-• Use top-level helper functions instead of lambdas for ProcessPoolExecutor
-• Introduce _quality_stat_args and _compute_hash to ensure picklability
+• Top-level functions for parallel mapping (picklable)
+• Fastdup-only path in qc_duplicates (skips hashing fallback)
 • YOLO model caching
 • Config dataclass & Gradio sliders for blur, IOU, confidence
-• Cleanlab integration for label issue detection
+• Cleanlab integration for label-issue detection
 """
 
 from __future__ import annotations
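Editor's note, not part of the commit: the first new changelog bullet exists because `ProcessPoolExecutor` pickles the callable it sends to worker processes, so the mapped function must live at module level; a lambda raises `PicklingError`. A minimal sketch of the pattern (the `_word_count` helper and the `*.txt` glob are invented for illustration):

```python
# Illustrative only; not part of the commit. ProcessPoolExecutor pickles the
# callable it ships to worker processes, so it must be a top-level function,
# mirroring _quality_stat_args / _is_corrupt in the diff above.
from concurrent.futures import ProcessPoolExecutor
from pathlib import Path

def _word_count(path: Path) -> int:          # top-level, therefore picklable
    return len(path.read_text(errors="ignore").split())

if __name__ == "__main__":
    files = list(Path(".").glob("*.txt"))
    with ProcessPoolExecutor() as ex:
        counts = list(ex.map(_word_count, files))
    # ex.map(lambda p: ..., files) would fail: lambdas cannot be pickled
    print(dict(zip([f.name for f in files], counts)))
```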
@@ -97,13 +97,13 @@ def parse_label_file(path: Path) -> list[tuple[int, float, float, float, float]]:
         if arr.ndim == 1:
             arr = arr.reshape(1, -1)
         return [tuple(row) for row in arr]
-    except Exception:
+    except:
         return []
 
 def guess_image_dirs(root: Path) -> List[Path]:
-    subs = [root/'images', root/'train'/'images', root/'valid'/'images',
-            root/'val'/'images', root/'test'/'images']
-    return [d for d in subs if d.exists()]
+    candidates = [root/'images', root/'train'/'images', root/'valid'/'images',
+                  root/'val'/'images', root/'test'/'images']
+    return [d for d in candidates if d.exists()]
 
 def gather_dataset(root: Path, yaml_path: Path | None):
     if yaml_path is None:
@@ -116,13 +116,13 @@ def gather_dataset(root: Path, yaml_path: Path | None):
     if not img_dirs:
         raise FileNotFoundError("images/ directory missing")
     imgs = [p for d in img_dirs for p in d.rglob('*.*') if imghdr.what(p)]
-    labels_root = {d.parent/'labels' for d in img_dirs}
-    lbls = [next((lr/f"{p.stem}.txt" for lr in labels_root if (lr/f"{p.stem}.txt").exists()), None)
+    labels_roots = {d.parent/'labels' for d in img_dirs}
+    lbls = [next((lr/f"{p.stem}.txt" for lr in labels_roots if (lr/f"{p.stem}.txt").exists()), None)
             for p in imgs]
     return imgs, lbls, meta
 
 def get_model(weights: str) -> YOLO | None:
-    if weights is None or YOLO is None:
+    if not weights or YOLO is None:
         return None
     if weights not in _model_cache:
         _model_cache[weights] = YOLO(weights)
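Editor's note, not part of the commit: `get_model` memoises loaded weights in the module-level `_model_cache` dict, and the new `if not weights` guard also catches an empty string from the UI, not just `None`. A stand-in sketch of the same caching shape (a fake loader string replaces the real `YOLO(weights)` call so it runs without ultralytics):

```python
# Illustrative only; same memoisation shape as get_model in the diff.
_model_cache = {}

def get_model(weights):
    if not weights:                      # covers None and "" alike
        return None
    if weights not in _model_cache:      # load once, reuse afterwards
        _model_cache[weights] = f"<model loaded from {weights}>"
    return _model_cache[weights]

print(get_model("yolov8n.pt") is get_model("yolov8n.pt"))   # True: cached object reused
```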
@@ -130,7 +130,7 @@ def get_model(weights: str) -> YOLO | None:
 
 # ───────── Functions for parallel mapping ──────────────────────────────────
 def _quality_stat_args(args: Tuple[Path, float]) -> Tuple[Path, bool, bool, bool]:
-    path, blur_thr = args
+    path, thr = args
     if cv2 is None:
         return path, False, False, False
     im = cv2.imread(str(path))
@@ -138,11 +138,8 @@ def _quality_stat_args(args: Tuple[Path, float]) -> Tuple[Path, bool, bool, bool]:
         return path, False, False, False
     gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
     lap = cv2.Laplacian(gray, cv2.CV_64F).var()
-    br = gray.mean()
-    return path, lap < blur_thr, br < 25, br > 230
-
-def _compute_hash(path: Path) -> str:
-    return str(imagehash.average_hash(Image.open(path)))
+    mean = gray.mean()
+    return path, lap < thr, mean < 25, mean > 230
 
 def _is_corrupt(path: Path) -> bool:
     try:
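Editor's note, not part of the commit: `_quality_stat_args` calls an image blurry when the variance of its Laplacian falls below the blur threshold, and dark or bright when the mean grey level is under 25 or over 230. A self-contained check of the blur test on a synthetic edge image (the threshold of 100 is only illustrative, not the app's default):

```python
# Illustrative only; the Laplacian-variance blur test used by _quality_stat_args.
import cv2
import numpy as np

sharp = np.zeros((64, 64), np.uint8)
sharp[:, 32:] = 255                               # hard edge: high Laplacian variance
blurred = cv2.GaussianBlur(sharp, (15, 15), 0)    # smoothed edge: low variance

for name, img in [("sharp", sharp), ("blurred", blurred)]:
    lap_var = cv2.Laplacian(img, cv2.CV_64F).var()
    print(name, round(lap_var, 1), "blurry" if lap_var < 100 else "ok")
```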
@@ -154,19 +151,19 @@ def _is_corrupt(path: Path) -> bool:
 
 # ───────────────── Quality Checks ──────────────────────────────────────────
 def qc_integrity(imgs: List[Path], lbls: List[Path], cfg: QCConfig) -> Dict:
-    missing = [i for i, l in zip(imgs, lbls) if l is None]
+    missing = [i for i,l in zip(imgs,lbls) if l is None]
     corrupt = []
     with ProcessPoolExecutor(max_workers=cfg.cpu_count) as ex:
         fut = {ex.submit(_is_corrupt, p): p for p in imgs}
         for f in as_completed(fut):
             if f.result(): corrupt.append(fut[f])
-    score = 100 - (len(missing) + len(corrupt)) / max(len(imgs), 1) * 100
+    score = 100 - (len(missing)+len(corrupt))/max(len(imgs),1)*100
     return {"name":"Integrity","score":max(score,0),
             "details":{"missing_label_files":[str(p) for p in missing],
                        "corrupt_images":[str(p) for p in corrupt]}}
 
 def qc_class_balance(lbls: List[Path], cfg: QCConfig) -> Dict:
-    counts = Counter(); boxes = []
+    counts, boxes = Counter(), []
     for l in lbls:
         bs = parse_label_file(l) if l else []
         boxes.append(len(bs)); counts.update(b[0] for b in bs)
@@ -175,67 +172,69 @@ def qc_class_balance(lbls: List[Path], cfg: QCConfig) -> Dict:
     bal = min(counts.values())/max(counts.values())*100
     return {"name":"Class balance","score":bal,
             "details":{"class_counts":dict(counts),
-                       "boxes_per_image":{"min":min(boxes),"max":max(boxes),"mean":float(np.mean(boxes))}}}
+                       "boxes_per_image":{
+                           "min":min(boxes),"max":max(boxes),
+                           "mean":float(np.mean(boxes))}}}
 
 def qc_image_quality(imgs: List[Path], cfg: QCConfig) -> Dict:
     if cv2 is None:
         return {"name":"Image quality","score":100,"details":"cv2 missing"}
-    blurry,dark,bright = [],[],[]
+    blurry, dark, bright = [], [], []
     with ProcessPoolExecutor(max_workers=cfg.cpu_count) as ex:
         args = [(p, cfg.blur_thr) for p in imgs]
         for p, isb, isd, isB in tqdm(
-                ex.map(_quality_stat_args, args), total=len(imgs),desc="img-quality",leave=False):
+                ex.map(_quality_stat_args, args), total=len(imgs),
+                desc="img-quality", leave=False
+        ):
             if isb: blurry.append(p)
             if isd: dark.append(p)
             if isB: bright.append(p)
     bad = len({*blurry,*dark,*bright})
-    score = 100 - bad / max(len(imgs), 1) * 100
+    score = 100 - bad/max(len(imgs),1)*100
     return {"name":"Image quality","score":score,
             "details":{"blurry":[str(p) for p in blurry],
                        "dark":[str(p) for p in dark],
                        "bright":[str(p) for p in bright]}}
 
 def qc_duplicates(imgs: List[Path], cfg: QCConfig) -> Dict:
-    if fastdup and len(imgs)>50:
+    # fastdup-only path
+    if fastdup is not None and len(imgs) > 50:
         try:
-            fd = fastdup.create(input_dir=str(Path(imgs[0]).parent.parent),work_dir=str(TMP_ROOT/'fastdup'))
-            fd.run(); clusters = fd.get_clusters()
+            fd = fastdup.create(
+                input_dir=str(Path(imgs[0]).parent.parent),
+                work_dir=str(TMP_ROOT/'fastdup')
+            )
+            fd.run()
+            clusters = fd.get_clusters()
             dup = sum(len(c)-1 for c in clusters)
             return {"name":"Duplicates","score":100-dup/len(imgs)*100,
                     "details":{"groups":clusters[:50]}}
-        except:
-            pass
-    if imagehash is None:
-        return {"name":"Duplicates","score":100,"details":"deps missing"}
-    hashes = defaultdict(list)
-    with ProcessPoolExecutor(max_workers=cfg.cpu_count) as ex:
-        for h,p in tqdm(zip(ex.map(_compute_hash, imgs), imgs),total=len(imgs),desc="hashing",leave=False):
-            hashes[h].append(p)
-    groups = [g for g in hashes.values() if len(g)>1]
-    dup = sum(len(g)-1 for g in groups)
-    score = 100 - dup / max(len(imgs), 1) * 100
-    return {"name":"Duplicates","score":score,
-            "details":{"groups":[[str(p) for p in g] for g in groups[:50]]}}
+        except Exception as e:
+            return {"name":"Duplicates","score":100,
+                    "details":{"fastdup_error":str(e)}}
+    # fallback skipped
+    return {"name":"Duplicates","score":100,
+            "details":{"note":"fastdup not available or small dataset"}}
 
 def qc_model_qa(imgs: List[Path], lbls: List[Path], cfg: QCConfig) -> Dict:
     model = get_model(cfg.weights)
     if model is None:
         return {"name":"Model QA","score":100,"details":"skipped"}
     ious, mism = [], []
-    for i in range(0, len(imgs), cfg.batch_size):
+    for i in range(0,len(imgs),cfg.batch_size):
         batch = imgs[i:i+cfg.batch_size]
         results = model.predict(batch, verbose=False, half=True, dynamic=True)
-        for p,res in zip(batch, results):
+        for p,res in zip(batch,results):
             gt = parse_label_file(p.parent.parent/'labels'/f"{p.stem}.txt")
             for cls,x,y,w,h in gt:
                 best=0.0
                 for b,c,conf in zip(res.boxes.xywh.cpu().numpy(),
                                     res.boxes.cls.cpu().numpy(),
                                     res.boxes.conf.cpu().numpy()):
-                    if conf < cfg.conf_thr or int(c)!=cls: continue
-                    best = max(best, _rel_iou((x,y,w,h), tuple(b)))
+                    if conf<cfg.conf_thr or int(c)!=cls: continue
+                    best = max(best,_rel_iou((x,y,w,h),tuple(b)))
                 ious.append(best)
-                if best < cfg.iou_thr: mism.append(str(p))
+                if best<cfg.iou_thr: mism.append(str(p))
     miou = float(np.mean(ious)) if ious else 1.0
     return {"name":"Model QA","score":miou*100,
             "details":{"mean_iou":miou,"mismatches":mism[:50]}}
@@ -243,52 +242,53 @@ def qc_model_qa(imgs: List[Path], lbls: List[Path], cfg: QCConfig) -> Dict:
 def qc_label_issues(imgs: List[Path], lbls: List[Path], cfg: QCConfig) -> Dict:
     if get_noise_indices is None:
         return {"name":"Label issues","score":100,"details":"cleanlab missing"}
-    labels,preds,idxs = [],[],[]
-    for i,(img,lbl) in enumerate(zip(imgs, lbls)):
+    labels, preds, idxs = [], [], []
+    model = get_model(cfg.weights)
+    for i,(img,lbl) in enumerate(zip(imgs,lbls)):
         bs = parse_label_file(lbl) if lbl else []
         for cls,*_ in bs:
             labels.append(int(cls)); idxs.append(i)
-        # find best predicted class
-        # for simplicity, treat first pred if any
-        preds.append(int(model.predict([img])[0].boxes.cls.cpu().numpy()[0]))
+        res = model.predict([img], verbose=False)[0]
+        pred_cls = int(res.boxes.cls.cpu().numpy()[0]) if len(res.boxes)>0 else -1
+        preds.append(pred_cls)
     if not labels:
         return {"name":"Label issues","score":100,"details":"no GT"}
     labels_arr = np.array(labels)
-    # dummy prob matrix: assume one-hot perfect
-    probs = np.eye(len(set(labels_arr)))[np.searchsorted(sorted(set(labels_arr)), labels_arr)]
+    # one-hot dummy
+    uniq = sorted(set(labels_arr))
+    probs = np.eye(len(uniq))[np.searchsorted(uniq, labels_arr)]
     noise = get_noise_indices(labels=labels_arr, probabilities=probs)
-    flagged = sorted({idxs[n] for n in noise})
-    files = [str(imgs[i]) for i in flagged]
-    score = 100 - len(flagged)/len(labels)*100
-    return {"name":"Label issues","score":score,"details":{"files":files[:50]}}
-
-def _rel_iou(b1, b2):
-    x1,y1,w1,h1 = b1; x2,y2,w2,h2 = b2
-    xa1,ya1,xa2,ya2 = x1-w1/2, y1-h1/2, x1+w1/2, y1+h1/2
-    xb1,yb1,xb2,yb2 = x2-w2/2, y2-h2/2, x2+w2/2, y2+h2/2
-    ix1,iy1,ix2,iy2 = max(xa1,xb1), max(ya1,yb1), min(xa2,xb2), min(ya2,yb2)
-    inter = max(ix2-ix1,0) * max(iy2-iy1,0)
-    union = w1*h1 + w2*h2 - inter
+    flags = sorted({idxs[n] for n in noise})
+    files = [str(imgs[i]) for i in flags]
+    score = 100 - len(flags)/len(labels)*100
+    return {"name":"Label issues","score":score,
+            "details":{"files":files[:50]}}
+
+def _rel_iou(b1,b2):
+    x1,y1,w1,h1=b1; x2,y2,w2,h2=b2
+    xa1,ya1,xa2,ya2=x1-w1/2,y1-h1/2,x1+w1/2,y1+h1/2
+    xb1,yb1,xb2,yb2=x2-w2/2,y2-h2/2,x2+w2/2,y2+h2/2
+    ix1,iy1,ix2,iy2=max(xa1,xb1),max(ya1,yb1),min(xa2,xb2),min(ya2,yb2)
+    inter=max(ix2-ix1,0)*max(iy2-iy1,0)
+    union=w1*h1+w2*h2-inter
     return inter/union if union else 0.0
 
 def aggregate(results: List[Dict]) -> float:
-    return sum(DEFAULT_W[r['name']] * r['score'] for r in results)
+    return sum(DEFAULT_W[r['name']]*r['score'] for r in results)
 
-# ─────────────────── RF URL & Download ────────────────────────────────────
 RF_RE = re.compile(r"https?://universe\.roboflow\.com/([^/]+)/([^/]+)/dataset/(\d+)")
+
 def download_rf_dataset(url: str, rf_api: Roboflow, dest: Path) -> Path:
     m = RF_RE.match(url.strip())
     if not m:
         raise ValueError(f"Bad RF URL: {url}")
     ws, proj, ver = m.groups()
     ds_dir = dest/f"{ws}_{proj}_v{ver}"
-    if ds_dir.exists():
-        return ds_dir
-    project = rf_api.workspace(ws).project(proj)
-    project.version(int(ver)).download("yolov8", location=str(ds_dir))
+    if ds_dir.exists(): return ds_dir
+    pr = rf_api.workspace(ws).project(proj)
+    pr.version(int(ver)).download("yolov8", location=str(ds_dir))
     return ds_dir
 
-# ─────────────────── Main runner & Gradio UI ─────────────────────────────
 def run_quality(root: Path, yaml_file: Path | None, weights: Path | None, cfg: QCConfig) -> Tuple[str,pd.DataFrame]:
     imgs,lbls,meta = gather_dataset(root, yaml_file)
     results = [
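Editor's note, not part of the commit: the `np.eye(...)[np.searchsorted(...)]` line in `qc_label_issues` builds a one-hot pseudo-probability matrix straight from the ground-truth class ids, one row per box. A tiny check of what it produces:

```python
# Illustrative only; reproduces the "one-hot dummy" matrix from qc_label_issues.
import numpy as np

labels_arr = np.array([2, 0, 2, 5])
uniq = sorted(set(labels_arr))                        # [0, 2, 5]
probs = np.eye(len(uniq))[np.searchsorted(uniq, labels_arr)]
print(probs)
# [[0. 1. 0.]   one row per annotated box, one column per distinct class id,
#  [1. 0. 0.]   with probability 1.0 on the annotated class; by construction
#  [0. 1. 0.]   this matrix never disagrees with the labels, which limits what
#  [0. 0. 1.]]  cleanlab can flag from it.
```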
@@ -300,11 +300,11 @@ def run_quality(root: Path, yaml_file: Path | None, weights: Path | None, cfg: QCConfig) -> Tuple[str,pd.DataFrame]:
         qc_label_issues(imgs,lbls,cfg),
     ]
     final = aggregate(results)
-    md = [f"## **{meta.get('name', root.name)}** — Score {final:.1f}/100"]
+    md = [f"## **{meta.get('name',root.name)}** — Score {final:.1f}/100"]
     for r in results:
         md.append(f"### {r['name']}  {r['score']:.1f}")
         md.append("<details><summary>details</summary>\n```json")
-        md.append(json.dumps(r['details'], indent=2))
+        md.append(json.dumps(r['details'],indent=2))
         md.append("```\n</details>\n")
     df = pd.DataFrame.from_dict(
         next(r for r in results if r['name']=='Class balance')['details']['class_counts'],
@@ -319,6 +319,7 @@ with gr.Blocks(title="YOLO Dataset Quality Evaluator v3") as demo:
 
     * Configurable blur, IOU & confidence thresholds
     * Cleanlab label-issue detection
+    * Fastdup-only duplicates (no hashing fallback)
    * Model caching for speed
    """)
    with gr.Row():
@@ -343,7 +344,6 @@ with gr.Blocks(title="YOLO Dataset Quality Evaluator v3") as demo:
         reports, dfs = [], []
         cfg = QCConfig(blur_thr, iou_thr, conf_thr, weights.name if weights else None)
         rf = Roboflow(api_key) if api_key and Roboflow else None
-        # Roboflow batch
         if url_txt:
             for line in Path(url_txt.name).read_text().splitlines():
                 if not line.strip(): continue
@@ -353,7 +353,6 @@ with gr.Blocks(title="YOLO Dataset Quality Evaluator v3") as demo:
                     reports.append(md); dfs.append(df)
                 except Exception as e:
                     reports.append(f"### {line}\n⚠️ {e}")
-        # Manual ZIP
         if zip_file:
             tmp = Path(tempfile.mkdtemp())
             shutil.unpack_archive(zip_file.name, tmp)
@@ -361,7 +360,6 @@ with gr.Blocks(title="YOLO Dataset Quality Evaluator v3") as demo:
                                  Path(weights.name) if weights else None, cfg)
             reports.append(md); dfs.append(df)
             shutil.rmtree(tmp, ignore_errors=True)
-        # Server path
         if server_path:
             ds = Path(server_path)
             md, df = run_quality(ds, Path(yaml_file.name) if yaml_file else None,
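Editor's note, not part of the commit: the Roboflow Universe URLs read from the uploaded text file are parsed by the `RF_RE` pattern shown earlier before `download_rf_dataset` is called; a quick check with an invented URL:

```python
# Illustrative only; RF_RE is copied verbatim from the diff, the URL is made up.
import re

RF_RE = re.compile(r"https?://universe\.roboflow\.com/([^/]+)/([^/]+)/dataset/(\d+)")

m = RF_RE.match("https://universe.roboflow.com/acme/traffic-signs/dataset/4")
print(m.groups() if m else None)   # ('acme', 'traffic-signs', '4'): workspace, project, version
```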
 