wuhp committed on
Commit
39da2cc
·
verified ·
1 Parent(s): bd771a4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -5
app.py CHANGED
@@ -92,6 +92,7 @@ def load_class_names(yaml_path: Path) -> List[str]:
92
  data = load_yaml(yaml_path)
93
  names = data.get("names", [])
94
  if isinstance(names, dict):
 
95
  return [names[k] for k in sorted(names, key=lambda x: int(x))]
96
  return list(names)
97
 
@@ -310,7 +311,23 @@ def qc_label_issues(imgs: List[Path], lbls: List[Path], cfg: QCConfig) -> Dict:
310
  def aggregate(results: List[Dict]) -> float:
311
  return sum(DEFAULT_W[r["name"]]*r["score"] for r in results)
312
 
313
- # ───────────────── Roboflow TXT‑loading logic for both tabs ─────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
314
  RF_RE = re.compile(r"https?://universe\.roboflow\.com/([^/]+)/([^/]+)/dataset/(\d+)")
315
 
316
  def download_rf_dataset(url: str, rf_api: Roboflow, dest: Path) -> Path:
@@ -325,6 +342,7 @@ def download_rf_dataset(url: str, rf_api: Roboflow, dest: Path) -> Path:
325
  pr.version(int(ver)).download("yolov8", location=str(ds_dir))
326
  return ds_dir
327
 
 
328
  def run_quality(
329
  root: Path,
330
  yaml_file: Path | None,
@@ -350,7 +368,7 @@ def run_quality(
350
  md.append(json.dumps(r["details"], indent=2))
351
  md.append("```\n</details>\n")
352
  df = pd.DataFrame.from_dict(
353
- next(r for r in results if r["name"]=="Class balance")["details"]["class_counts"],
354
  orient="index", columns=["count"]
355
  )
356
  df.index.name = "class"
@@ -461,6 +479,7 @@ with gr.Blocks(css="#classdf td{min-width:120px}") as demo:
461
  _Evaluate ‒ Merge ‒ Edit ‒ Download_
462
  """)
463
 
 
464
  with gr.Tab("Evaluate"):
465
  api_in = gr.Textbox(label="Roboflow API key", type="password")
466
  url_txt = gr.File(label=".txt of RF dataset URLs", file_types=['.txt'])
@@ -535,6 +554,7 @@ with gr.Blocks(css="#classdf td{min-width:120px}") as demo:
535
  outputs=[out_md, out_df]
536
  )
537
 
 
538
  with gr.Tab("Merge / Edit"):
539
  gr.Markdown("### 1️⃣ Load one or more datasets")
540
  rf_key = gr.Textbox(label="Roboflow API key", type="password")
@@ -570,7 +590,7 @@ with gr.Blocks(css="#classdf td{min-width:120px}") as demo:
570
  yaml_p = next(tmp.rglob("*.yaml"), None)
571
  if yaml_p:
572
  names = load_class_names(yaml_p)
573
- splits= [s for s in ("train","valid","test") if (tmp/s).exists()]
574
  info_list.append((str(tmp), names, splits, tmp.name))
575
  log_lines.append(f"✔️ ZIP **{tmp.name}** loaded")
576
 
@@ -587,11 +607,12 @@ with gr.Blocks(css="#classdf td{min-width:120px}") as demo:
587
  refresh_btn = gr.Button("Build class table from loaded datasets")
588
 
589
  def _build_class_df(ds_info):
590
- all_names = sorted({n for _, names, _, _ in ds_info for n in names})
 
591
  return pd.DataFrame({
592
  "original_class": all_names,
593
  "new_name": all_names,
594
- "max_images": [99999]*len(all_names),
595
  "remove": [False]*len(all_names),
596
  })
597
 
 
92
  data = load_yaml(yaml_path)
93
  names = data.get("names", [])
94
  if isinstance(names, dict):
95
+ # Convert { "0": "Person", ... } to ["Person", ...] by key order
96
  return [names[k] for k in sorted(names, key=lambda x: int(x))]
97
  return list(names)
98
 
 
311
  def aggregate(results: List[Dict]) -> float:
312
  return sum(DEFAULT_W[r["name"]]*r["score"] for r in results)
313
 
314
+ # ─────── New: gather actual per‑class image counts ─────────────────────────
315
+ def gather_class_counts(
316
+ dataset_info_list: List[Tuple[str, List[str], List[str], str]]
317
+ ) -> Counter[str]:
318
+ counts: Counter[str] = Counter()
319
+ for dloc, class_names, splits, _ in dataset_info_list:
320
+ for split in splits:
321
+ labels_dir = Path(dloc) / split / "labels"
322
+ if not labels_dir.exists():
323
+ continue
324
+ for lp in labels_dir.rglob("*.txt"):
325
+ for cls_id, *_ in parse_label_file(lp):
326
+ if 0 <= cls_id < len(class_names):
327
+ counts[class_names[cls_id]] += 1
328
+ return counts
329
+
330
+ # ────────────────── Roboflow TXT‑loading logic ─────────────────────────────
331
  RF_RE = re.compile(r"https?://universe\.roboflow\.com/([^/]+)/([^/]+)/dataset/(\d+)")
332
 
333
  def download_rf_dataset(url: str, rf_api: Roboflow, dest: Path) -> Path:
 
342
  pr.version(int(ver)).download("yolov8", location=str(ds_dir))
343
  return ds_dir
344
 
345
+ # ───────────────── run_quality & merge_datasets ────────────────────────────
346
  def run_quality(
347
  root: Path,
348
  yaml_file: Path | None,
 
368
  md.append(json.dumps(r["details"], indent=2))
369
  md.append("```\n</details>\n")
370
  df = pd.DataFrame.from_dict(
371
+ next(r for r in results if r["name"] == "Class balance")["details"]["class_counts"],
372
  orient="index", columns=["count"]
373
  )
374
  df.index.name = "class"
 
479
  _Evaluate ‒ Merge ‒ Edit ‒ Download_
480
  """)
481
 
482
+ # ------------------------------ EVALUATE TAB --------------------------
483
  with gr.Tab("Evaluate"):
484
  api_in = gr.Textbox(label="Roboflow API key", type="password")
485
  url_txt = gr.File(label=".txt of RF dataset URLs", file_types=['.txt'])
 
554
  outputs=[out_md, out_df]
555
  )
556
 
557
+ # ------------------------------ MERGE / EDIT TAB --------------------------
558
  with gr.Tab("Merge / Edit"):
559
  gr.Markdown("### 1️⃣ Load one or more datasets")
560
  rf_key = gr.Textbox(label="Roboflow API key", type="password")
 
590
  yaml_p = next(tmp.rglob("*.yaml"), None)
591
  if yaml_p:
592
  names = load_class_names(yaml_p)
593
+ splits = [s for s in ("train","valid","test") if (tmp/s).exists()]
594
  info_list.append((str(tmp), names, splits, tmp.name))
595
  log_lines.append(f"✔️ ZIP **{tmp.name}** loaded")
596
 
 
607
  refresh_btn = gr.Button("Build class table from loaded datasets")
608
 
609
  def _build_class_df(ds_info):
610
+ counts = gather_class_counts(ds_info)
611
+ all_names = sorted(counts.keys())
612
  return pd.DataFrame({
613
  "original_class": all_names,
614
  "new_name": all_names,
615
+ "max_images": [counts[n] for n in all_names],
616
  "remove": [False]*len(all_names),
617
  })
618