wuhp committed on
Commit
0a09f92
·
verified ·
1 Parent(s): 39da2cc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -18
app.py CHANGED
@@ -92,7 +92,6 @@ def load_class_names(yaml_path: Path) -> List[str]:
92
  data = load_yaml(yaml_path)
93
  names = data.get("names", [])
94
  if isinstance(names, dict):
95
- # Convert { "0": "Person", ... } to ["Person", ...] by key order
96
  return [names[k] for k in sorted(names, key=lambda x: int(x))]
97
  return list(names)
98
 
@@ -187,7 +186,7 @@ def qc_class_balance(lbls: List[Path], cfg: QCConfig) -> Dict:
187
  for l in lbls[:cfg.sample_limit]:
188
  bs = parse_label_file(l) if l else []
189
  boxes.append(len(bs))
190
- counts.update(b[0] for b in bs)
191
  if not counts:
192
  return {"name": "Class balance", "score": 0, "details": "No labels"}
193
  bal = min(counts.values()) / max(counts.values()) * 100
@@ -237,8 +236,7 @@ def qc_duplicates(imgs: List[Path], cfg: QCConfig) -> Dict:
237
  fd.run()
238
  try:
239
  cc = fd.connected_components_grouped(sort_by="comp_size", ascending=False)
240
- clusters = (cc["files"].tolist() if "files" in cc.columns
241
- else cc.groupby("component")["filename"].apply(list).tolist())
242
  except Exception:
243
  clusters = fd.connected_components()
244
  dup = sum(len(c) - 1 for c in clusters)
@@ -311,7 +309,7 @@ def qc_label_issues(imgs: List[Path], lbls: List[Path], cfg: QCConfig) -> Dict:
311
  def aggregate(results: List[Dict]) -> float:
312
  return sum(DEFAULT_W[r["name"]]*r["score"] for r in results)
313
 
314
- # ─────── New: gather actual per‑class image counts ─────────────────────────
315
  def gather_class_counts(
316
  dataset_info_list: List[Tuple[str, List[str], List[str], str]]
317
  ) -> Counter[str]:
@@ -322,12 +320,13 @@ def gather_class_counts(
322
  if not labels_dir.exists():
323
  continue
324
  for lp in labels_dir.rglob("*.txt"):
325
- for cls_id, *_ in parse_label_file(lp):
326
- if 0 <= cls_id < len(class_names):
327
- counts[class_names[cls_id]] += 1
 
328
  return counts
329
 
330
- # ────────────────── Roboflow TXT‑loading logic ─────────────────────────────
331
  RF_RE = re.compile(r"https?://universe\.roboflow\.com/([^/]+)/([^/]+)/dataset/(\d+)")
332
 
333
  def download_rf_dataset(url: str, rf_api: Roboflow, dest: Path) -> Path:
@@ -408,10 +407,11 @@ def merge_datasets(
408
  if not labels_root.exists():
409
  continue
410
  for lp in labels_root.rglob("*.txt"):
411
- im_name, cls_set = lp.stem + ".jpg", set()
412
- for cls_id, *rest in parse_label_file(lp):
413
- orig = class_names_dataset[int(cls_id)] if int(cls_id) < len(class_names_dataset) else None
414
- if orig:
 
415
  new = class_name_mapping.get(orig, orig)
416
  if new in active_classes:
417
  cls_set.add(new)
@@ -423,7 +423,7 @@ def merge_datasets(
423
  for c in cls_set:
424
  class_to_images[c].add(img_path)
425
 
426
- selected_images = set()
427
  counters = {c: 0 for c in active_classes}
428
  pool = [img for imgs in class_to_images.values() for img in imgs]
429
  random.shuffle(pool)
@@ -446,11 +446,16 @@ def merge_datasets(
446
  dst_lbl = out_dir / split / "labels" / lp_src.name
447
  dst_lbl.parent.mkdir(parents=True, exist_ok=True)
448
  lines = lp_src.read_text().splitlines()
449
- new_lines = []
450
  for line in lines:
451
  parts = line.split()
452
  cid = int(parts[0])
453
- orig = class_names_dataset[cid] if cid < len(class_names_dataset) else None
 
 
 
 
 
454
  merged = class_name_mapping.get(orig, orig) if orig else None
455
  if merged and merged in active_classes:
456
  new_id = id_map[merged]
@@ -479,7 +484,7 @@ with gr.Blocks(css="#classdf td{min-width:120px}") as demo:
479
  _Evaluate β€’ Merge β€’ Edit β€’ Download_
480
  """)
481
 
482
- # ------------------------------ EVALUATE TAB --------------------------
483
  with gr.Tab("Evaluate"):
484
  api_in = gr.Textbox(label="Roboflow API key", type="password")
485
  url_txt = gr.File(label=".txt of RF dataset URLs", file_types=['.txt'])
@@ -554,7 +559,7 @@ with gr.Blocks(css="#classdf td{min-width:120px}") as demo:
554
  outputs=[out_md, out_df]
555
  )
556
 
557
- # ------------------------------ MERGE / EDIT TAB --------------------------
558
  with gr.Tab("Merge / Edit"):
559
  gr.Markdown("### 1️⃣ Load one or more datasets")
560
  rf_key = gr.Textbox(label="Roboflow API key", type="password")
 
92
  data = load_yaml(yaml_path)
93
  names = data.get("names", [])
94
  if isinstance(names, dict):
 
95
  return [names[k] for k in sorted(names, key=lambda x: int(x))]
96
  return list(names)
97
 
 
186
  for l in lbls[:cfg.sample_limit]:
187
  bs = parse_label_file(l) if l else []
188
  boxes.append(len(bs))
189
+ counts.update(int(b[0]) for b in bs)
190
  if not counts:
191
  return {"name": "Class balance", "score": 0, "details": "No labels"}
192
  bal = min(counts.values()) / max(counts.values()) * 100
 
236
  fd.run()
237
  try:
238
  cc = fd.connected_components_grouped(sort_by="comp_size", ascending=False)
239
+ clusters = cc["files"].tolist() if "files" in cc.columns else cc.groupby("component")["filename"].apply(list).tolist()
 
240
  except Exception:
241
  clusters = fd.connected_components()
242
  dup = sum(len(c) - 1 for c in clusters)
 
309
  def aggregate(results: List[Dict]) -> float:
310
  return sum(DEFAULT_W[r["name"]]*r["score"] for r in results)
311
 
312
+ # ───────── gathering actual per-class counts ────────────────────────────────
313
  def gather_class_counts(
314
  dataset_info_list: List[Tuple[str, List[str], List[str], str]]
315
  ) -> Counter[str]:
 
320
  if not labels_dir.exists():
321
  continue
322
  for lp in labels_dir.rglob("*.txt"):
323
+ for cls_id_float, *_ in parse_label_file(lp):
324
+ idx = int(cls_id_float)
325
+ if 0 <= idx < len(class_names):
326
+ counts[class_names[idx]] += 1
327
  return counts
328
 
329
+ # ───────────────── Roboflow TXT‑loading logic ─────────────────────────────
330
  RF_RE = re.compile(r"https?://universe\.roboflow\.com/([^/]+)/([^/]+)/dataset/(\d+)")
331
 
332
  def download_rf_dataset(url: str, rf_api: Roboflow, dest: Path) -> Path:
 
407
  if not labels_root.exists():
408
  continue
409
  for lp in labels_root.rglob("*.txt"):
410
+ cls_set: set[str] = set()
411
+ for cls_id_float, *rest in parse_label_file(lp):
412
+ idx = int(cls_id_float)
413
+ if 0 <= idx < len(class_names_dataset):
414
+ orig = class_names_dataset[idx]
415
  new = class_name_mapping.get(orig, orig)
416
  if new in active_classes:
417
  cls_set.add(new)
 
423
  for c in cls_set:
424
  class_to_images[c].add(img_path)
425
 
426
+ selected_images: set[str] = set()
427
  counters = {c: 0 for c in active_classes}
428
  pool = [img for imgs in class_to_images.values() for img in imgs]
429
  random.shuffle(pool)
 
446
  dst_lbl = out_dir / split / "labels" / lp_src.name
447
  dst_lbl.parent.mkdir(parents=True, exist_ok=True)
448
  lines = lp_src.read_text().splitlines()
449
+ new_lines: List[str] = []
450
  for line in lines:
451
  parts = line.split()
452
  cid = int(parts[0])
453
+ orig = None
454
+ # find which dataset tuple this lp_src belongs to, to get class_names_dataset
455
+ for dloc, class_names_dataset, splits, _ in dataset_info_list:
456
+ if str(lp_src).startswith(dloc):
457
+ orig = class_names_dataset[cid] if cid < len(class_names_dataset) else None
458
+ break
459
  merged = class_name_mapping.get(orig, orig) if orig else None
460
  if merged and merged in active_classes:
461
  new_id = id_map[merged]
 
484
  _Evaluate β€’ Merge β€’ Edit β€’ Download_
485
  """)
486
 
487
+ # Evaluate Tab ...
488
  with gr.Tab("Evaluate"):
489
  api_in = gr.Textbox(label="Roboflow API key", type="password")
490
  url_txt = gr.File(label=".txt of RF dataset URLs", file_types=['.txt'])
 
559
  outputs=[out_md, out_df]
560
  )
561
 
562
+ # Merge / Edit Tab
563
  with gr.Tab("Merge / Edit"):
564
  gr.Markdown("### 1️⃣ Load one or more datasets")
565
  rf_key = gr.Textbox(label="Roboflow API key", type="password")