Update app.py
Browse files
app.py
CHANGED
@@ -92,7 +92,6 @@ def load_class_names(yaml_path: Path) -> List[str]:
|
|
92 |
data = load_yaml(yaml_path)
|
93 |
names = data.get("names", [])
|
94 |
if isinstance(names, dict):
|
95 |
-
# Convert { "0": "Person", ... } to ["Person", ...] by key order
|
96 |
return [names[k] for k in sorted(names, key=lambda x: int(x))]
|
97 |
return list(names)
|
98 |
|
@@ -187,7 +186,7 @@ def qc_class_balance(lbls: List[Path], cfg: QCConfig) -> Dict:
|
|
187 |
for l in lbls[:cfg.sample_limit]:
|
188 |
bs = parse_label_file(l) if l else []
|
189 |
boxes.append(len(bs))
|
190 |
-
counts.update(b[0] for b in bs)
|
191 |
if not counts:
|
192 |
return {"name": "Class balance", "score": 0, "details": "No labels"}
|
193 |
bal = min(counts.values()) / max(counts.values()) * 100
|
@@ -237,8 +236,7 @@ def qc_duplicates(imgs: List[Path], cfg: QCConfig) -> Dict:
|
|
237 |
fd.run()
|
238 |
try:
|
239 |
cc = fd.connected_components_grouped(sort_by="comp_size", ascending=False)
|
240 |
-
clusters =
|
241 |
-
else cc.groupby("component")["filename"].apply(list).tolist())
|
242 |
except Exception:
|
243 |
clusters = fd.connected_components()
|
244 |
dup = sum(len(c) - 1 for c in clusters)
|
@@ -311,7 +309,7 @@ def qc_label_issues(imgs: List[Path], lbls: List[Path], cfg: QCConfig) -> Dict:
|
|
311 |
def aggregate(results: List[Dict]) -> float:
|
312 |
return sum(DEFAULT_W[r["name"]]*r["score"] for r in results)
|
313 |
|
314 |
-
#
|
315 |
def gather_class_counts(
|
316 |
dataset_info_list: List[Tuple[str, List[str], List[str], str]]
|
317 |
) -> Counter[str]:
|
@@ -322,12 +320,13 @@ def gather_class_counts(
|
|
322 |
if not labels_dir.exists():
|
323 |
continue
|
324 |
for lp in labels_dir.rglob("*.txt"):
|
325 |
-
for
|
326 |
-
|
327 |
-
|
|
|
328 |
return counts
|
329 |
|
330 |
-
#
|
331 |
RF_RE = re.compile(r"https?://universe\.roboflow\.com/([^/]+)/([^/]+)/dataset/(\d+)")
|
332 |
|
333 |
def download_rf_dataset(url: str, rf_api: Roboflow, dest: Path) -> Path:
|
@@ -408,10 +407,11 @@ def merge_datasets(
|
|
408 |
if not labels_root.exists():
|
409 |
continue
|
410 |
for lp in labels_root.rglob("*.txt"):
|
411 |
-
|
412 |
-
for
|
413 |
-
|
414 |
-
if
|
|
|
415 |
new = class_name_mapping.get(orig, orig)
|
416 |
if new in active_classes:
|
417 |
cls_set.add(new)
|
@@ -423,7 +423,7 @@ def merge_datasets(
|
|
423 |
for c in cls_set:
|
424 |
class_to_images[c].add(img_path)
|
425 |
|
426 |
-
selected_images = set()
|
427 |
counters = {c: 0 for c in active_classes}
|
428 |
pool = [img for imgs in class_to_images.values() for img in imgs]
|
429 |
random.shuffle(pool)
|
@@ -446,11 +446,16 @@ def merge_datasets(
|
|
446 |
dst_lbl = out_dir / split / "labels" / lp_src.name
|
447 |
dst_lbl.parent.mkdir(parents=True, exist_ok=True)
|
448 |
lines = lp_src.read_text().splitlines()
|
449 |
-
new_lines = []
|
450 |
for line in lines:
|
451 |
parts = line.split()
|
452 |
cid = int(parts[0])
|
453 |
-
orig =
|
|
|
|
|
|
|
|
|
|
|
454 |
merged = class_name_mapping.get(orig, orig) if orig else None
|
455 |
if merged and merged in active_classes:
|
456 |
new_id = id_map[merged]
|
@@ -479,7 +484,7 @@ with gr.Blocks(css="#classdf td{min-width:120px}") as demo:
|
|
479 |
_Evaluate β’ Merge β’ Edit β’ Download_
|
480 |
""")
|
481 |
|
482 |
-
#
|
483 |
with gr.Tab("Evaluate"):
|
484 |
api_in = gr.Textbox(label="Roboflow API key", type="password")
|
485 |
url_txt = gr.File(label=".txt of RF dataset URLs", file_types=['.txt'])
|
@@ -554,7 +559,7 @@ with gr.Blocks(css="#classdf td{min-width:120px}") as demo:
|
|
554 |
outputs=[out_md, out_df]
|
555 |
)
|
556 |
|
557 |
-
#
|
558 |
with gr.Tab("Merge / Edit"):
|
559 |
gr.Markdown("### 1οΈβ£Β Load one or more datasets")
|
560 |
rf_key = gr.Textbox(label="Roboflow API key", type="password")
|
|
|
92 |
data = load_yaml(yaml_path)
|
93 |
names = data.get("names", [])
|
94 |
if isinstance(names, dict):
|
|
|
95 |
return [names[k] for k in sorted(names, key=lambda x: int(x))]
|
96 |
return list(names)
|
97 |
|
|
|
186 |
for l in lbls[:cfg.sample_limit]:
|
187 |
bs = parse_label_file(l) if l else []
|
188 |
boxes.append(len(bs))
|
189 |
+
counts.update(int(b[0]) for b in bs)
|
190 |
if not counts:
|
191 |
return {"name": "Class balance", "score": 0, "details": "No labels"}
|
192 |
bal = min(counts.values()) / max(counts.values()) * 100
|
|
|
236 |
fd.run()
|
237 |
try:
|
238 |
cc = fd.connected_components_grouped(sort_by="comp_size", ascending=False)
|
239 |
+
clusters = cc["files"].tolist() if "files" in cc.columns else cc.groupby("component")["filename"].apply(list).tolist()
|
|
|
240 |
except Exception:
|
241 |
clusters = fd.connected_components()
|
242 |
dup = sum(len(c) - 1 for c in clusters)
|
|
|
309 |
def aggregate(results: List[Dict]) -> float:
|
310 |
return sum(DEFAULT_W[r["name"]]*r["score"] for r in results)
|
311 |
|
312 |
+
# ───────── gathering actual per-class counts ────────────────────────────────
|
313 |
def gather_class_counts(
|
314 |
dataset_info_list: List[Tuple[str, List[str], List[str], str]]
|
315 |
) -> Counter[str]:
|
|
|
320 |
if not labels_dir.exists():
|
321 |
continue
|
322 |
for lp in labels_dir.rglob("*.txt"):
|
323 |
+
for cls_id_float, *_ in parse_label_file(lp):
|
324 |
+
idx = int(cls_id_float)
|
325 |
+
if 0 <= idx < len(class_names):
|
326 |
+
counts[class_names[idx]] += 1
|
327 |
return counts
|
328 |
|
329 |
+
# ───────────────── Roboflow TXT-loading logic ─────────────────────────────
|
330 |
RF_RE = re.compile(r"https?://universe\.roboflow\.com/([^/]+)/([^/]+)/dataset/(\d+)")
|
331 |
|
332 |
def download_rf_dataset(url: str, rf_api: Roboflow, dest: Path) -> Path:
|
|
|
407 |
if not labels_root.exists():
|
408 |
continue
|
409 |
for lp in labels_root.rglob("*.txt"):
|
410 |
+
cls_set: set[str] = set()
|
411 |
+
for cls_id_float, *rest in parse_label_file(lp):
|
412 |
+
idx = int(cls_id_float)
|
413 |
+
if 0 <= idx < len(class_names_dataset):
|
414 |
+
orig = class_names_dataset[idx]
|
415 |
new = class_name_mapping.get(orig, orig)
|
416 |
if new in active_classes:
|
417 |
cls_set.add(new)
|
|
|
423 |
for c in cls_set:
|
424 |
class_to_images[c].add(img_path)
|
425 |
|
426 |
+
selected_images: set[str] = set()
|
427 |
counters = {c: 0 for c in active_classes}
|
428 |
pool = [img for imgs in class_to_images.values() for img in imgs]
|
429 |
random.shuffle(pool)
|
|
|
446 |
dst_lbl = out_dir / split / "labels" / lp_src.name
|
447 |
dst_lbl.parent.mkdir(parents=True, exist_ok=True)
|
448 |
lines = lp_src.read_text().splitlines()
|
449 |
+
new_lines: List[str] = []
|
450 |
for line in lines:
|
451 |
parts = line.split()
|
452 |
cid = int(parts[0])
|
453 |
+
orig = None
|
454 |
+
# find which dataset tuple this lp_src belongs to, to get class_names_dataset
|
455 |
+
for dloc, class_names_dataset, splits, _ in dataset_info_list:
|
456 |
+
if str(lp_src).startswith(dloc):
|
457 |
+
orig = class_names_dataset[cid] if cid < len(class_names_dataset) else None
|
458 |
+
break
|
459 |
merged = class_name_mapping.get(orig, orig) if orig else None
|
460 |
if merged and merged in active_classes:
|
461 |
new_id = id_map[merged]
|
|
|
484 |
_Evaluate β’ Merge β’ Edit β’ Download_
|
485 |
""")
|
486 |
|
487 |
+
# Evaluate Tab ...
|
488 |
with gr.Tab("Evaluate"):
|
489 |
api_in = gr.Textbox(label="Roboflow API key", type="password")
|
490 |
url_txt = gr.File(label=".txt of RF dataset URLs", file_types=['.txt'])
|
|
|
559 |
outputs=[out_md, out_df]
|
560 |
)
|
561 |
|
562 |
+
# Merge / Edit Tab
|
563 |
with gr.Tab("Merge / Edit"):
|
564 |
gr.Markdown("### 1οΈβ£Β Load one or more datasets")
|
565 |
rf_key = gr.Textbox(label="Roboflow API key", type="password")
|