|
|
|
|
|
import json |
|
import random |
|
import shutil |
|
from collections import defaultdict |
|
from concurrent.futures import ThreadPoolExecutor, as_completed |
|
from pathlib import Path |
|
|
|
import cv2 |
|
import numpy as np |
|
from PIL import Image |
|
|
|
from ultralytics.utils import DATASETS_DIR, LOGGER, NUM_THREADS, TQDM |
|
from ultralytics.utils.downloads import download |
|
from ultralytics.utils.files import increment_path |
|
|
|
|
|
def coco91_to_coco80_class():
    """
    Build the lookup table from 91-index COCO category slots to contiguous 80-index class IDs.

    Returns:
        (list): A list of 91 entries. Position ``i`` is a 0-based slot in the 91-index numbering and the value
            stored there is the matching 80-index class ID (0-79), or None when that slot has no counterpart
            in the 80-class set.
    """
    # 0-based slots of the 91-index numbering that have no 80-class equivalent.
    unmapped_slots = {11, 25, 28, 29, 44, 65, 67, 68, 70, 82, 90}

    table = []
    next_class = 0
    for slot in range(91):
        if slot in unmapped_slots:
            table.append(None)
            continue
        # Mapped slots receive consecutive class IDs in ascending order.
        table.append(next_class)
        next_class += 1
    return table
|
|
|
|
|
def coco80_to_coco91_class():
    r"""
    Converts 80-index (val2014) to 91-index (paper).

    For details see https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/.

    Returns:
        (list): A list of 80 integers in ascending order. Position ``i`` is the 0-based 80-index class and the
            value is the corresponding 1-based 91-index category ID.

    Example:
        ```python
        import numpy as np

        a = np.loadtxt("data/coco.names", dtype="str", delimiter="\n")
        b = np.loadtxt("data/coco_paper.names", dtype="str", delimiter="\n")
        x1 = [list(a[i] == b).index(True) + 1 for i in range(80)]  # darknet to coco
        x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)]  # coco to darknet
        ```
    """
    # 91-index IDs in the range 1-90 that are absent from the 80-class set.
    skipped_ids = {12, 26, 29, 30, 45, 66, 68, 69, 71, 83}
    return [category_id for category_id in range(1, 91) if category_id not in skipped_ids]
|
|
|
|
|
def convert_coco(
    labels_dir="../coco/annotations/",
    save_dir="coco_converted/",
    use_segments=False,
    use_keypoints=False,
    cls91to80=True,
    lvis=False,
):
    """
    Converts COCO dataset annotations to a YOLO annotation format suitable for training YOLO models.

    Every ``*.json`` file found in ``labels_dir`` is read and one ``.txt`` label file is written per annotated
    image. Each kept annotation becomes exactly one output row, chosen in this priority order: keypoints row
    (``use_keypoints``), polygon row (``use_segments`` and a non-empty segmentation), otherwise a plain box row.

    Args:
        labels_dir (str, optional): Path to directory containing COCO dataset annotation files.
        save_dir (str, optional): Path to directory to save results to.
        use_segments (bool, optional): Whether to include segmentation masks in the output.
        use_keypoints (bool, optional): Whether to include keypoint annotations in the output.
        cls91to80 (bool, optional): Whether to map 91 COCO class IDs to the corresponding 80 COCO class IDs.
        lvis (bool, optional): Whether to convert data in lvis dataset way.

    Example:
        ```python
        from ultralytics.data.converter import convert_coco

        convert_coco("../datasets/coco/annotations/", use_segments=True, use_keypoints=False, cls91to80=False)
        convert_coco(
            "../datasets/lvis/annotations/", use_segments=True, use_keypoints=False, cls91to80=False, lvis=True
        )
        ```

    Output:
        Generates output files in the specified output directory.

    Notes:
        - Annotations flagged ``iscrowd``, boxes with non-positive width or height, and boxes that duplicate an
          earlier box of the same image (same class and coordinates) are dropped.
        - With ``use_segments=True``, an annotation whose segmentation is missing or empty falls back to its
          box row.
        - With ``use_keypoints=True``, an annotation without a ``keypoints`` field is skipped (a box-only row
          would have a different column count than the keypoint rows); the number skipped is logged.
    """
    # Create the output directory tree.
    save_dir = increment_path(save_dir)
    for p in save_dir / "labels", save_dir / "images":
        p.mkdir(parents=True, exist_ok=True)

    # Lookup table used when cls91to80 is enabled.
    coco80 = coco91_to_coco80_class()

    for json_file in sorted(Path(labels_dir).resolve().glob("*.json")):
        lname = "" if lvis else json_file.stem.replace("instances_", "")
        fn = Path(save_dir) / "labels" / lname  # label folder for this annotation file
        fn.mkdir(parents=True, exist_ok=True)
        if lvis:
            # LVIS label paths are derived from coco_url below, so both split folders are created up front.
            (fn / "train2017").mkdir(parents=True, exist_ok=True)
            (fn / "val2017").mkdir(parents=True, exist_ok=True)
        with open(json_file, encoding="utf-8") as f:
            data = json.load(f)

        # Image records keyed by the decimal string of their integer id.
        images = {f"{x['id']:d}": x for x in data["images"]}

        # Group annotations by the image they belong to.
        imgToAnns = defaultdict(list)
        for ann in data["annotations"]:
            imgToAnns[ann["image_id"]].append(ann)

        image_txt = []  # relative image paths, only collected for LVIS
        missing_keypoints = 0  # annotations skipped because use_keypoints is set but they carry none

        for img_id, anns in TQDM(imgToAnns.items(), desc=f"Annotations {json_file}"):
            img = images[f"{img_id:d}"]
            h, w = img["height"], img["width"]
            if lvis:
                rel_name = str(Path(img["coco_url"]).relative_to("http://images.cocodataset.org"))
                image_txt.append(str(Path("./images") / rel_name))
            else:
                rel_name = img["file_name"]

            rows = []  # one output row per kept annotation, so rows can never get out of step
            seen_boxes = set()  # (cls, x, y, w, h) tuples already emitted for this image
            for ann in anns:
                if ann.get("iscrowd", False):
                    continue

                # COCO box is [top-left x, top-left y, width, height] in pixels.
                box = np.array(ann["bbox"], dtype=np.float64)
                box[:2] += box[2:] / 2  # top-left corner -> box center
                box[[0, 2]] /= w  # normalize x and width
                box[[1, 3]] /= h  # normalize y and height
                if box[2] <= 0 or box[3] <= 0:
                    continue

                cls = coco80[ann["category_id"] - 1] if cls91to80 else ann["category_id"] - 1
                box = [cls] + box.tolist()
                box_key = tuple(box)
                if box_key in seen_boxes:
                    continue
                seen_boxes.add(box_key)

                row = box
                if use_keypoints:
                    kpts = ann.get("keypoints")
                    if kpts is None:
                        missing_keypoints += 1
                        continue
                    # Keypoints are (x, y, visibility) triplets; only x and y are normalized.
                    row = box + (np.array(kpts).reshape(-1, 3) / np.array([w, h, 1])).reshape(-1).tolist()
                elif use_segments:
                    polygons = ann.get("segmentation")
                    if polygons is not None and len(polygons) > 0:
                        if len(polygons) > 1:
                            # Several polygons: join them into a single outline first.
                            merged = merge_multi_segment(polygons)
                            flat = (np.concatenate(merged, axis=0) / np.array([w, h])).reshape(-1).tolist()
                        else:
                            coords = [value for polygon in polygons for value in polygon]
                            flat = (np.array(coords).reshape(-1, 2) / np.array([w, h])).reshape(-1).tolist()
                        row = [cls] + flat
                rows.append(row)

            # Append mode: the file is created even when no row is kept for the image.
            with open((fn / rel_name).with_suffix(".txt"), "a", encoding="utf-8") as file:
                for row in rows:
                    line = (*row,)
                    file.write(("%g " * len(line)).rstrip() % line + "\n")

        if missing_keypoints:
            LOGGER.warning(
                f"{missing_keypoints} annotations in {json_file} have no 'keypoints' field and were skipped."
            )

        if lvis:
            list_file = Path(save_dir) / json_file.name.replace("lvis_v1_", "").replace(".json", ".txt")
            with open(list_file, "a", encoding="utf-8") as f:
                f.writelines(f"{line}\n" for line in image_txt)

    LOGGER.info(f"{'LVIS' if lvis else 'COCO'} data converted successfully.\nResults saved to {save_dir.resolve()}")
|
|
|
|
|
def convert_segment_masks_to_yolo_seg(masks_dir, output_dir, classes):
    """
    Converts a dataset of segmentation mask images to the YOLO segmentation format.

    Each mask is read as a single-channel grayscale image in which pixel value 0 is ignored and pixel value
    ``v`` (1 <= v <= classes) denotes class index ``v - 1``. For every such value the external contours are
    extracted and written as one normalized polygon per line. The converted masks are saved in the specified
    output directory, which is created if it does not exist.

    Args:
        masks_dir (str): The path to the directory where all mask images (png, jpg) are stored.
        output_dir (str): The path to the directory where the converted YOLO segmentation masks will be stored.
        classes (int): Total classes in the dataset i.e. for COCO classes=80

    Example:
        ```python
        from ultralytics.data.converter import convert_segment_masks_to_yolo_seg

        # The classes here is the total classes in the dataset, for COCO dataset we have 80 classes
        convert_segment_masks_to_yolo_seg("path/to/masks_directory", "path/to/output/directory", classes=80)
        ```

    Notes:
        The expected directory structure for the masks is:

            - masks
                |-- mask_image_01.png or mask_image_01.jpg
                |-- mask_image_02.png or mask_image_02.jpg
                |-- mask_image_03.png or mask_image_03.jpg
                `-- mask_image_04.png or mask_image_04.jpg

        After execution, the labels will be organized in the following structure, one ``.txt`` file per mask,
        named after the mask file's stem:

            - output_dir
                |-- mask_image_01.txt
                |-- mask_image_02.txt
                |-- mask_image_03.txt
                `-- mask_image_04.txt

        Files that cannot be decoded as images are skipped with a warning, as are pixel values outside
        the range 1..classes.
    """
    pixel_to_class_mapping = {i + 1: i for i in range(classes)}
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)  # open() below would fail on a missing directory

    for mask_path in Path(masks_dir).iterdir():
        if mask_path.suffix not in {".png", ".jpg"}:
            continue

        mask = cv2.imread(str(mask_path), cv2.IMREAD_GRAYSCALE)
        if mask is None:
            # cv2.imread signals an unreadable or corrupt file by returning None instead of raising.
            LOGGER.warning(f"Could not read mask image {mask_path}, skipping.")
            continue
        img_height, img_width = mask.shape
        LOGGER.info(f"Processing {mask_path} imgsz = {img_height} x {img_width}")

        scale = np.array([img_width, img_height])  # divisor for (x, y) pixel coordinates
        yolo_format_data = []

        for value in np.unique(mask):
            if value == 0:
                continue  # pixel value 0 is not mapped to any class
            class_index = pixel_to_class_mapping.get(int(value), -1)
            if class_index == -1:
                LOGGER.warning(f"Unknown class for pixel value {value} in file {mask_path}, skipping.")
                continue

            # Outer contours of the binary mask for this pixel value.
            contours, _ = cv2.findContours(
                (mask == value).astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
            )

            for contour in contours:
                if len(contour) < 3:
                    continue  # fewer than 3 points cannot form a polygon
                # Normalize (x, y) pixel coordinates to [0, 1] and round to 6 decimals.
                points = np.round(contour.reshape(-1, 2) / scale, 6)
                yolo_format_data.append([class_index, *points.reshape(-1).tolist()])

        # One line per polygon: class index followed by x y pairs.
        output_path = output_dir / f"{mask_path.stem}.txt"
        with open(output_path, "w", encoding="utf-8") as file:
            file.writelines(" ".join(map(str, item)) + "\n" for item in yolo_format_data)
        LOGGER.info(f"Processed and stored at {output_path} imgsz = {img_height} x {img_width}")
|
|
|
|
|
def convert_dota_to_yolo_obb(dota_root_path: str):
    """
    Converts DOTA dataset annotations to YOLO OBB (Oriented Bounding Box) format.

    The function processes images in the 'train' and 'val' folders of the DOTA dataset. For each ``.png`` image,
    it reads the associated label from the original labels directory and writes new labels in YOLO OBB format
    to a new directory. Corner coordinates are normalized by the image width (x) and height (y).

    Args:
        dota_root_path (str): The root directory path of the DOTA dataset.

    Raises:
        KeyError: If a label line names a class that is not in the built-in DOTA class mapping.
        FileNotFoundError: If an image has no matching file in the original labels directory.

    Example:
        ```python
        from ultralytics.data.converter import convert_dota_to_yolo_obb

        convert_dota_to_yolo_obb("path/to/DOTA")
        ```

    Notes:
        The directory structure assumed for the DOTA dataset:

            - DOTA
                |-- images
                |   |-- train
                |   `-- val
                `-- labels
                    |-- train_original
                    `-- val_original

        After execution, the function will organize the labels into:

            - DOTA
                `-- labels
                    |-- train
                    `-- val

        Images that cannot be decoded are skipped with a warning, and no label file is written for them.
    """
    dota_root_path = Path(dota_root_path)

    # Class name to class index.
    class_mapping = {
        "plane": 0,
        "ship": 1,
        "storage-tank": 2,
        "baseball-diamond": 3,
        "tennis-court": 4,
        "basketball-court": 5,
        "ground-track-field": 6,
        "harbor": 7,
        "bridge": 8,
        "large-vehicle": 9,
        "small-vehicle": 10,
        "helicopter": 11,
        "roundabout": 12,
        "soccer-ball-field": 13,
        "swimming-pool": 14,
        "container-crane": 15,
        "airport": 16,
        "helipad": 17,
    }

    def convert_label(image_name, image_width, image_height, orig_label_dir, save_dir):
        """Converts a single image's DOTA annotation to YOLO OBB format and saves it to a specified directory."""
        orig_label_path = orig_label_dir / f"{image_name}.txt"
        save_path = save_dir / f"{image_name}.txt"

        with orig_label_path.open("r", encoding="utf-8") as f, save_path.open("w", encoding="utf-8") as g:
            for line in f:
                parts = line.strip().split()
                if len(parts) < 9:
                    continue  # not an annotation row: 8 coordinates plus a class name are required
                class_idx = class_mapping[parts[8]]
                coords = [float(p) for p in parts[:8]]
                # Even positions are x values (divide by width), odd positions are y values (divide by height).
                normalized_coords = [
                    coords[i] / image_width if i % 2 == 0 else coords[i] / image_height for i in range(8)
                ]
                formatted_coords = [f"{coord:.6g}" for coord in normalized_coords]
                g.write(f"{class_idx} {' '.join(formatted_coords)}\n")

    for phase in ["train", "val"]:
        image_dir = dota_root_path / "images" / phase
        orig_label_dir = dota_root_path / "labels" / f"{phase}_original"
        save_dir = dota_root_path / "labels" / phase

        save_dir.mkdir(parents=True, exist_ok=True)

        image_paths = list(image_dir.iterdir())
        for image_path in TQDM(image_paths, desc=f"Processing {phase} images"):
            if image_path.suffix != ".png":
                continue
            img = cv2.imread(str(image_path))
            if img is None:
                # cv2.imread returns None for unreadable files; without this guard img.shape would raise.
                LOGGER.warning(f"Could not read image {image_path}, skipping.")
                continue
            h, w = img.shape[:2]
            convert_label(image_path.stem, w, h, orig_label_dir, save_dir)
|
|
|
|
|
def min_index(arr1, arr2):
    """
    Locate the closest pair of points between two sets of 2D points.

    Args:
        arr1 (np.ndarray): A NumPy array of shape (N, 2) representing N 2D points.
        arr2 (np.ndarray): A NumPy array of shape (M, 2) representing M 2D points.

    Returns:
        (tuple): ``(i, j)`` such that ``arr1[i]`` and ``arr2[j]`` have the smallest squared Euclidean distance
            of all pairs. When several pairs tie, the first one in row-major order is returned.
    """
    # Pairwise coordinate differences via broadcasting: shape (N, M, 2).
    deltas = arr1[:, None, :] - arr2[None, :, :]
    squared_dist = (deltas**2).sum(-1)  # shape (N, M)
    flat_position = np.argmin(squared_dist, axis=None)
    return np.unravel_index(flat_position, squared_dist.shape)
|
|
|
|
|
def merge_multi_segment(segments):
    """
    Join several polygons into one chain by bridging consecutive polygons at their closest points.

    Consecutive polygons are linked at the pair of points with the minimum distance between them, so the
    returned pieces, concatenated in order, describe all polygons connected by thin lines.

    Args:
        segments (List[List]): Original segmentations in COCO's JSON file.
            Each element is a list of coordinates, like [segmentation1, segmentation2,...].

    Returns:
        s (List[np.ndarray]): A list of connected segments represented as NumPy arrays.
    """
    polygons = [np.array(seg).reshape(-1, 2) for seg in segments]
    last = len(polygons) - 1
    # links[k] collects the point indexes of polygon k used to connect it to its neighbours:
    # one entry for the first and last polygon, two entries for every polygon in between.
    links = [[] for _ in polygons]

    # Record, for each pair of consecutive polygons, the indexes of their closest points.
    for cur in range(1, len(polygons)):
        prev_pt, cur_pt = min_index(polygons[cur - 1], polygons[cur])
        links[cur - 1].append(prev_pt)
        links[cur].append(cur_pt)

    merged = []

    # Forward pass: walk the polygons first to last.
    for pos, link in enumerate(links):
        # A middle polygon whose link indexes are descending is traversed in reverse.
        # Only the local copy of the link is flipped; links[pos] itself stays untouched.
        if len(link) == 2 and link[0] > link[1]:
            link = link[::-1]
            polygons[pos] = polygons[pos][::-1, :]

        # Rotate so the polygon starts at its first link point, then close the loop.
        rotated = np.roll(polygons[pos], -link[0], axis=0)
        polygons[pos] = np.concatenate([rotated, rotated[:1]])

        if pos == 0 or pos == last:
            # First and last polygons are emitted whole.
            merged.append(polygons[pos])
        else:
            # Middle polygons contribute only the stretch up to their second link point.
            merged.append(polygons[pos][: link[1] - link[0] + 1])

    # Backward pass: walk the middle polygons last to first and emit the remaining stretch of each.
    for pos in range(last - 1, 0, -1):
        first_pt, second_pt = links[pos]
        merged.append(polygons[pos][abs(second_pt - first_pt) :])
    return merged
|
|
|
|
|
def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt", device=None):
    """
    Converts existing object detection dataset (bounding boxes) to segmentation dataset or oriented bounding box (OBB)
    in YOLO format. Generates segmentation data using SAM auto-annotator as needed.

    If the first label of the dataset already has segments, nothing is generated and the function returns.

    Args:
        im_dir (str | Path): Path to image directory to convert.
        save_dir (str | Path): Path to save the generated labels, labels will be saved
            into `labels-segment` in the same directory level of `im_dir` if save_dir is None. Default: None.
        sam_model (str): Segmentation model to use for intermediate segmentation data; optional.
        device (int | str): The specific device to run SAM models. Default: None.

    Notes:
        The input directory structure assumed for dataset:

            - im_dir
                |-- 001.jpg
                |-- ...
                `-- NNN.jpg
            - labels
                |-- 001.txt
                |-- ...
                `-- NNN.txt
    """
    from ultralytics import SAM
    from ultralytics.data import YOLODataset
    from ultralytics.utils.ops import xywh2xyxy

    # A placeholder names mapping with 1000 entries is supplied
    # (presumably so arbitrary class indexes pass dataset validation - TODO confirm).
    placeholder_data = dict(names=list(range(1000)))
    dataset = YOLODataset(im_dir, data=placeholder_data)
    labels = dataset.labels

    if len(labels[0]["segments"]) > 0:
        LOGGER.info("Segmentation labels detected, no need to generate new ones!")
        return

    LOGGER.info("Detection labels detected, generating segment labels by SAM model!")
    sam_model = SAM(sam_model)
    for label in TQDM(labels, total=len(labels), desc="Generating segment labels"):
        h, w = label["shape"]
        boxes = label["bboxes"]
        if len(boxes) == 0:
            continue
        # Scale the stored boxes to pixels in place before prompting the model.
        boxes[:, [0, 2]] *= w
        boxes[:, [1, 3]] *= h
        im = cv2.imread(label["im_file"])
        sam_results = sam_model(im, bboxes=xywh2xyxy(boxes), verbose=False, save=False, device=device)
        label["segments"] = sam_results[0].masks.xyn

    if save_dir:
        save_dir = Path(save_dir)
    else:
        save_dir = Path(im_dir).parent / "labels-segment"
    save_dir.mkdir(parents=True, exist_ok=True)

    for label in labels:
        cls = label["cls"]
        txt_file = save_dir / Path(label["im_file"]).with_suffix(".txt").name
        texts = []
        for i, s in enumerate(label["segments"]):
            if len(s) == 0:
                continue  # no polygon for this object
            line = (int(cls[i]), *s.reshape(-1))
            texts.append(("%g " * len(line)).rstrip() % line)
        # Append mode: any existing label file of the same name is extended, not replaced.
        with open(txt_file, "a") as f:
            f.writelines(text + "\n" for text in texts)
    LOGGER.info(f"Generated segment labels saved in {save_dir}")
|
|
|
|
|
def create_synthetic_coco_dataset():
    """
    Creates a synthetic COCO dataset with random images based on filenames from label lists.

    This function downloads COCO labels, reads image filenames from label list files,
    creates synthetic images for train2017 and val2017 subsets, and organizes
    them in the COCO dataset structure. It uses multithreading to generate images efficiently.

    Examples:
        >>> from ultralytics.data.converter import create_synthetic_coco_dataset
        >>> create_synthetic_coco_dataset()

    Notes:
        - Requires internet connection to download label files.
        - Generates solid-color RGB images whose width and height are each drawn from 480 to 640 pixels.
        - An existing ``labels/test2017`` directory is removed as it's not needed.
        - Reads image filenames from train2017.txt and val2017.txt files.
        - Images that already exist are left untouched.
        - Images that fail to generate are counted and reported in a warning rather than silently ignored.
    """

    def create_synthetic_image(image_file):
        """Generates synthetic images with random sizes and colors for dataset augmentation or testing purposes."""
        if not image_file.exists():
            size = (random.randint(480, 640), random.randint(480, 640))
            Image.new(
                "RGB",
                size=size,
                color=(random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)),
            ).save(image_file)

    # Download the label archive next to the dataset directory.
    coco_dir = DATASETS_DIR / "coco"
    url = "https://github.com/ultralytics/assets/releases/download/v0.0.0/"
    label_zip = "coco2017labels-segments.zip"
    download([url + label_zip], dir=coco_dir.parent)

    # Create synthetic images.
    shutil.rmtree(coco_dir / "labels" / "test2017", ignore_errors=True)
    with ThreadPoolExecutor(max_workers=NUM_THREADS) as executor:
        for subset in ["train2017", "val2017"]:
            subset_dir = coco_dir / "images" / subset
            subset_dir.mkdir(parents=True, exist_ok=True)

            # The list file holds one image path per line, relative to the dataset directory.
            label_list_file = coco_dir / f"{subset}.txt"
            if not label_list_file.exists():
                LOGGER.warning(f"Labels file {label_list_file} does not exist. Skipping image creation for {subset}.")
                continue

            with open(label_list_file, encoding="utf-8") as f:
                # Blank lines are ignored; they would otherwise resolve to the dataset directory itself.
                image_files = [coco_dir / line.strip() for line in f if line.strip()]

            futures = [executor.submit(create_synthetic_image, image_file) for image_file in image_files]
            failures = 0
            last_error = None
            for future in TQDM(as_completed(futures), total=len(futures), desc=f"Generating images for {subset}"):
                # A worker exception is only stored on the future, so it has to be checked explicitly.
                error = future.exception()
                if error is not None:
                    failures += 1
                    last_error = error
            if failures:
                LOGGER.warning(f"Failed to create {failures} images for {subset}. Last error: {last_error}")

    LOGGER.info("Synthetic COCO dataset created successfully.")
|
|