# -*- coding: utf-8 -*-
"""
Created on Mon Sep 4 16:03:42 2023

@author: SABARI
"""
import time

import tensorflow as tf
import numpy as np

#from lsnms import nms, wbc


def box_iou(box1, box2, eps=1e-7):
    """
    Calculate intersection-over-union (IoU) of boxes.
    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.

    Args:
        box1 (tf.Tensor): A tensor of shape (N, 4) representing N bounding boxes.
        box2 (tf.Tensor): A tensor of shape (M, 4) representing M bounding boxes.
        eps (float, optional): A small value to avoid division by zero. Defaults to 1e-7.

    Returns:
        (tf.Tensor): An NxM tensor containing the pairwise IoU values for every element in box1 and box2.
    """
    # Expand dims so min/max broadcast over an (N, M, 2) grid of box pairs.
    a1, a2 = tf.split(tf.expand_dims(box1, 1), 2, axis=2)  # each (N, 1, 2)
    b1, b2 = tf.split(tf.expand_dims(box2, 0), 2, axis=2)  # each (1, M, 2)
    inter = tf.reduce_prod(tf.maximum(tf.minimum(a2, b2) - tf.maximum(a1, b1), 0), axis=2)  # (N, M)
    return inter / (tf.reduce_prod(a2 - a1, axis=2) + tf.reduce_prod(b2 - b1, axis=2) - inter + eps)
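
# Quick sanity check for box_iou (illustrative values, not part of the original script): a box
# compared with itself gives IoU ~1.0, and a fully disjoint box gives 0.0.
# b1 = tf.constant([[0.0, 0.0, 10.0, 10.0]])
# b2 = tf.constant([[0.0, 0.0, 10.0, 10.0], [20.0, 20.0, 30.0, 30.0]])
# print(box_iou(b1, b2))  # -> approximately [[1.0, 0.0]]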


def xywh2xyxy(x):
    """
    Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, x2, y2) format,
    where (x1, y1) is the top-left corner and (x2, y2) is the bottom-right corner.

    Args:
        x (np.ndarray): The input bounding box coordinates in (x, y, width, height) format.

    Returns:
        y (np.ndarray): The bounding box coordinates in (x1, y1, x2, y2) format.
    """
    # x is expected to be a NumPy array
    y = np.copy(x)
    y[..., 0] = x[..., 0] - x[..., 2] / 2  # top left x
    y[..., 1] = x[..., 1] - x[..., 3] / 2  # top left y
    y[..., 2] = x[..., 0] + x[..., 2] / 2  # bottom right x
    y[..., 3] = x[..., 1] + x[..., 3] / 2  # bottom right y
    return y
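
# Worked example for xywh2xyxy (illustrative numbers only): a box centred at (50, 50) with
# width 20 and height 10 maps to the corner form (40, 45, 60, 55).
# print(xywh2xyxy(np.array([[50.0, 50.0, 20.0, 10.0]])))  # -> [[40. 45. 60. 55.]]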


def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, agnostic=False,
                        multi_label=False, max_det=300, nc=0,  # number of classes (optional)
                        max_time_img=0.05,
                        max_nms=100,
                        max_wh=7680):
    """
    Perform non-maximum suppression (NMS) on a set of boxes, with support for masks and multiple labels per box.

    Arguments:
        prediction (np.ndarray): An array of shape (batch_size, num_classes + 4 + num_masks, num_boxes)
            containing the predicted boxes, classes, and masks, in the format output by a model such as YOLO.
        conf_thres (float): The confidence threshold below which boxes will be filtered out.
            Valid values are between 0.0 and 1.0.
        iou_thres (float): The IoU threshold above which overlapping boxes will be suppressed during NMS.
            Valid values are between 0.0 and 1.0.
        agnostic (bool): If True, the model is agnostic to the number of classes, and all
            classes will be considered as one.
        multi_label (bool): If True, each box may have multiple labels.
        max_det (int): The maximum number of boxes to keep after NMS.
        nc (int): (optional) The number of classes output by the model. Any indices after this will be considered masks.
        max_time_img (float): The maximum time (seconds) for processing one image.
        max_nms (int): The maximum number of boxes passed into tf.image.non_max_suppression().
        max_wh (int): The maximum box width and height in pixels.

    Returns:
        (List[np.ndarray]): A list of length batch_size, where each element is an array of
            shape (num_boxes, 6 + num_masks) containing the kept boxes, with columns
            (x1, y1, x2, y2, confidence, class, mask1, mask2, ...).
    """
    # Checks
    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
    if isinstance(prediction, (list, tuple)):  # YOLOv8 model in validation mode, output = (inference_out, loss_out)
        prediction = prediction[0]  # select only inference output

    bs = np.shape(prediction)[0]  # batch size
    nc = nc or (np.shape(prediction)[1] - 4)  # number of classes
    nm = np.shape(prediction)[1] - nc - 4  # number of mask coefficients
    mi = 4 + nc  # mask start index
    #xc = tf.math.reduce_any(prediction[:, 4:mi] > conf_thres, axis=1)  # candidates
    xc = np.amax(prediction[:, 4:mi], axis=1) > conf_thres  # candidates: best class score above threshold

    # Settings
    # min_wh = 2  # (pixels) minimum box width and height
    time_limit = 0.5 + max_time_img * bs  # seconds to quit after
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)

    t = time.time()
    output = [np.zeros((0, 6 + nm))] * bs
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # x = tf.where(tf.math.logical_or(x[:, 2:4] < min_wh, x[:, 2:4] > max_wh), tf.constant(0, dtype=tf.float32), x)  # width-height
        #x = tf.boolean_mask(x, xc[xi])
        #x = x.transpose(0, -1)[xc[xi]]  # confidence
        # x, xc, and xi are NumPy values here
        x = np.transpose(x)  # (num_classes + 4 + nm, num_boxes) -> (num_boxes, num_classes + 4 + nm)
        x = x[xc[xi]]  # keep candidate boxes only

        # If none remain, process next image
        if np.shape(x)[0] == 0:
            continue

        # Detections matrix nx6 (xyxy, conf, cls)
        box = x[:, :4]
        cls = x[:, 4:4 + nc]
        mask = x[:, 4 + nc:]
        box = xywh2xyxy(box)  # (center_x, center_y, width, height) to (x1, y1, x2, y2)

        if multi_label:
            i, j = np.where(cls > conf_thres)
            x = np.concatenate([box[i], np.expand_dims(cls[i, j], axis=-1), np.expand_dims(j, axis=-1).astype(np.float32), mask[i]], axis=1)
        else:
            conf = np.max(cls, axis=1)
            j = np.argmax(cls, axis=1)
            keep = np.where(conf > conf_thres)[0]
            x = np.concatenate([box[keep], np.expand_dims(conf[keep], axis=-1), np.expand_dims(j[keep], axis=-1).astype(np.float32), mask[keep]], axis=1)

        # Check shape
        n = np.shape(x)[0]  # number of boxes
        if n == 0:  # no boxes
            continue
        #x = x[tf.argsort(x[:, 4], direction='DESCENDING')[:max_nms]]  # sort by confidence and remove excess boxes
        sorted_indices = np.argsort(x[:, 4])[::-1]  # sort indices in descending order of confidence
        x = x[sorted_indices[:max_nms]]  # keep the top max_nms boxes
        # Batched NMS
        c = x[:, 5:6] * (0.0 if agnostic else float(max_wh))  # per-class offsets so NMS runs per class
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = tf.image.non_max_suppression(boxes, scores, max_nms, iou_threshold=iou_thres)  # NMS
        i = i.numpy()
        i = i[:max_det]  # limit detections
        output[xi] = x[i, :]

        if (time.time() - t) > time_limit:
            break  # time limit exceeded

    return output
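
# Example call (synthetic shapes, assuming an 80-class detector with no mask outputs):
# `prediction` would have shape (batch, 84, num_candidates), and each element of the returned
# list has shape (num_kept, 6) with columns (x1, y1, x2, y2, confidence, class).
# dummy = np.random.rand(1, 84, 8400).astype(np.float32)
# detections = non_max_suppression(dummy, conf_thres=0.25, iou_thres=0.45)[0]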


def optimized_object_detection(prediction, conf_thres=0.25, iou_thres=0.45, agnostic=False,
                               multi_label=False, max_det=300, nc=0, max_time_img=0.05,
                               max_nms=100, max_wh=7680):
    """Lightly refactored variant of non_max_suppression() with the same arguments and return format."""
    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'

    if isinstance(prediction, (list, tuple)):
        prediction = prediction[0]  # select only inference output

    bs, _, _ = prediction.shape  # batch size and dimensions
    if nc == 0:
        nc = prediction.shape[1] - 4  # number of classes
    nm = prediction.shape[1] - nc - 4  # number of mask coefficients
    mi = 4 + nc  # mask start index
    xc = np.amax(prediction[:, 4:mi], axis=1) > conf_thres  # candidates

    time_limit = 0.5 + max_time_img * bs  # seconds to quit after
    multi_label &= nc > 1

    t = time.time()
    output = [np.zeros((0, 6 + nm))] * bs
    for xi, x in enumerate(prediction):  # image index, image inference
        x = np.transpose(x)
        x = x[xc[xi]]  # keep candidate boxes only
        if np.shape(x)[0] == 0:
            continue

        box = x[:, :4]
        cls = x[:, 4:4 + nc]
        mask = x[:, 4 + nc:]
        box = xywh2xyxy(box)  # (center_x, center_y, width, height) to (x1, y1, x2, y2)

        if multi_label:
            i, j = np.where(cls > conf_thres)
            x = np.concatenate([box[i], np.expand_dims(cls[i, j], axis=-1), np.expand_dims(j, axis=-1).astype(np.float32), mask[i]], axis=1)
        else:
            conf = np.max(cls, axis=1)
            j = np.argmax(cls, axis=1)
            keep = np.where(conf > conf_thres)[0]
            x = np.concatenate([box[keep], np.expand_dims(conf[keep], axis=-1), np.expand_dims(j[keep], axis=-1).astype(np.float32), mask[keep]], axis=1)

        n = np.shape(x)[0]  # number of boxes
        if n == 0:
            continue
        sorted_indices = np.argsort(x[:, 4])[::-1]  # sort indices in descending order of confidence
        x = x[sorted_indices[:max_nms]]  # keep the top max_nms boxes
        # Batched NMS
        c = x[:, 5:6] * (0.0 if agnostic else float(max_wh))  # per-class offsets so NMS runs per class
        boxes, scores = x[:, :4] + c, x[:, 4]
        i = tf.image.non_max_suppression(boxes, scores, max_nms, iou_threshold=iou_thres)
        #i = nms(boxes, scores, iou_threshold=iou_thres)  # alternative using lsnms
        i = i.numpy()
        i = i[:max_det]  # limit detections
        output[xi] = x[i, :]

        if (time.time() - t) > time_limit:
            break  # time limit exceeded

    return output


#output_numpy = np.load(r"D:\object_face_person_detection\yolov8_tf_results\gustavo-alves-YOXSC4zRcxw-unsplash.npy")
#detections = non_max_suppression(output_numpy, conf_thres=0.4, iou_thres=0.4)[0]
#print(detections)
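

# Minimal self-check with synthetic data (a sketch; the values below are made up and not tied to
# any particular model export). It builds one fake image prediction with 2 classes in which two
# class-0 boxes overlap heavily and should collapse to a single detection, then runs both NMS
# variants and prints the surviving detections.
if __name__ == "__main__":
    nc_demo = 2
    # Rows 0-3 are (cx, cy, w, h); rows 4-5 are per-class scores; columns are candidate boxes.
    demo = np.zeros((1, 4 + nc_demo, 5), dtype=np.float32)
    demo[0, :4, :] = np.array([[50, 52, 200, 48, 300],    # cx
                               [50, 51, 200, 49, 300],    # cy
                               [40, 40, 60, 42, 80],      # w
                               [40, 40, 60, 41, 80]],     # h
                              dtype=np.float32)
    demo[0, 4, :] = np.array([0.90, 0.85, 0.10, 0.05, 0.00], dtype=np.float32)  # class 0 scores
    demo[0, 5, :] = np.array([0.00, 0.00, 0.70, 0.02, 0.95], dtype=np.float32)  # class 1 scores

    dets = non_max_suppression(demo, conf_thres=0.25, iou_thres=0.45)[0]
    print("non_max_suppression kept:\n", dets)  # expect 3 rows: one duplicate suppressed, one box below threshold

    dets_opt = optimized_object_detection(demo, conf_thres=0.25, iou_thres=0.45)[0]
    print("optimized_object_detection kept:\n", dets_opt)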