import cv2
import numpy as np
import torch
import torch.nn.functional as F

AGE_ESTIMATION_MARGIN = 0.4
AGE_ESTIMATION_INPUT_SIZE = 224
def predict_age(
    image,
    model,
    face_detector,
    device,
    margin=AGE_ESTIMATION_MARGIN,
    input_size=AGE_ESTIMATION_INPUT_SIZE,
):
    """
    Predict the age of each face detected in an image.

    Args:
        image (numpy.ndarray): The input image as a NumPy array (HWC, RGB).
        model (torch.nn.Module): The age estimation model.
        face_detector (dlib.fhog_object_detector): The dlib frontal face detector.
        device (torch.device): The device to run the model on.
        margin (float): The relative margin added around each detected face.
        input_size (int): The side length of the square crop fed to the model.

    Returns:
        list: A list of dictionaries, one per detected face, each containing the
            predicted 'age', a display 'text', and 'face_coordinates' (a dict
            with 'x', 'y', 'w', and 'h' describing the face bounding box).
    """
    # The input is expected to be an RGB NumPy array (HWC), e.g. as produced by
    # preprocess_image in utils/image_utils.py (PIL -> NumPy conversion yields RGB).
    # The OpenCV operations below assume BGR, so convert once here.
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    image_h, image_w = image.shape[:2]

    # Detect faces with dlib; the second argument is the number of times the
    # image is upsampled before detection.
    detected = face_detector(image, 3)
    faces = np.empty((len(detected), input_size, input_size, 3))
    age_data = []
    # Process each detected face
    if len(detected) > 0:
        for i, d in enumerate(detected):
            # Face coordinates and dimensions
            x1, y1, x2, y2, w, h = (
                d.left(),
                d.top(),
                d.right() + 1,
                d.bottom() + 1,
                d.width(),
                d.height(),
            )

            # Expand the face region by the margin, clamped to the image bounds
            xw1 = max(int(x1 - margin * w), 0)
            yw1 = max(int(y1 - margin * h), 0)
            xw2 = min(int(x2 + margin * w), image_w - 1)
            yw2 = min(int(y2 + margin * h), image_h - 1)

            # Crop the expanded region and resize it to the model's input size
            faces[i] = cv2.resize(
                image[yw1 : yw2 + 1, xw1 : xw2 + 1], (input_size, input_size)
            )

            # Draw rectangles around the detected face and the expanded region
            # (on the local BGR copy, for optional visualization/debugging)
            cv2.rectangle(image, (x1, y1), (x2, y2), (255, 255, 255), 2)
            cv2.rectangle(image, (xw1, yw1), (xw2, yw2), (255, 0, 0), 2)
        # Prepare face crops for the model: NHWC float32 -> NCHW tensor
        inputs = torch.from_numpy(
            np.transpose(faces.astype(np.float32), (0, 3, 1, 2))
        ).to(device)

        # Run the model without gradient tracking; softmax yields a probability
        # distribution over the 0-100 age classes
        with torch.no_grad():
            outputs = F.softmax(model(inputs), dim=-1).cpu().numpy()

        # The predicted age is the expected value of that distribution
        ages = np.arange(0, 101)
        predicted_ages = (outputs * ages).sum(axis=-1)
        # Store the predicted age and face coordinates in x/y/w/h format
        for age, d in zip(predicted_ages, detected):
            x, y, w, h = d.left(), d.top(), d.width(), d.height()
            age_text = f"{int(age)}"
            age_data.append(
                {
                    "age": int(age),
                    "text": age_text,
                    "face_coordinates": {
                        "x": int(x),
                        "y": int(y),
                        "w": int(w),
                        "h": int(h),
                    },
                }
            )

    # Return the list of age data for each detected face (empty if none found)
    return age_data
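
# --- Usage sketch (illustrative) ---------------------------------------------
# A minimal example of how predict_age might be called. The stand-in model, the
# "example.jpg" path, and loading the image via PIL are assumptions made for
# illustration only; substitute the project's actual trained model and inputs.
if __name__ == "__main__":
    import dlib
    from PIL import Image

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    face_detector = dlib.get_frontal_face_detector()

    # Stand-in model: any nn.Module that maps (N, 3, input_size, input_size)
    # inputs to 101 age logits works here. Replace with the real model.
    model = torch.nn.Sequential(
        torch.nn.Flatten(),
        torch.nn.Linear(3 * AGE_ESTIMATION_INPUT_SIZE * AGE_ESTIMATION_INPUT_SIZE, 101),
    ).to(device).eval()

    # Hypothetical input image, loaded as an RGB NumPy array (HWC)
    image = np.array(Image.open("example.jpg").convert("RGB"))

    for result in predict_age(image, model, face_detector, device):
        coords = result["face_coordinates"]
        print(
            f"age={result['age']} "
            f"box=(x={coords['x']}, y={coords['y']}, w={coords['w']}, h={coords['h']})"
        )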