Commit 34e2c3f · 1 Parent(s): 098cfe5 · wip
Files changed:
- age_estimation/age_estimation.py  +13 -6
- age_estimation/model.py  +1 -0
- age_estimation/predict.py  +43 -13
- app.py  +8 -7
- detection/face_detection.py  +29 -14
- detection/object_detection.py  +8 -7

age_estimation/age_estimation.py CHANGED

@@ -22,14 +22,18 @@ def age_estimation(input_type, uploaded_image, image_url, base64_string):
         base64_string (str): The image base64 string (if input_type is "Enter Base64").
 
     Returns:
-        …
+        tuple: A tuple containing:
+            - str: A summary string of the estimated ages, or an error message.
+            - list: A list of dictionaries, where each dictionary represents the age
+              estimation data for a detected face, or an empty list if no faces
+              were detected or an error occurred.
     """
     # Use the centralized function to get the image
     image = get_image_from_input(input_type, uploaded_image, image_url, base64_string)
 
     if image is None:
         print("Image is None after loading/selection for age estimation.")
-        return "Error: Image processing failed or no valid input provided."
+        return "Error: Image processing failed or no valid input provided.", []
 
     try:
         face_detector = load_face_detector()
@@ -44,10 +48,13 @@ def age_estimation(input_type, uploaded_image, image_url, base64_string):
         age_data = predict_age(processed_image, model, face_detector, device)
 
         if age_data:
-            # …
-            …
+            # Create a summary string of all estimated ages
+            age_summary = "Estimated Ages: " + ", ".join(
+                [str(face["age"]) for face in age_data]
+            )
+            return age_summary, age_data
        else:
-            return "No faces detected"
+            return "No faces detected", []
    except Exception as e:
        print(f"Error in age estimation: {e}")
-        return f"Error in age estimation: {e}"
+        return f"Error in age estimation: {e}", []
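
Note: age_estimation now returns a (summary, data) pair instead of a bare string. A minimal sketch of how a caller would unpack the new return value; the input values and printed output below are made up:

# Hypothetical call; "Enter URL" and the sample URL are illustrative inputs.
summary, faces = age_estimation(
    "Enter URL", None, "https://example.com/people.jpg", None
)
print(summary)  # e.g. "Estimated Ages: 34, 29", or an error message
for face in faces:  # empty list when no faces were detected
    box = face["face_coordinates"]
    print(face["age"], box["x"], box["y"], box["w"], box["h"])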

age_estimation/model.py CHANGED

@@ -3,6 +3,7 @@ import pretrainedmodels
 import torch
 import torch.nn as nn
 
+
 def get_model(model_name="se_resnext50_32x4d", num_classes=101, pretrained="imagenet"):
     """
     Loads a pre-trained model.

age_estimation/predict.py CHANGED

@@ -7,8 +7,16 @@ import torch.nn.functional as F
 AGE_ESTIMATION_MARGIN = 0.4
 AGE_ESTIMATION_INPUT_SIZE = 224
 
+
 @torch.inference_mode()
-def predict_age(image, model, face_detector, device, margin=AGE_ESTIMATION_MARGIN, input_size=AGE_ESTIMATION_INPUT_SIZE):
+def predict_age(
+    image,
+    model,
+    face_detector,
+    device,
+    margin=AGE_ESTIMATION_MARGIN,
+    input_size=AGE_ESTIMATION_INPUT_SIZE,
+):
     """
     Predicts the age of faces in an image.
 
@@ -22,6 +30,8 @@ def predict_age(image, model, face_detector, device, margin=AGE_ESTIMATION_MARGIN, input_size=AGE_ESTIMATION_INPUT_SIZE):
 
     Returns:
         list: A list of dictionaries containing the age and face coordinates for each detected face.
+            The 'face_coordinates' key contains a dictionary with 'x', 'y', 'w', and 'h' keys
+            representing the bounding box of the detected face.
     """
     # Read the image using OpenCV
     # The image is already a NumPy array (HWC, BGR)
@@ -44,9 +54,15 @@ def predict_age(image, model, face_detector, device, margin=AGE_ESTIMATION_MARGIN, input_size=AGE_ESTIMATION_INPUT_SIZE):
     if len(detected) > 0:
         for i, d in enumerate(detected):
             # Get face coordinates and dimensions
-            x1, y1, x2, y2, w, h = …
-            …
-            …
+            x1, y1, x2, y2, w, h = (
+                d.left(),
+                d.top(),
+                d.right() + 1,
+                d.bottom() + 1,
+                d.width(),
+                d.height(),
+            )
+
             # Calculate expanded face region with margin
             xw1 = max(int(x1 - margin * w), 0)
             yw1 = max(int(y1 - margin * h), 0)
@@ -54,8 +70,9 @@ def predict_age(image, model, face_detector, device, margin=AGE_ESTIMATION_MARGIN, input_size=AGE_ESTIMATION_INPUT_SIZE):
             yw2 = min(int(y2 + margin * h), image_h - 1)
 
             # Resize face image to the required input size for the model
-            faces[i] = cv2.resize(…
-            …
+            faces[i] = cv2.resize(
+                image[yw1 : yw2 + 1, xw1 : xw2 + 1], (input_size, input_size)
+            )
 
             # Draw rectangles around the detected face and the expanded region
             cv2.rectangle(image, (x1, y1), (x2, y2), (255, 255, 255), 2)
@@ -63,17 +80,30 @@ def predict_age(image, model, face_detector, device, margin=AGE_ESTIMATION_MARGIN, input_size=AGE_ESTIMATION_INPUT_SIZE):
 
         # Prepare face images for model input
         inputs = torch.from_numpy(
-            np.transpose(faces.astype(np.float32), (0, 3, 1, 2))
-        …
+            np.transpose(faces.astype(np.float32), (0, 3, 1, 2))
+        ).to(device)
+
         # Perform age prediction using the model
         outputs = F.softmax(model(inputs), dim=-1).cpu().numpy()
         ages = np.arange(0, 101)
         predicted_ages = (outputs * ages).sum(axis=-1)
 
-        # Store the predicted age and face coordinates
+        # Store the predicted age and face coordinates in [x, y, w, h] format
         for age, d in zip(predicted_ages, detected):
-            …
-            …
-            …
+            x, y, w, h = d.left(), d.top(), d.width(), d.height()
+            age_text = f"{int(age)}"
+            age_data.append(
+                {
+                    "age": int(age),
+                    "text": age_text,
+                    "face_coordinates": {
+                        "x": int(x),
+                        "y": int(y),
+                        "w": int(w),
+                        "h": int(h),
+                    },
+                }
+            )
+
     # Return the list of age data for each detected face
-    return age_data
+    return age_data
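
Note: the readout itself is unchanged by this diff: the model emits a softmax over the 101 age classes 0..100, and the predicted age is the probability-weighted mean of those classes. A self-contained sketch of that computation with a made-up distribution:

import numpy as np

# Toy 101-class distribution with its mass centred on age 30 (values made up)
probs = np.zeros(101)
probs[28:33] = [0.1, 0.2, 0.4, 0.2, 0.1]
ages = np.arange(0, 101)
print((probs * ages).sum())  # ~30.0: the expected age under the distribution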

app.py CHANGED

@@ -54,7 +54,7 @@ with gr.Blocks() as demo:
            inputs=[face_input_type],
            outputs=[face_img_upload, face_url_input, face_base64_input],
            queue=False,
-            api_name=False
+            api_name=False,
        )
 
        # Link process button to the face detection function
@@ -94,8 +94,9 @@ with gr.Blocks() as demo:
        # Process Button
        age_process_btn = gr.Button("Estimate Age")
 
-        # Output
-        age_text_output = gr.Textbox(label="Estimated Age")
+        # Output Components
+        age_text_output = gr.Textbox(label="Estimated Age Summary")
+        age_raw_output = gr.JSON(label="Raw Age Estimation Data")
 
        # Link radio button change to visibility update function
        age_input_type.change(
@@ -103,15 +104,15 @@ with gr.Blocks() as demo:
            inputs=[age_input_type],
            outputs=[age_img_upload, age_url_input, age_base64_input],
            queue=False,
-            api_name=False
+            api_name=False,
        )
 
        # Link process button to the age estimation function
-        # The age_estimation function will
+        # The age_estimation function will now return a tuple
        age_process_btn.click(
            fn=age_estimation,
            inputs=[age_input_type, age_img_upload, age_url_input, age_base64_input],
-            outputs=age_text_output,
+            outputs=[age_text_output, age_raw_output],
        )
    # Create a tab for object detection
    with gr.Tab("Object Detection"):
@@ -146,7 +147,7 @@ with gr.Blocks() as demo:
            inputs=[obj_input_type],
            outputs=[obj_img_upload, obj_url_input, obj_base64_input],
            queue=False,
-            api_name=False
+            api_name=False,
        )
 
        # Link process button to the object detection function
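
Note: the click handler now drives two components because age_estimation returns a tuple. A minimal, self-contained sketch of the same wiring pattern, with a stub standing in for age_estimation (assumes gradio is installed; names below are illustrative):

import gradio as gr

def fake_age_estimation(url):  # stub; return shape mirrors the diff above
    return "Estimated Ages: 31", [
        {"age": 31, "face_coordinates": {"x": 10, "y": 12, "w": 80, "h": 80}}
    ]

with gr.Blocks() as demo:
    inp = gr.Textbox(label="Image URL")
    btn = gr.Button("Estimate Age")
    text_out = gr.Textbox(label="Estimated Age Summary")
    json_out = gr.JSON(label="Raw Age Estimation Data")
    # One function, two outputs: Gradio maps the returned tuple positionally
    btn.click(fn=fake_age_estimation, inputs=[inp], outputs=[text_out, json_out])

# demo.launch()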

detection/face_detection.py CHANGED

@@ -8,12 +8,17 @@ from PIL import Image
 
 # Local imports
 from utils.image_utils import load_image, preprocess_image, get_image_from_input
-from utils.face_detector import load_face_detector  # Assuming this is the dlib detector loader
+from utils.face_detector import (
+    load_face_detector,
+)  # Assuming this is the dlib detector loader
 
 # Define constants
 HAAR_CASCADE_FILENAME = "haarcascade_frontalface_default.xml"
 
-def face_detection(input_type, uploaded_image, image_url, base64_string, face_detection_method):
+
+def face_detection(
+    input_type, uploaded_image, image_url, base64_string, face_detection_method
+):
     """
     Performs face detection on the image from various input types using the selected method.
 
@@ -36,7 +41,7 @@ def face_detection(input_type, uploaded_image, image_url, base64_string, face_detection_method):
 
     if image is None:
         print("Image is None after loading/selection.")
-        return None, []
+        return None, []  # Return None for image and empty list for bboxes
 
     processed_image = None
     bounding_boxes = []
@@ -54,20 +59,26 @@ def face_detection(input_type, uploaded_image, image_url, base64_string, face_detection_method):
            # Ensure the haarcascade file is accessible.
            # This path might need adjustment depending on the environment.
            # Construct the full path to the Haar cascade file
-            cascade_path = os.path.join(…
+            cascade_path = os.path.join(
+                cv2.data.haarcascades, HAAR_CASCADE_FILENAME
+            )
 
            # Check if the cascade file exists
            if not os.path.exists(cascade_path):
-                …
-                …
-                …
+                error_message = f"Error: Haar cascade file not found at {cascade_path}. Please ensure OpenCV is installed correctly and the file exists."
+                print(error_message)
+                return None, []  # Return None for image and empty list for bboxes
 
            face_cascade = cv2.CascadeClassifier(cascade_path)
 
            faces = face_cascade.detectMultiScale(gray, 1.1, 4)
            for x, y, w, h in faces:
-                cv2.rectangle(…
-                …
+                cv2.rectangle(
+                    processed_image, (x, y), (x + w, y + h), (255, 0, 0), 2
+                )
+                bounding_boxes.append(
+                    {"x": int(x), "y": int(y), "w": int(w), "h": int(h)}
+                )
 
        elif face_detection_method == "dlib":
            print("Using dlib for face detection.")
@@ -75,15 +86,19 @@ def face_detection(input_type, uploaded_image, image_url, base64_string, face_detection_method):
            # dlib works on RGB images, but the detector can take grayscale
            # However, the rectangles are relative to the original image size
            # Let's use the original processed_image (RGB numpy array) for drawing
-            faces = face_detector(processed_image, 1)
+            faces = face_detector(processed_image, 1)  # 1 is the upsample level
            for face in faces:
                x, y, w, h = face.left(), face.top(), face.width(), face.height()
-                cv2.rectangle(…
-                …
+                cv2.rectangle(
+                    processed_image, (x, y), (x + w, y + h), (255, 0, 0), 2
+                )
+                bounding_boxes.append(
+                    {"x": int(x), "y": int(y), "w": int(w), "h": int(h)}
+                )
 
            return processed_image, bounding_boxes
        else:
-            return None, []
+            return None, []  # Return None for image and empty list for bboxes
    except Exception as e:
        print(f"Error in face detection processing: {e}")
-        return None, []
+        return None, []  # Return None for image and empty list for bboxes
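
Note: cv2.data.haarcascades is the directory in which opencv-python ships its bundled cascade XML files, so joining it with the filename yields an absolute path that can be verified before loading. A quick sketch, assuming opencv-python is installed:

import os
import cv2

cascade_path = os.path.join(cv2.data.haarcascades, "haarcascade_frontalface_default.xml")
print(cascade_path, os.path.exists(cascade_path))

face_cascade = cv2.CascadeClassifier(cascade_path)
print(face_cascade.empty())  # False once the cascade has loaded successfully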

detection/object_detection.py CHANGED

@@ -8,6 +8,7 @@ import numpy as np
 # Local imports
 from utils.image_utils import load_image, preprocess_image
 
+
 def object_detection(input_type, uploaded_image, image_url, base64_string):
     """
     Performs object detection on the image from various input types.
@@ -25,26 +26,26 @@ def object_detection(input_type, uploaded_image, image_url, base64_string):
     input_value = None
 
     if input_type == "Upload File" and uploaded_image is not None:
-        image = uploaded_image
-        print("Using uploaded image (PIL) for object detection")
+        image = uploaded_image  # This is a PIL Image
+        print("Using uploaded image (PIL) for object detection")  # Debug print
 
     elif input_type == "Enter URL" and image_url and image_url.strip():
         input_value = image_url
-        print(f"Using URL for object detection: {input_value}")
+        print(f"Using URL for object detection: {input_value}")  # Debug print
 
     elif input_type == "Enter Base64" and base64_string and base64_string.strip():
         input_value = base64_string
-        print(f"Using Base64 string for object detection")
+        print(f"Using Base64 string for object detection")  # Debug print
 
     else:
         print("No valid input provided for object detection based on selected type.")
-        return None
+        return None  # No valid input
 
     # If input_value is set (URL or Base64), use load_image
     if input_value:
         image = load_image(input_value)
         if image is None:
-            return None
+            return None  # load_image failed
 
     # Now 'image' should be a PIL Image or None
     if image is None:
@@ -62,4 +63,4 @@ def object_detection(input_type, uploaded_image, image_url, base64_string):
         return processed_image
     except Exception as e:
         print(f"Error in object detection processing: {e}")
-        return None
+        return None
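
Note: the real load_image helper lives in utils/image_utils.py and is not part of this diff; the sketch below is only a guess at the URL-vs-Base64 dispatch the code above implies, assuming requests and Pillow are available:

import base64
import io

import requests
from PIL import Image

def load_image(input_value):  # hypothetical reimplementation, not the repo's code
    try:
        if input_value.startswith(("http://", "https://")):
            resp = requests.get(input_value, timeout=10)
            resp.raise_for_status()
            return Image.open(io.BytesIO(resp.content))
        # Otherwise treat the string as Base64-encoded image bytes
        return Image.open(io.BytesIO(base64.b64decode(input_value)))
    except Exception as e:
        print(f"load_image failed: {e}")
        return None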
|