Spaces:

rifatramadhani
/

cvtools

Running on Zero

File size: 4,638 Bytes

e70400c
 
 
 
ef654f1
e70400c
 
 
 
085be2c
34e2c3f
085be2c
 
ef654f1
085be2c
 
ef654f1
085be2c
2d3e7bb
ef654f1
 
e70400c
 
085be2c
e70400c
 
 
 
 
 
 
 
ef654f1
 
 
e70400c
ef654f1
085be2c
2d3e7bb
ef654f1
e70400c
 
 
 
34e2c3f
 
e70400c
 
 
34e2c3f
e70400c
 
 
2d3e7bb
e70400c
 
 
2d3e7bb
e70400c
 
 
 
 
2d3e7bb
e70400c
 
 
 
2d3e7bb
e70400c
 
 
ef654f1
 
 
 
 
 
 
 
 
 
 
 
2d3e7bb
 
 
ef654f1
 
2d3e7bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ef654f1
 
2d3e7bb
 
 
ef654f1
 
2d3e7bb
e70400c
 
085be2c
2d3e7bb

# Standard library imports
# (Add any necessary imports for future object detection implementation)

# Third-party imports
from ultralytics import YOLO

# Local imports
from utils.image_utils import load_image, preprocess_image

# Weights file handed to YOLO() below.
YOLO_MODEL = "yolo11n.pt"

# Load the YOLO model once at import time to avoid reloading on each function call.
# The weights are selected by YOLO_MODEL above.
try:
    model = YOLO(YOLO_MODEL)
    print("YOLO model loaded successfully.")
except Exception as e:
    # Keep the module importable when loading fails; object_detection() checks
    # for a None model before running inference.
    print(f"Error loading YOLO model: {e}")
    model = None  # Set model to None if loading fails


def _collect_detections(boxes, names):
    """Convert a YOLO boxes collection into a list of plain dictionaries.

    Args:
        boxes: The ``boxes`` attribute of a YOLO result; must expose ``xywh``,
            ``conf`` and ``cls`` with a ``tolist()`` method.
        names: Mapping (or sequence) from class index to class name. May be empty.

    Returns:
        list: One dictionary per detection with the keys ``box`` (center-based
        ``x``, ``y``, ``w``, ``h`` rounded to integers), ``confidence`` (rounded
        to 4 decimals), ``class_id`` and ``class_name``.
    """
    detections = []
    # Convert each tensor once instead of indexing and converting it per box.
    rows = zip(boxes.xywh.tolist(), boxes.conf.tolist(), boxes.cls.tolist())
    for xywh, conf, cls in rows:
        # xywh is [x_center, y_center, width, height]
        x_center, y_center, width, height = (round(float(value)) for value in xywh)
        class_id = int(cls)
        try:
            class_name = names[class_id]
        except (KeyError, IndexError):
            # Unknown class index: fall back to the numeric id rather than
            # aborting and discarding every other detection.
            class_name = str(class_id)

        detections.append(
            {
                "box": {
                    "x": x_center,
                    "y": y_center,
                    "w": width,
                    "h": height,
                },
                "confidence": round(float(conf), 4),
                "class_id": class_id,
                "class_name": class_name,
            }
        )
    return detections


def object_detection(input_type, uploaded_image, image_url, base64_string):
    """
    Performs object detection on the image from various input types using the
    globally loaded YOLO model.

    Args:
        input_type (str): The selected input method ("Upload File", "Enter URL", "Enter Base64").
        uploaded_image (PIL.Image.Image): The uploaded image (if input_type is "Upload File").
        image_url (str): The image URL (if input_type is "Enter URL").
        base64_string (str): The image base64 string (if input_type is "Enter Base64").

    Returns:
        tuple: A tuple containing:
            - The image with detected objects drawn on it (the output of
              ``results[0].plot()``), or None if an error occurred, no valid
              input was given, or the model is not loaded.
            - list: One dictionary per detection with the keys "box"
              (center-based "x", "y", "w", "h"), "confidence", "class_id" and
              "class_name". Empty when nothing was detected; None on failure.
    """
    if model is None:
        print("YOLO model is not loaded. Cannot perform object detection.")
        return None, None  # Return None for both outputs

    image = None
    input_value = None

    if input_type == "Upload File" and uploaded_image is not None:
        image = uploaded_image  # This is a PIL Image
        print("Using uploaded image (PIL) for object detection")  # Debug print

    elif input_type == "Enter URL" and image_url and image_url.strip():
        # Pass on the same stripped value that was validated, so surrounding
        # whitespace cannot reach load_image.
        input_value = image_url.strip()
        print(f"Using URL for object detection: {input_value}")  # Debug print

    elif input_type == "Enter Base64" and base64_string and base64_string.strip():
        input_value = base64_string.strip()
        print("Using Base64 string for object detection")  # Debug print

    else:
        print("No valid input provided for object detection based on selected type.")
        return None, None  # Return None for both outputs

    # If input_value is set (URL or Base64), use load_image
    if input_value:
        image = load_image(input_value)
        if image is None:
            return None, None  # load_image failed

    # Defensive check: 'image' should be set by one of the branches above
    if image is None:
        print("Image is None after loading/selection for object detection.")
        return None, None  # Return None for both outputs

    try:
        # Preprocess the image (convert PIL to numpy, ensure RGB)
        processed_image_np = preprocess_image(image)

        # NOTE(review): Ultralytics documents numpy sources as BGR. If
        # preprocess_image really returns RGB, inference runs on swapped
        # channels -- confirm against preprocess_image before changing.
        results = model.predict(processed_image_np)

        # Extract raw detection data
        raw_data = []
        if results and results[0].boxes:
            raw_data = _collect_detections(results[0].boxes, model.names or {})

        # Draw results on the image; fall back to the unannotated input
        result_image_np = results[0].plot() if results else processed_image_np

        print("Object detection performed successfully.")
        return result_image_np, raw_data  # Return both the image and raw data

    except Exception as e:
        # Top-level boundary for the UI: report the failure and return empty outputs
        print(f"Error during YOLO object detection: {e}")
        return None, None  # Return None for both outputs