rifatramadhani committed
Commit e70400c · 1 Parent(s): ece8c80
.gitignore ADDED
@@ -0,0 +1,28 @@
+ # Byte-code files
+ *.pyc
+ __pycache__/
+
+ # Distribution / build outputs
+ dist/
+ build/
+ *.egg-info/
+
+ # Virtual environment
+ venv/
+ .venv/
+
+ # Editors/IDEs
+ .vscode/
+ .idea/
+
+ # Test and coverage
+ .coverage
+ htmlcov/
+
+ # Data files
+ *.sqlite3
+ *.db
+
+ .env
+ .env.local
+ .secrets
README.md CHANGED
@@ -8,7 +8,45 @@ sdk_version: 5.25.2
  app_file: app.py
  pinned: false
  license: mit
- short_description: A various computer vision related tools showcase.
+ short_description: A collection of computer vision tools.
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Cvtools
+
+ Cvtools is a project showcasing various computer vision functionalities through an interactive Gradio web interface.
+
+ ## Features
+
+ The application provides the following features via separate tabs in the Gradio interface:
+
+ * **Face Detection:** Detects faces in images using either OpenCV's Haar Cascade Classifier or dlib.
+ * **Age Estimation:** Estimates the age of detected faces using a pre-trained model.
+ * **Object Detection:** (Placeholder) This tab is reserved for a future object detection implementation.
+
+ ## Setup and Installation
+
+ To run this project locally, you will need Python 3.10 or higher installed.
+
+ 1. Clone the repository:
+    ```bash
+    git clone <repository_url>
+    cd Cvtools
+    ```
+ 2. Install the required dependencies:
+    ```bash
+    pip install -r requirements.txt
+    ```
+
+ ## Running the Application
+
+ To start the Gradio application, run:
+
+ ```bash
+ python app.py
+ ```
+
+ The application will be available in your web browser at the address printed in the console output.
+
+ ## License
+
+ This project is licensed under the MIT License.
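Once the app is running, the endpoints wired up in app.py below can also be called programmatically. A minimal sketch using `gradio_client` (already pinned in requirements.txt); the local URL and the `/age_estimation` endpoint name are assumptions based on Gradio's defaults:

```python
from gradio_client import Client, handle_file

# Connect to the locally running app (the URL is printed at launch)
client = Client("http://127.0.0.1:7860")

# Gradio derives endpoint names from the wired functions,
# so "/age_estimation" is an assumption here.
result = client.predict(
    "Upload File",                # input_type
    handle_file("portrait.jpg"),  # uploaded_image (hypothetical local file)
    "",                           # image_url (unused for this input type)
    "",                           # base64_string (unused)
    api_name="/age_estimation",
)
print(result)  # e.g. "Estimated Age: 34"
```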
age_estimation/age_estimation.py ADDED
@@ -0,0 +1,53 @@
+ import os
+ import tempfile
+ import torch
+ import dlib
+
+ from PIL import Image
+
+ from .model import load_model
+ from utils.image_utils import load_image, preprocess_image, get_image_from_input
+ from utils.face_detector import load_face_detector
+ from .predict import predict_age
+
+
+ def age_estimation(input_type, uploaded_image, image_url, base64_string):
+     """
+     Estimates the age from an image input via file, URL, or base64 string.
+
+     Args:
+         input_type (str): The selected input method ("Upload File", "Enter URL", "Enter Base64").
+         uploaded_image (PIL.Image.Image): The uploaded image (if input_type is "Upload File").
+         image_url (str): The image URL (if input_type is "Enter URL").
+         base64_string (str): The image base64 string (if input_type is "Enter Base64").
+
+     Returns:
+         str: The estimated age, or an error message.
+     """
+     # Use the centralized function to get the image
+     image = get_image_from_input(input_type, uploaded_image, image_url, base64_string)
+
+     if image is None:
+         print("Image is None after loading/selection for age estimation.")
+         return "Error: Image processing failed or no valid input provided."
+
+     try:
+         face_detector = load_face_detector()
+
+         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+         model = load_model(device)
+
+         # Preprocess the image (convert PIL to NumPy, ensure RGB)
+         processed_image = preprocess_image(image)
+
+         # Call predict_age with the processed image (NumPy array)
+         age_data = predict_age(processed_image, model, face_detector, device)
+
+         if age_data:
+             # age_data is a list of per-face dictionaries; report the first face's age
+             return f"Estimated Age: {age_data[0]['age']}"
+         else:
+             return "No faces detected"
+     except Exception as e:
+         print(f"Error in age estimation: {e}")
+         return f"Error in age estimation: {e}"
age_estimation/model.py ADDED
@@ -0,0 +1,42 @@
+ import huggingface_hub
+ import pretrainedmodels
+ import torch
+ import torch.nn as nn
+
+ def get_model(model_name="se_resnext50_32x4d", num_classes=101, pretrained="imagenet"):
+     """
+     Loads a pre-trained model.
+
+     Args:
+         model_name (str): Name of the model to load.
+         num_classes (int): Number of output classes for the model (ages 0-100).
+         pretrained (str or None): Pretrained weights to load (e.g., "imagenet"), or None.
+
+     Returns:
+         torch.nn.Module: The loaded model.
+     """
+     model = pretrainedmodels.__dict__[model_name](pretrained=pretrained)
+     dim_feats = model.last_linear.in_features
+     model.last_linear = nn.Linear(dim_feats, num_classes)
+     model.avg_pool = nn.AdaptiveAvgPool2d(1)
+     return model
+
+
+ def load_model(device):
+     """
+     Loads the age estimation model from Hugging Face Hub.
+
+     Args:
+         device (torch.device): The device to load the model onto.
+
+     Returns:
+         torch.nn.Module: The loaded model, in eval mode.
+     """
+     model = get_model(model_name="se_resnext50_32x4d", pretrained=None)
+     path = huggingface_hub.hf_hub_download(
+         "public-data/yu4u-age-estimation-pytorch", "pretrained.pth"
+     )
+     model.load_state_dict(torch.load(path, weights_only=True))
+     model = model.to(device)
+     model.eval()
+     return model
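Because `num_classes=101`, the network emits one logit per age 0-100. A quick shape check (a sketch; the first call downloads the weights from the Hub):

```python
import torch

from age_estimation.model import load_model

device = torch.device("cpu")
model = load_model(device)  # downloads pretrained.pth on first run

with torch.inference_mode():
    logits = model(torch.zeros(1, 3, 224, 224))
print(logits.shape)  # torch.Size([1, 101]) -- one logit per age 0..100
```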
age_estimation/predict.py ADDED
@@ -0,0 +1,73 @@
+ import cv2
+ import numpy as np
+ import dlib
+ import torch
+ import torch.nn.functional as F
+
+ AGE_ESTIMATION_MARGIN = 0.4
+ AGE_ESTIMATION_INPUT_SIZE = 224
+
+ @torch.inference_mode()
+ def predict_age(image, model, face_detector, device, margin=AGE_ESTIMATION_MARGIN, input_size=AGE_ESTIMATION_INPUT_SIZE):
+     """
+     Predicts the age of faces in an image.
+
+     Args:
+         image (numpy.ndarray): The image as a NumPy array (HWC, RGB).
+         model (torch.nn.Module): The age estimation model.
+         face_detector (dlib.fhog_object_detector): The dlib face detector.
+         device (torch.device): The device to run the model on.
+         margin (float): The margin to add around each detected face.
+         input_size (int): The input image size expected by the model.
+
+     Returns:
+         list: A list of dictionaries containing the age and face coordinates for each detected face.
+     """
+     # preprocess_image returns an RGB array (PIL -> NumPy), while the cv2
+     # drawing and resizing below expect BGR, so convert once here.
+     image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+     image_h, image_w = image.shape[:2]
+
+     # Detect faces with the dlib detector (upsampling the image 3 times)
+     detected = face_detector(image, 3)
+     faces = np.empty((len(detected), input_size, input_size, 3))
+     age_data = []
+
+     # Process each detected face
+     if len(detected) > 0:
+         for i, d in enumerate(detected):
+             # Get face coordinates and dimensions
+             x1, y1 = d.left(), d.top()
+             x2, y2 = d.right() + 1, d.bottom() + 1
+             w, h = d.width(), d.height()
+
+             # Calculate the expanded face region with margin
+             xw1 = max(int(x1 - margin * w), 0)
+             yw1 = max(int(y1 - margin * h), 0)
+             xw2 = min(int(x2 + margin * w), image_w - 1)
+             yw2 = min(int(y2 + margin * h), image_h - 1)
+
+             # Resize the face crop to the model's input size
+             faces[i] = cv2.resize(image[yw1:yw2 + 1, xw1:xw2 + 1],
+                                   (input_size, input_size))
+
+             # Draw rectangles around the detected face and the expanded region
+             cv2.rectangle(image, (x1, y1), (x2, y2), (255, 255, 255), 2)
+             cv2.rectangle(image, (xw1, yw1), (xw2, yw2), (255, 0, 0), 2)
+
+         # Prepare face crops for the model (NHWC float32 -> NCHW tensor)
+         inputs = torch.from_numpy(
+             np.transpose(faces.astype(np.float32), (0, 3, 1, 2))).to(device)
+
+         # Perform age prediction using the model
+         outputs = F.softmax(model(inputs), dim=-1).cpu().numpy()
+         ages = np.arange(0, 101)
+         predicted_ages = (outputs * ages).sum(axis=-1)
+
+         # Store the predicted age and face coordinates
+         for age, d in zip(predicted_ages, detected):
+             age_text = f'{int(age)}'
+             age_data.append({'age': int(age), 'text': age_text, 'face_coordinates': (d.left(), d.top())})
+
+     # Return the list of age data for each detected face
+     return age_data
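The final expectation step turns the 101-way classification into a continuous estimate: the predicted age is the probability-weighted sum of ages, i.e. sum(i * p_i) for i = 0..100. A toy illustration of just that step:

```python
import numpy as np

# Toy softmax output: probability mass split around age 30
probs = np.zeros(101)
probs[[29, 30, 31]] = [0.25, 0.5, 0.25]

expected_age = (probs * np.arange(101)).sum()
print(expected_age)  # 30.0 = 29*0.25 + 30*0.5 + 31*0.25
```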
app.py CHANGED
@@ -1,7 +1,164 @@
+ # Standard library imports
+ import os
+ import sys
  import gradio as gr

- def greet(name):
-     return "Hello " + name + "!!"
+ # Local imports
+ from age_estimation.age_estimation import age_estimation
+ from detection.face_detection import face_detection
+ from detection.object_detection import object_detection
+ from utils.ui_utils import update_input_visibility

- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
- demo.launch()
+ with gr.Blocks() as demo:
+     # Add a title to the interface
+     gr.Markdown("# Computer Vision Tools")
+     # Create a tab for face detection
+     with gr.Tab("Face Detection"):
+         # Input Method Selection
+         face_input_type = gr.Radio(
+             ["Upload File", "Enter URL", "Enter Base64"],
+             label="Input Method",
+             value="Upload File",  # Default selection
+         )
+
+         # Face Detection Method Selection
+         face_detection_method = gr.Radio(
+             ["OpenCV", "dlib"],
+             label="Face Detection Method",
+             value="OpenCV",  # Default selection
+         )
+
+         # Input Components (initially only file upload is visible)
+         with gr.Row():
+             face_img_upload = gr.Image(type="pil", label="Upload Image", visible=True)
+             face_url_input = gr.Textbox(
+                 label="Enter Image URL", placeholder="e.g., https://...", visible=False
+             )
+             face_base64_input = gr.Textbox(
+                 label="Enter Base64 String",
+                 placeholder="Enter base64 string here...",
+                 visible=False,
+             )
+
+         # Process Button
+         face_process_btn = gr.Button("Process Image")
+
+         # Output Component
+         face_image_output = gr.Image(label="Detected Faces")
+
+         # Link radio button change to visibility update function
+         face_input_type.change(
+             fn=update_input_visibility,
+             inputs=[
+                 face_input_type,
+                 face_img_upload,
+                 face_url_input,
+                 face_base64_input,
+             ],
+             outputs=[face_img_upload, face_url_input, face_base64_input],
+             queue=False,
+         )
+
+         # Link process button to the face detection function,
+         # which dispatches on the selected input type and method
+         face_process_btn.click(
+             fn=face_detection,
+             inputs=[
+                 face_input_type,
+                 face_img_upload,
+                 face_url_input,
+                 face_base64_input,
+                 face_detection_method,
+             ],
+             outputs=face_image_output,
+         )
+     # Create a tab for age estimation
+     with gr.Tab("Age Estimation"):
+         # Input Method Selection
+         age_input_type = gr.Radio(
+             ["Upload File", "Enter URL", "Enter Base64"],
+             label="Input Method",
+             value="Upload File",  # Default selection
+         )
+
+         # Input Components (initially only file upload is visible)
+         with gr.Row():
+             age_img_upload = gr.Image(type="pil", label="Upload Image", visible=True)
+             age_url_input = gr.Textbox(
+                 label="Enter Image URL", placeholder="e.g., https://...", visible=False
+             )
+             age_base64_input = gr.Textbox(
+                 label="Enter Base64 String",
+                 placeholder="Enter base64 string here...",
+                 visible=False,
+             )
+
+         # Process Button
+         age_process_btn = gr.Button("Estimate Age")
+
+         # Output Component
+         age_text_output = gr.Textbox(label="Estimated Age")
+
+         # Link radio button change to visibility update function
+         age_input_type.change(
+             fn=update_input_visibility,
+             inputs=[age_input_type, age_img_upload, age_url_input, age_base64_input],
+             outputs=[age_img_upload, age_url_input, age_base64_input],
+             queue=False,
+         )
+
+         # Link process button to the age estimation function,
+         # which dispatches on the selected input type
+         age_process_btn.click(
+             fn=age_estimation,
+             inputs=[age_input_type, age_img_upload, age_url_input, age_base64_input],
+             outputs=age_text_output,
+         )
+     # Create a tab for object detection
+     with gr.Tab("Object Detection"):
+         # Input Method Selection
+         obj_input_type = gr.Radio(
+             ["Upload File", "Enter URL", "Enter Base64"],
+             label="Input Method",
+             value="Upload File",  # Default selection
+         )
+
+         # Input Components (initially only file upload is visible)
+         with gr.Row():
+             obj_img_upload = gr.Image(type="pil", label="Upload Image", visible=True)
+             obj_url_input = gr.Textbox(
+                 label="Enter Image URL", placeholder="e.g., https://...", visible=False
+             )
+             obj_base64_input = gr.Textbox(
+                 label="Enter Base64 String",
+                 placeholder="Enter base64 string here...",
+                 visible=False,
+             )
+
+         # Process Button
+         obj_process_btn = gr.Button("Detect Objects")
+
+         # Output Component
+         obj_image_output = gr.Image(label="Detected Objects")
+
+         # Link radio button change to visibility update function
+         obj_input_type.change(
+             fn=update_input_visibility,
+             inputs=[obj_input_type, obj_img_upload, obj_url_input, obj_base64_input],
+             outputs=[obj_img_upload, obj_url_input, obj_base64_input],
+             queue=False,
+         )
+
+         # Link process button to the object detection function,
+         # which dispatches on the selected input type
+         obj_process_btn.click(
+             fn=object_detection,
+             inputs=[obj_input_type, obj_img_upload, obj_url_input, obj_base64_input],
+             outputs=obj_image_output,
+         )
+
+ # Launch the Gradio demo, honoring an env var or CLI port override
+ port = int(os.environ.get("GRADIO_SERVER_PORT", 7860))
+ if "--server_port" in sys.argv:
+     port = int(sys.argv[sys.argv.index("--server_port") + 1])
+ demo.launch(server_port=port, ssr_mode=True, share=True)
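The launch block resolves the port from the `GRADIO_SERVER_PORT` environment variable, with a `--server_port` flag taking precedence. For example:

```bash
# Either invocation starts the app on port 8080
GRADIO_SERVER_PORT=8080 python app.py
python app.py --server_port 8080
```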
detection/face_detection.py ADDED
@@ -0,0 +1,76 @@
+ # Standard library imports
+ import os
+
+ # Third-party imports
+ import cv2
+ import numpy as np
+ from PIL import Image
+
+ # Local imports
+ from utils.image_utils import load_image, preprocess_image, get_image_from_input
+ from utils.face_detector import load_face_detector  # assumed to be the dlib detector loader
+
+ # Define constants
+ HAAR_CASCADE_FILENAME = "haarcascade_frontalface_default.xml"
+
+ def face_detection(input_type, uploaded_image, image_url, base64_string, face_detection_method):
+     """
+     Performs face detection on the image from various input types using the selected method.
+
+     Args:
+         input_type (str): The selected input method ("Upload File", "Enter URL", "Enter Base64").
+         uploaded_image (PIL.Image.Image): The uploaded image (if input_type is "Upload File").
+         image_url (str): The image URL (if input_type is "Enter URL").
+         base64_string (str): The image base64 string (if input_type is "Enter Base64").
+         face_detection_method (str): The selected face detection method ("OpenCV" or "dlib").
+
+     Returns:
+         numpy.ndarray: The image with detected faces drawn, or None if an error occurred.
+     """
+     # Use the centralized function to get the image
+     image = get_image_from_input(input_type, uploaded_image, image_url, base64_string)
+
+     if image is None:
+         print("Image is None after loading/selection.")
+         return None  # No valid input or loading failed
+
+     try:
+         # Preprocess the image (convert PIL to an RGB NumPy array)
+         processed_image = preprocess_image(image)
+
+         if processed_image is not None:
+             # processed_image is RGB, so use the RGB-to-grayscale conversion
+             gray = cv2.cvtColor(processed_image, cv2.COLOR_RGB2GRAY)
+
+             if face_detection_method == "OpenCV":
+                 print("Using OpenCV for face detection.")
+                 # Build the full path to the Haar cascade bundled with OpenCV
+                 cascade_path = os.path.join(cv2.data.haarcascades, HAAR_CASCADE_FILENAME)
+
+                 # Check that the cascade file exists
+                 if not os.path.exists(cascade_path):
+                     error_message = f"Error: Haar cascade file not found at {cascade_path}. Please ensure OpenCV is installed correctly and the file exists."
+                     print(error_message)
+                     return None
+
+                 face_cascade = cv2.CascadeClassifier(cascade_path)
+
+                 faces = face_cascade.detectMultiScale(gray, 1.1, 4)
+                 for x, y, w, h in faces:
+                     cv2.rectangle(processed_image, (x, y), (x + w, y + h), (255, 0, 0), 2)
+
+             elif face_detection_method == "dlib":
+                 print("Using dlib for face detection.")
+                 face_detector = load_face_detector()
+                 # dlib accepts RGB arrays directly; draw on the same image
+                 faces = face_detector(processed_image, 1)  # 1 is the upsample level
+                 for face in faces:
+                     x, y, w, h = face.left(), face.top(), face.width(), face.height()
+                     cv2.rectangle(processed_image, (x, y), (x + w, y + h), (255, 0, 0), 2)
+
+             return processed_image
+         else:
+             return None
+     except Exception as e:
+         print(f"Error in face detection processing: {e}")
+         return None
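OpenCV's `CascadeClassifier` constructor does not raise on a bad file; it just yields an empty classifier, so the `os.path.exists` guard above can be complemented with an `empty()` check. A standalone sanity-check sketch:

```python
import os

import cv2

cascade_path = os.path.join(cv2.data.haarcascades, "haarcascade_frontalface_default.xml")
face_cascade = cv2.CascadeClassifier(cascade_path)

# empty() is True when the XML failed to load, even if the file exists
print(cascade_path, "loaded:", not face_cascade.empty())
```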
detection/object_detection.py ADDED
@@ -0,0 +1,65 @@
+ # Standard library imports
+ # (Add any necessary imports for future object detection implementation)
+
+ # Third-party imports
+ from PIL import Image
+ import numpy as np
+
+ # Local imports
+ from utils.image_utils import load_image, preprocess_image
+
+ def object_detection(input_type, uploaded_image, image_url, base64_string):
+     """
+     Performs object detection on the image from various input types.
+
+     Args:
+         input_type (str): The selected input method ("Upload File", "Enter URL", "Enter Base64").
+         uploaded_image (PIL.Image.Image): The uploaded image (if input_type is "Upload File").
+         image_url (str): The image URL (if input_type is "Enter URL").
+         base64_string (str): The image base64 string (if input_type is "Enter Base64").
+
+     Returns:
+         numpy.ndarray: The image with detected objects, or None if an error occurred.
+     """
+     image = None
+     input_value = None
+
+     if input_type == "Upload File" and uploaded_image is not None:
+         image = uploaded_image  # This is a PIL Image
+         print("Using uploaded image (PIL) for object detection")  # Debug print
+
+     elif input_type == "Enter URL" and image_url and image_url.strip():
+         input_value = image_url
+         print(f"Using URL for object detection: {input_value}")  # Debug print
+
+     elif input_type == "Enter Base64" and base64_string and base64_string.strip():
+         input_value = base64_string
+         print("Using Base64 string for object detection")  # Debug print
+
+     else:
+         print("No valid input provided for object detection based on selected type.")
+         return None  # No valid input
+
+     # If input_value is set (URL or Base64), use load_image
+     if input_value:
+         image = load_image(input_value)
+         if image is None:
+             return None  # load_image failed
+
+     # Now 'image' should be a PIL Image or None
+     if image is None:
+         print("Image is None after loading/selection for object detection.")
+         return None
+
+     try:
+         # Preprocess the image (convert PIL to NumPy, ensure RGB)
+         # preprocess_image expects a PIL Image or something convertible by Image.fromarray
+         processed_image = preprocess_image(image)
+
+         # TODO: Implement object detection logic here
+         # Currently just returns the processed image
+         print("Object detection logic placeholder executed.")
+         return processed_image
+     except Exception as e:
+         print(f"Error in object detection processing: {e}")
+         return None
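Until real detection lands, this tab is a passthrough: the function returns the preprocessed input unchanged. A quick check of that behavior:

```python
from PIL import Image

from detection.object_detection import object_detection

img = Image.new("RGB", (64, 64), "gray")
out = object_detection("Upload File", img, "", "")
print(type(out), out.shape)  # <class 'numpy.ndarray'> (64, 64, 3) -- passthrough for now
```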
requirements.txt ADDED
@@ -0,0 +1,106 @@
+ aiofiles==24.1.0
+ aiohappyeyeballs==2.6.1
+ aiohttp==3.11.16
+ aiosignal==1.3.2
+ annotated-types==0.7.0
+ anyio==4.9.0
+ asttokens==3.0.0
+ async-timeout==5.0.1
+ attrs==25.3.0
+ Authlib==1.5.2
+ certifi==2025.1.31
+ cffi==1.17.1
+ charset-normalizer==3.4.1
+ click==8.0.4
+ cryptography==44.0.2
+ datasets==3.5.0
+ decorator==5.2.1
+ dill==0.3.8
+ dlib==19.24.8
+ exceptiongroup==1.2.2
+ executing==2.2.0
+ fastapi==0.115.12
+ ffmpy==0.5.0
+ filelock==3.18.0
+ frozenlist==1.5.0
+ fsspec==2024.12.0
+ gradio==5.25.2
+ gradio_client==1.8.0
+ groovy==0.1.2
+ h11==0.14.0
+ hf-xet==1.0.3
+ hf_transfer==0.1.9
+ httpcore==1.0.8
+ httpx==0.28.1
+ huggingface-hub==0.30.2
+ idna==3.10
+ ipython==8.35.0
+ itsdangerous==2.2.0
+ jedi==0.19.2
+ Jinja2==3.1.6
+ markdown-it-py==3.0.0
+ MarkupSafe==3.0.2
+ matplotlib-inline==0.1.7
+ mdurl==0.1.2
+ mpmath==1.3.0
+ multidict==6.4.3
+ multiprocess==0.70.16
+ munch==4.0.0
+ networkx==3.4.2
+ numpy==2.2.4
+ opencv-python==4.11.0.86
+ orjson==3.10.16
+ packaging==24.2
+ pandas==2.2.3
+ parso==0.8.4
+ pexpect==4.9.0
+ pillow==11.2.1
+ pretrainedmodels==0.7.4
+ prompt_toolkit==3.0.51
+ propcache==0.3.1
+ protobuf==3.20.3
+ psutil==5.9.8
+ ptyprocess==0.7.0
+ pure_eval==0.2.3
+ pyarrow==19.0.1
+ pycparser==2.22
+ pydantic==2.11.3
+ pydantic_core==2.33.1
+ pydub==0.25.1
+ Pygments==2.19.1
+ python-dateutil==2.9.0.post0
+ python-multipart==0.0.20
+ pytz==2025.2
+ PyYAML==6.0.2
+ regex==2024.11.6
+ requests==2.32.3
+ rich==14.0.0
+ ruff==0.11.6
+ safehttpx==0.1.6
+ safetensors==0.5.3
+ semantic-version==2.10.0
+ shellingham==1.5.4
+ six==1.17.0
+ sniffio==1.3.1
+ spaces==0.35.0
+ stack-data==0.6.3
+ starlette==0.46.2
+ sympy==1.13.1
+ tokenizers==0.21.1
+ tomlkit==0.13.2
+ torch==2.5.1
+ torchvision==0.20.1
+ tqdm==4.67.1
+ traitlets==5.14.3
+ transformers==4.51.3
+ triton==3.1.0
+ typer==0.15.2
+ typing-inspection==0.4.0
+ typing_extensions==4.13.2
+ tzdata==2025.2
+ urllib3==2.4.0
+ uvicorn==0.34.1
+ wcwidth==0.2.13
+ websockets==15.0.1
+ xxhash==3.5.0
+ yarl==1.19.0
utils/face_detector.py ADDED
@@ -0,0 +1,11 @@
+ import dlib
+
+ def load_face_detector():
+     """
+     Loads the dlib face detector.
+
+     Returns:
+         dlib.fhog_object_detector: The dlib frontal face detector.
+     """
+     face_detector = dlib.get_frontal_face_detector()
+     return face_detector
utils/image_utils.py ADDED
@@ -0,0 +1,102 @@
+ # Standard library imports
+ import io
+ import base64
+ import urllib.request
+
+ # Third-party imports
+ from PIL import Image
+ import numpy as np
+
+ def load_image(image_path):
+     """
+     Loads an image from a URL, base64 string, or file.
+
+     Args:
+         image_path (str): The path to the image. It can be a URL, a base64 data URI, or a file path.
+
+     Returns:
+         PIL.Image.Image: The loaded image, or None if loading failed.
+     """
+     try:
+         if image_path.startswith("http://") or image_path.startswith("https://"):
+             # Debug URL
+             print("Debug URL:", image_path)
+             # Load image from URL
+             with urllib.request.urlopen(image_path) as response:
+                 image = Image.open(io.BytesIO(response.read()))
+         elif image_path.startswith("data:image"):
+             # Load image from base64 data URI
+             image_data = base64.b64decode(image_path.split(",")[1])
+             image = Image.open(io.BytesIO(image_data))
+         else:
+             # Load image from file
+             image = Image.open(image_path)
+         return image
+     except Exception as e:
+         print(f"Error loading image: {e}")
+         return None
+
+
+ def preprocess_image(image):
+     """
+     Preprocesses the image for the models.
+
+     Args:
+         image (PIL.Image.Image): The image to preprocess.
+
+     Returns:
+         numpy.ndarray: The preprocessed image as an RGB NumPy array.
+     """
+     # Ensure image is a PIL Image before converting
+     if not isinstance(image, Image.Image):
+         image = Image.fromarray(image)
+
+     image = image.convert("RGB")
+     image = np.array(image)
+     return image
+
+ def get_image_from_input(input_type, uploaded_image, image_url, base64_string):
+     """
+     Centralized function to get an image from various input types.
+
+     Args:
+         input_type (str): The selected input method ("Upload File", "Enter URL", "Enter Base64").
+         uploaded_image (PIL.Image.Image): The uploaded image (if input_type is "Upload File").
+         image_url (str): The image URL (if input_type is "Enter URL").
+         base64_string (str): The image base64 string (if input_type is "Enter Base64").
+
+     Returns:
+         PIL.Image.Image: The loaded image, or None if an error occurred or no valid input was provided.
+     """
+     image = None
+     input_value = None
+
+     if input_type == "Upload File" and uploaded_image is not None:
+         image = uploaded_image  # This is a PIL Image from gr.Image(type="pil")
+         print("Using uploaded image (PIL)")  # Debug print
+
+     elif input_type == "Enter URL" and image_url and image_url.strip():
+         input_value = image_url
+         print(f"Using URL: {input_value}")  # Debug print
+
+     elif input_type == "Enter Base64" and base64_string and base64_string.strip():
+         input_value = base64_string
+         print("Using Base64 string")  # Debug print
+
+     else:
+         print("No valid input provided based on selected type.")
+         return None  # No valid input
+
+     # If input_value is set (URL or Base64), use the local load_image
+     if input_value:
+         image = load_image(input_value)
+         if image is None:
+             print("Error: Could not load image from provided input.")
+             return None  # load_image failed
+
+     # Now 'image' should be a PIL Image or None
+     if image is None:
+         print("Image is None after loading/selection.")
+         return None
+
+     return image
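`load_image` keys off the string prefix: `http(s)://` fetches a URL, `data:image` decodes a base64 data URI, and anything else is treated as a file path. Building a data-URI input by hand (a sketch; `photo.jpg` is a hypothetical local file):

```python
import base64

from utils.image_utils import load_image

with open("photo.jpg", "rb") as f:
    encoded = base64.b64encode(f.read()).decode("ascii")

data_uri = f"data:image/jpeg;base64,{encoded}"
img = load_image(data_uri)  # -> PIL.Image.Image, or None on failure
print(img.size)
```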
utils/ui_utils.py ADDED
@@ -0,0 +1,44 @@
+ # Utility functions for UI components
+ import gradio as gr
+
+ def update_input_visibility(choice, upload_value, url_value, base64_value):
+     """
+     Updates the visibility of input components based on the selected input method.
+
+     Note: Gradio passes the current *values* of the wired inputs, not the
+     component objects, so only `choice` is used here; visibility changes
+     are returned as gr.update() payloads.
+
+     Args:
+         choice (str): The selected input method ("Upload File", "Enter URL", "Enter Base64").
+         upload_value: Current value of the file-upload component (unused).
+         url_value: Current value of the URL textbox (unused).
+         base64_value: Current value of the Base64 textbox (unused).
+
+     Returns:
+         tuple: gr.update() payloads setting the visibility of the upload, URL, and Base64 components.
+     """
+     if choice == "Upload File":
+         return (
+             gr.update(visible=True),
+             gr.update(visible=False),
+             gr.update(visible=False),
+         )
+     elif choice == "Enter URL":
+         return (
+             gr.update(visible=False),
+             gr.update(visible=True),
+             gr.update(visible=False),
+         )
+     elif choice == "Enter Base64":
+         return (
+             gr.update(visible=False),
+             gr.update(visible=False),
+             gr.update(visible=True),
+         )
+     else:  # Default or unexpected choice
+         return (
+             gr.update(visible=True),
+             gr.update(visible=False),
+             gr.update(visible=False),
+         )
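Since only `choice` matters, the helper is easy to exercise standalone; `gr.update(...)` returns a plain update payload that Gradio applies to the wired output components:

```python
from utils.ui_utils import update_input_visibility

# Selecting "Enter URL" should show only the URL textbox
upload_upd, url_upd, b64_upd = update_input_visibility("Enter URL", None, None, None)
print(url_upd)  # e.g. {'__type__': 'update', 'visible': True}
```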