rifatramadhani committed
Commit e70400c · 1 Parent(s): ece8c80
.gitignore ADDED
@@ -0,0 +1,28 @@
+ # Byte-code files
+ *.pyc
+ __pycache__/
+
+ # Distribution / build outputs
+ dist/
+ build/
+ *.egg-info/
+
+ # Virtual environment
+ venv/
+ .venv/
+
+ # Editors/IDEs
+ .vscode/
+ .idea/
+
+ # Test and coverage
+ .coverage
+ htmlcov/
+
+ # Data files
+ *.sqlite3
+ *.db
+
+ .env
+ .env.local
+ .secrets
README.md CHANGED
@@ -8,7 +8,45 @@ sdk_version: 5.25.2
  app_file: app.py
  pinned: false
  license: mit
- short_description: A various computer vision related tools showcase.
+ short_description: A collection of computer vision tools.
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Cvtools
+
+ Cvtools is a project showcasing various computer vision functionalities through an interactive Gradio web interface.
+
+ ## Features
+
+ The application provides the following features via separate tabs in the Gradio interface:
+
+ * **Face Detection:** Detects faces in images using either OpenCV's Haar Cascade Classifier or dlib.
+ * **Age Estimation:** Estimates the age of detected faces using a pre-trained model.
+ * **Object Detection:** (Placeholder) This tab is reserved for a future object detection implementation.
+
+ ## Setup and Installation
+
+ To run this project locally, you will need Python 3.10 or higher installed.
+
+ 1. Clone the repository:
+    ```bash
+    git clone <repository_url>
+    cd Cvtools
+    ```
+ 2. Install the required dependencies:
+    ```bash
+    pip install -r requirements.txt
+    ```
+
+ ## Running the Application
+
+ To start the Gradio application, run:
+
+ ```bash
+ python app.py
+ ```
+
+ The application will be available in your web browser at the address printed in the console output.
+
+ ## License
+
+ This project is licensed under the MIT License.
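Once the app is running, the endpoints wired up in app.py below can also be called programmatically. A minimal sketch using `gradio_client` (already pinned in requirements.txt); the local URL and the `/age_estimation` endpoint name are assumptions based on Gradio's defaults:

```python
from gradio_client import Client, handle_file

# Connect to the locally running app (the URL is printed at launch)
client = Client("http://127.0.0.1:7860")

# Gradio derives endpoint names from the wired functions,
# so "/age_estimation" is an assumption here.
result = client.predict(
    "Upload File",                # input_type
    handle_file("portrait.jpg"),  # uploaded_image (hypothetical local file)
    "",                           # image_url (unused for this input type)
    "",                           # base64_string (unused)
    api_name="/age_estimation",
)
print(result)  # e.g. "Estimated Age: 34"
```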
age_estimation/age_estimation.py ADDED
@@ -0,0 +1,53 @@
+ import os
+ import tempfile
+ import torch
+ import dlib
+
+ from PIL import Image
+
+ from .model import load_model
+ from utils.image_utils import load_image, preprocess_image, get_image_from_input
+ from utils.face_detector import load_face_detector
+ from .predict import predict_age
+
+
+ def age_estimation(input_type, uploaded_image, image_url, base64_string):
+     """
+     Estimates the age from an image input via file, URL, or base64 string.
+
+     Args:
+         input_type (str): The selected input method ("Upload File", "Enter URL", "Enter Base64").
+         uploaded_image (PIL.Image.Image): The uploaded image (if input_type is "Upload File").
+         image_url (str): The image URL (if input_type is "Enter URL").
+         base64_string (str): The image base64 string (if input_type is "Enter Base64").
+
+     Returns:
+         str: The estimated age, or an error message.
+     """
+     # Use the centralized function to get the image
+     image = get_image_from_input(input_type, uploaded_image, image_url, base64_string)
+
+     if image is None:
+         print("Image is None after loading/selection for age estimation.")
+         return "Error: Image processing failed or no valid input provided."
+
+     try:
+         face_detector = load_face_detector()
+
+         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+         model = load_model(device)
+
+         # Preprocess the image (convert PIL to NumPy, ensure RGB)
+         processed_image = preprocess_image(image)
+
+         # Call predict_age with the processed image (NumPy array)
+         age_data = predict_age(processed_image, model, face_detector, device)
+
+         if age_data:
+             # age_data is a list of per-face dictionaries; report the first face's age
+             return f"Estimated Age: {age_data[0]['age']}"
+         else:
+             return "No faces detected"
+     except Exception as e:
+         print(f"Error in age estimation: {e}")
+         return f"Error in age estimation: {e}"
age_estimation/model.py ADDED
@@ -0,0 +1,42 @@
+ import huggingface_hub
+ import pretrainedmodels
+ import torch
+ import torch.nn as nn
+
+ def get_model(model_name="se_resnext50_32x4d", num_classes=101, pretrained="imagenet"):
+     """
+     Loads a pre-trained model.
+
+     Args:
+         model_name (str): Name of the model to load.
+         num_classes (int): Number of output classes for the model (ages 0-100).
+         pretrained (str or None): Pretrained weights to load (e.g., "imagenet"), or None.
+
+     Returns:
+         torch.nn.Module: The loaded model.
+     """
+     model = pretrainedmodels.__dict__[model_name](pretrained=pretrained)
+     dim_feats = model.last_linear.in_features
+     model.last_linear = nn.Linear(dim_feats, num_classes)
+     model.avg_pool = nn.AdaptiveAvgPool2d(1)
+     return model
+
+
+ def load_model(device):
+     """
+     Loads the age estimation model from Hugging Face Hub.
+
+     Args:
+         device (torch.device): The device to load the model onto.
+
+     Returns:
+         torch.nn.Module: The loaded model, in eval mode.
+     """
+     model = get_model(model_name="se_resnext50_32x4d", pretrained=None)
+     path = huggingface_hub.hf_hub_download(
+         "public-data/yu4u-age-estimation-pytorch", "pretrained.pth"
+     )
+     model.load_state_dict(torch.load(path, weights_only=True))
+     model = model.to(device)
+     model.eval()
+     return model
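Because `num_classes=101`, the network emits one logit per age 0-100. A quick shape check (a sketch; the first call downloads the weights from the Hub):

```python
import torch

from age_estimation.model import load_model

device = torch.device("cpu")
model = load_model(device)  # downloads pretrained.pth on first run

with torch.inference_mode():
    logits = model(torch.zeros(1, 3, 224, 224))
print(logits.shape)  # torch.Size([1, 101]) -- one logit per age 0..100
```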
age_estimation/predict.py ADDED
@@ -0,0 +1,73 @@
+ import cv2
+ import numpy as np
+ import dlib
+ import torch
+ import torch.nn.functional as F
+
+ AGE_ESTIMATION_MARGIN = 0.4
+ AGE_ESTIMATION_INPUT_SIZE = 224
+
+ @torch.inference_mode()
+ def predict_age(image, model, face_detector, device, margin=AGE_ESTIMATION_MARGIN, input_size=AGE_ESTIMATION_INPUT_SIZE):
+     """
+     Predicts the age of faces in an image.
+
+     Args:
+         image (numpy.ndarray): The image as a NumPy array (HWC, RGB).
+         model (torch.nn.Module): The age estimation model.
+         face_detector (dlib.fhog_object_detector): The dlib face detector.
+         device (torch.device): The device to run the model on.
+         margin (float): The margin to add around each detected face.
+         input_size (int): The input image size expected by the model.
+
+     Returns:
+         list: A list of dictionaries containing the age and face coordinates for each detected face.
+     """
+     # preprocess_image returns an RGB array (PIL -> NumPy), while the cv2
+     # drawing and resizing below expect BGR, so convert once here.
+     image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+     image_h, image_w = image.shape[:2]
+
+     # Detect faces with the dlib detector (upsampling the image 3 times)
+     detected = face_detector(image, 3)
+     faces = np.empty((len(detected), input_size, input_size, 3))
+     age_data = []
+
+     # Process each detected face
+     if len(detected) > 0:
+         for i, d in enumerate(detected):
+             # Get face coordinates and dimensions
+             x1, y1 = d.left(), d.top()
+             x2, y2 = d.right() + 1, d.bottom() + 1
+             w, h = d.width(), d.height()
+
+             # Calculate the expanded face region with margin
+             xw1 = max(int(x1 - margin * w), 0)
+             yw1 = max(int(y1 - margin * h), 0)
+             xw2 = min(int(x2 + margin * w), image_w - 1)
+             yw2 = min(int(y2 + margin * h), image_h - 1)
+
+             # Resize the face crop to the model's input size
+             faces[i] = cv2.resize(image[yw1:yw2 + 1, xw1:xw2 + 1],
+                                   (input_size, input_size))
+
+             # Draw rectangles around the detected face and the expanded region
+             cv2.rectangle(image, (x1, y1), (x2, y2), (255, 255, 255), 2)
+             cv2.rectangle(image, (xw1, yw1), (xw2, yw2), (255, 0, 0), 2)
+
+         # Prepare face crops for the model (NHWC float32 -> NCHW tensor)
+         inputs = torch.from_numpy(
+             np.transpose(faces.astype(np.float32), (0, 3, 1, 2))).to(device)
+
+         # Perform age prediction using the model
+         outputs = F.softmax(model(inputs), dim=-1).cpu().numpy()
+         ages = np.arange(0, 101)
+         predicted_ages = (outputs * ages).sum(axis=-1)
+
+         # Store the predicted age and face coordinates
+         for age, d in zip(predicted_ages, detected):
+             age_text = f'{int(age)}'
+             age_data.append({'age': int(age), 'text': age_text, 'face_coordinates': (d.left(), d.top())})
+
+     # Return the list of age data for each detected face
+     return age_data
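The final expectation step turns the 101-way classification into a continuous estimate: the predicted age is the probability-weighted sum of ages, i.e. sum(i * p_i) for i = 0..100. A toy illustration of just that step:

```python
import numpy as np

# Toy softmax output: probability mass split around age 30
probs = np.zeros(101)
probs[[29, 30, 31]] = [0.25, 0.5, 0.25]

expected_age = (probs * np.arange(101)).sum()
print(expected_age)  # 30.0 = 29*0.25 + 30*0.5 + 31*0.25
```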
app.py CHANGED
@@ -1,7 +1,164 @@
+ # Standard library imports
+ import os
+ import sys
  import gradio as gr

- def greet(name):
-     return "Hello " + name + "!!"
+ # Local imports
+ from age_estimation.age_estimation import age_estimation
+ from detection.face_detection import face_detection
+ from detection.object_detection import object_detection
+ from utils.ui_utils import update_input_visibility

- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
- demo.launch()
+ with gr.Blocks() as demo:
+     # Add a title to the interface
+     gr.Markdown("# Computer Vision Tools")
+     # Create a tab for face detection
+     with gr.Tab("Face Detection"):
+         # Input Method Selection
+         face_input_type = gr.Radio(
+             ["Upload File", "Enter URL", "Enter Base64"],
+             label="Input Method",
+             value="Upload File",  # Default selection
+         )
+
+         # Face Detection Method Selection
+         face_detection_method = gr.Radio(
+             ["OpenCV", "dlib"],
+             label="Face Detection Method",
+             value="OpenCV",  # Default selection
+         )
+
+         # Input Components (initially only file upload is visible)
+         with gr.Row():
+             face_img_upload = gr.Image(type="pil", label="Upload Image", visible=True)
+             face_url_input = gr.Textbox(
+                 label="Enter Image URL", placeholder="e.g., https://...", visible=False
+             )
+             face_base64_input = gr.Textbox(
+                 label="Enter Base64 String",
+                 placeholder="Enter base64 string here...",
+                 visible=False,
+             )
+
+         # Process Button
+         face_process_btn = gr.Button("Process Image")
+
+         # Output Component
+         face_image_output = gr.Image(label="Detected Faces")
+
+         # Link radio button change to visibility update function
+         face_input_type.change(
+             fn=update_input_visibility,
+             inputs=[
+                 face_input_type,
+                 face_img_upload,
+                 face_url_input,
+                 face_base64_input,
+             ],
+             outputs=[face_img_upload, face_url_input, face_base64_input],
+             queue=False,
+         )
+
+         # Link process button to the face detection function,
+         # which dispatches on the selected input type and method
+         face_process_btn.click(
+             fn=face_detection,
+             inputs=[
+                 face_input_type,
+                 face_img_upload,
+                 face_url_input,
+                 face_base64_input,
+                 face_detection_method,
+             ],
+             outputs=face_image_output,
+         )
+     # Create a tab for age estimation
+     with gr.Tab("Age Estimation"):
+         # Input Method Selection
+         age_input_type = gr.Radio(
+             ["Upload File", "Enter URL", "Enter Base64"],
+             label="Input Method",
+             value="Upload File",  # Default selection
+         )
+
+         # Input Components (initially only file upload is visible)
+         with gr.Row():
+             age_img_upload = gr.Image(type="pil", label="Upload Image", visible=True)
+             age_url_input = gr.Textbox(
+                 label="Enter Image URL", placeholder="e.g., https://...", visible=False
+             )
+             age_base64_input = gr.Textbox(
+                 label="Enter Base64 String",
+                 placeholder="Enter base64 string here...",
+                 visible=False,
+             )
+
+         # Process Button
+         age_process_btn = gr.Button("Estimate Age")
+
+         # Output Component
+         age_text_output = gr.Textbox(label="Estimated Age")
+
+         # Link radio button change to visibility update function
+         age_input_type.change(
+             fn=update_input_visibility,
+             inputs=[age_input_type, age_img_upload, age_url_input, age_base64_input],
+             outputs=[age_img_upload, age_url_input, age_base64_input],
+             queue=False,
+         )
+
+         # Link process button to the age estimation function,
+         # which dispatches on the selected input type
+         age_process_btn.click(
+             fn=age_estimation,
+             inputs=[age_input_type, age_img_upload, age_url_input, age_base64_input],
+             outputs=age_text_output,
+         )
+     # Create a tab for object detection
+     with gr.Tab("Object Detection"):
+         # Input Method Selection
+         obj_input_type = gr.Radio(
+             ["Upload File", "Enter URL", "Enter Base64"],
+             label="Input Method",
+             value="Upload File",  # Default selection
+         )
+
+         # Input Components (initially only file upload is visible)
+         with gr.Row():
+             obj_img_upload = gr.Image(type="pil", label="Upload Image", visible=True)
+             obj_url_input = gr.Textbox(
+                 label="Enter Image URL", placeholder="e.g., https://...", visible=False
+             )
+             obj_base64_input = gr.Textbox(
+                 label="Enter Base64 String",
+                 placeholder="Enter base64 string here...",
+                 visible=False,
+             )
+
+         # Process Button
+         obj_process_btn = gr.Button("Detect Objects")
+
+         # Output Component
+         obj_image_output = gr.Image(label="Detected Objects")
+
+         # Link radio button change to visibility update function
+         obj_input_type.change(
+             fn=update_input_visibility,
+             inputs=[obj_input_type, obj_img_upload, obj_url_input, obj_base64_input],
+             outputs=[obj_img_upload, obj_url_input, obj_base64_input],
+             queue=False,
+         )
+
+         # Link process button to the object detection function,
+         # which dispatches on the selected input type
+         obj_process_btn.click(
+             fn=object_detection,
+             inputs=[obj_input_type, obj_img_upload, obj_url_input, obj_base64_input],
+             outputs=obj_image_output,
+         )
+
+ # Launch the Gradio demo, honoring an env var or CLI port override
+ port = int(os.environ.get("GRADIO_SERVER_PORT", 7860))
+ if "--server_port" in sys.argv:
+     port = int(sys.argv[sys.argv.index("--server_port") + 1])
+ demo.launch(server_port=port, ssr_mode=True, share=True)
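The launch block resolves the port from the `GRADIO_SERVER_PORT` environment variable, with a `--server_port` flag taking precedence. For example:

```bash
# Either invocation starts the app on port 8080
GRADIO_SERVER_PORT=8080 python app.py
python app.py --server_port 8080
```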
detection/face_detection.py ADDED
@@ -0,0 +1,76 @@
+ # Standard library imports
+ import os
+
+ # Third-party imports
+ import cv2
+ import numpy as np
+ from PIL import Image
+
+ # Local imports
+ from utils.image_utils import load_image, preprocess_image, get_image_from_input
+ from utils.face_detector import load_face_detector  # assumed to be the dlib detector loader
+
+ # Define constants
+ HAAR_CASCADE_FILENAME = "haarcascade_frontalface_default.xml"
+
+ def face_detection(input_type, uploaded_image, image_url, base64_string, face_detection_method):
+     """
+     Performs face detection on the image from various input types using the selected method.
+
+     Args:
+         input_type (str): The selected input method ("Upload File", "Enter URL", "Enter Base64").
+         uploaded_image (PIL.Image.Image): The uploaded image (if input_type is "Upload File").
+         image_url (str): The image URL (if input_type is "Enter URL").
+         base64_string (str): The image base64 string (if input_type is "Enter Base64").
+         face_detection_method (str): The selected face detection method ("OpenCV" or "dlib").
+
+     Returns:
+         numpy.ndarray: The image with detected faces drawn, or None if an error occurred.
+     """
+     # Use the centralized function to get the image
+     image = get_image_from_input(input_type, uploaded_image, image_url, base64_string)
+
+     if image is None:
+         print("Image is None after loading/selection.")
+         return None  # No valid input or loading failed
+
+     try:
+         # Preprocess the image (convert PIL to an RGB NumPy array)
+         processed_image = preprocess_image(image)
+
+         if processed_image is not None:
+             # processed_image is RGB, so use the RGB-to-grayscale conversion
+             gray = cv2.cvtColor(processed_image, cv2.COLOR_RGB2GRAY)
+
+             if face_detection_method == "OpenCV":
+                 print("Using OpenCV for face detection.")
+                 # Build the full path to the Haar cascade bundled with OpenCV
+                 cascade_path = os.path.join(cv2.data.haarcascades, HAAR_CASCADE_FILENAME)
+
+                 # Check that the cascade file exists
+                 if not os.path.exists(cascade_path):
+                     error_message = f"Error: Haar cascade file not found at {cascade_path}. Please ensure OpenCV is installed correctly and the file exists."
+                     print(error_message)
+                     return None
+
+                 face_cascade = cv2.CascadeClassifier(cascade_path)
+
+                 faces = face_cascade.detectMultiScale(gray, 1.1, 4)
+                 for x, y, w, h in faces:
+                     cv2.rectangle(processed_image, (x, y), (x + w, y + h), (255, 0, 0), 2)
+
+             elif face_detection_method == "dlib":
+                 print("Using dlib for face detection.")
+                 face_detector = load_face_detector()
+                 # dlib accepts RGB arrays directly; draw on the same image
+                 faces = face_detector(processed_image, 1)  # 1 is the upsample level
+                 for face in faces:
+                     x, y, w, h = face.left(), face.top(), face.width(), face.height()
+                     cv2.rectangle(processed_image, (x, y), (x + w, y + h), (255, 0, 0), 2)
+
+             return processed_image
+         else:
+             return None
+     except Exception as e:
+         print(f"Error in face detection processing: {e}")
+         return None
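OpenCV's `CascadeClassifier` constructor does not raise on a bad file; it just yields an empty classifier, so the `os.path.exists` guard above can be complemented with an `empty()` check. A standalone sanity-check sketch:

```python
import os

import cv2

cascade_path = os.path.join(cv2.data.haarcascades, "haarcascade_frontalface_default.xml")
face_cascade = cv2.CascadeClassifier(cascade_path)

# empty() is True when the XML failed to load, even if the file exists
print(cascade_path, "loaded:", not face_cascade.empty())
```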
detection/object_detection.py ADDED
@@ -0,0 +1,65 @@
+ # Standard library imports
+ # (Add any necessary imports for future object detection implementation)
+
+ # Third-party imports
+ from PIL import Image
+ import numpy as np
+
+ # Local imports
+ from utils.image_utils import load_image, preprocess_image
+
+ def object_detection(input_type, uploaded_image, image_url, base64_string):
+     """
+     Performs object detection on the image from various input types.
+
+     Args:
+         input_type (str): The selected input method ("Upload File", "Enter URL", "Enter Base64").
+         uploaded_image (PIL.Image.Image): The uploaded image (if input_type is "Upload File").
+         image_url (str): The image URL (if input_type is "Enter URL").
+         base64_string (str): The image base64 string (if input_type is "Enter Base64").
+
+     Returns:
+         numpy.ndarray: The image with detected objects, or None if an error occurred.
+     """
+     image = None
+     input_value = None
+
+     if input_type == "Upload File" and uploaded_image is not None:
+         image = uploaded_image  # This is a PIL Image
+         print("Using uploaded image (PIL) for object detection")  # Debug print
+
+     elif input_type == "Enter URL" and image_url and image_url.strip():
+         input_value = image_url
+         print(f"Using URL for object detection: {input_value}")  # Debug print
+
+     elif input_type == "Enter Base64" and base64_string and base64_string.strip():
+         input_value = base64_string
+         print("Using Base64 string for object detection")  # Debug print
+
+     else:
+         print("No valid input provided for object detection based on selected type.")
+         return None  # No valid input
+
+     # If input_value is set (URL or Base64), use load_image
+     if input_value:
+         image = load_image(input_value)
+         if image is None:
+             return None  # load_image failed
+
+     # Now 'image' should be a PIL Image or None
+     if image is None:
+         print("Image is None after loading/selection for object detection.")
+         return None
+
+     try:
+         # Preprocess the image (convert PIL to NumPy, ensure RGB)
+         # preprocess_image expects a PIL Image or something convertible by Image.fromarray
+         processed_image = preprocess_image(image)
+
+         # TODO: Implement object detection logic here
+         # Currently just returns the processed image
+         print("Object detection logic placeholder executed.")
+         return processed_image
+     except Exception as e:
+         print(f"Error in object detection processing: {e}")
+         return None
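Until real detection lands, this tab is a passthrough: the function returns the preprocessed input unchanged. A quick check of that behavior:

```python
from PIL import Image

from detection.object_detection import object_detection

img = Image.new("RGB", (64, 64), "gray")
out = object_detection("Upload File", img, "", "")
print(type(out), out.shape)  # <class 'numpy.ndarray'> (64, 64, 3) -- passthrough for now
```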
requirements.txt ADDED
@@ -0,0 +1,106 @@
+ aiofiles==24.1.0
+ aiohappyeyeballs==2.6.1
+ aiohttp==3.11.16
+ aiosignal==1.3.2
+ annotated-types==0.7.0
+ anyio==4.9.0
+ asttokens==3.0.0
+ async-timeout==5.0.1
+ attrs==25.3.0
+ Authlib==1.5.2
+ certifi==2025.1.31
+ cffi==1.17.1
+ charset-normalizer==3.4.1
+ click==8.0.4
+ cryptography==44.0.2
+ datasets==3.5.0
+ decorator==5.2.1
+ dill==0.3.8
+ dlib==19.24.8
+ exceptiongroup==1.2.2
+ executing==2.2.0
+ fastapi==0.115.12
+ ffmpy==0.5.0
+ filelock==3.18.0
+ frozenlist==1.5.0
+ fsspec==2024.12.0
+ gradio==5.25.2
+ gradio_client==1.8.0
+ groovy==0.1.2
+ h11==0.14.0
+ hf-xet==1.0.3
+ hf_transfer==0.1.9
+ httpcore==1.0.8
+ httpx==0.28.1
+ huggingface-hub==0.30.2
+ idna==3.10
+ ipython==8.35.0
+ itsdangerous==2.2.0
+ jedi==0.19.2
+ Jinja2==3.1.6
+ markdown-it-py==3.0.0
+ MarkupSafe==3.0.2
+ matplotlib-inline==0.1.7
+ mdurl==0.1.2
+ mpmath==1.3.0
+ multidict==6.4.3
+ multiprocess==0.70.16
+ munch==4.0.0
+ networkx==3.4.2
+ numpy==2.2.4
+ opencv-python==4.11.0.86
+ orjson==3.10.16
+ packaging==24.2
+ pandas==2.2.3
+ parso==0.8.4
+ pexpect==4.9.0
+ pillow==11.2.1
+ pretrainedmodels==0.7.4
+ prompt_toolkit==3.0.51
+ propcache==0.3.1
+ protobuf==3.20.3
+ psutil==5.9.8
+ ptyprocess==0.7.0
+ pure_eval==0.2.3
+ pyarrow==19.0.1
+ pycparser==2.22
+ pydantic==2.11.3
+ pydantic_core==2.33.1
+ pydub==0.25.1
+ Pygments==2.19.1
+ python-dateutil==2.9.0.post0
+ python-multipart==0.0.20
+ pytz==2025.2
+ PyYAML==6.0.2
+ regex==2024.11.6
+ requests==2.32.3
+ rich==14.0.0
+ ruff==0.11.6
+ safehttpx==0.1.6
+ safetensors==0.5.3
+ semantic-version==2.10.0
+ shellingham==1.5.4
+ six==1.17.0
+ sniffio==1.3.1
+ spaces==0.35.0
+ stack-data==0.6.3
+ starlette==0.46.2
+ sympy==1.13.1
+ tokenizers==0.21.1
+ tomlkit==0.13.2
+ torch==2.5.1
+ torchvision==0.20.1
+ tqdm==4.67.1
+ traitlets==5.14.3
+ transformers==4.51.3
+ triton==3.1.0
+ typer==0.15.2
+ typing-inspection==0.4.0
+ typing_extensions==4.13.2
+ tzdata==2025.2
+ urllib3==2.4.0
+ uvicorn==0.34.1
+ wcwidth==0.2.13
+ websockets==15.0.1
+ xxhash==3.5.0
+ yarl==1.19.0
utils/face_detector.py ADDED
@@ -0,0 +1,11 @@
+ import dlib
+
+ def load_face_detector():
+     """
+     Loads the dlib face detector.
+
+     Returns:
+         dlib.fhog_object_detector: The dlib frontal face detector.
+     """
+     face_detector = dlib.get_frontal_face_detector()
+     return face_detector
utils/image_utils.py ADDED
@@ -0,0 +1,102 @@
+ # Standard library imports
+ import io
+ import base64
+ import urllib.request
+
+ # Third-party imports
+ from PIL import Image
+ import numpy as np
+
+ def load_image(image_path):
+     """
+     Loads an image from a URL, base64 string, or file.
+
+     Args:
+         image_path (str): The path to the image. It can be a URL, a base64 data URI, or a file path.
+
+     Returns:
+         PIL.Image.Image: The loaded image, or None if loading failed.
+     """
+     try:
+         if image_path.startswith("http://") or image_path.startswith("https://"):
+             # Debug URL
+             print("Debug URL:", image_path)
+             # Load image from URL
+             with urllib.request.urlopen(image_path) as response:
+                 image = Image.open(io.BytesIO(response.read()))
+         elif image_path.startswith("data:image"):
+             # Load image from base64 data URI
+             image_data = base64.b64decode(image_path.split(",")[1])
+             image = Image.open(io.BytesIO(image_data))
+         else:
+             # Load image from file
+             image = Image.open(image_path)
+         return image
+     except Exception as e:
+         print(f"Error loading image: {e}")
+         return None
+
+
+ def preprocess_image(image):
+     """
+     Preprocesses the image for the models.
+
+     Args:
+         image (PIL.Image.Image): The image to preprocess.
+
+     Returns:
+         numpy.ndarray: The preprocessed image as an RGB NumPy array.
+     """
+     # Ensure image is a PIL Image before converting
+     if not isinstance(image, Image.Image):
+         image = Image.fromarray(image)
+
+     image = image.convert("RGB")
+     image = np.array(image)
+     return image
+
+ def get_image_from_input(input_type, uploaded_image, image_url, base64_string):
+     """
+     Centralized function to get an image from various input types.
+
+     Args:
+         input_type (str): The selected input method ("Upload File", "Enter URL", "Enter Base64").
+         uploaded_image (PIL.Image.Image): The uploaded image (if input_type is "Upload File").
+         image_url (str): The image URL (if input_type is "Enter URL").
+         base64_string (str): The image base64 string (if input_type is "Enter Base64").
+
+     Returns:
+         PIL.Image.Image: The loaded image, or None if an error occurred or no valid input was provided.
+     """
+     image = None
+     input_value = None
+
+     if input_type == "Upload File" and uploaded_image is not None:
+         image = uploaded_image  # This is a PIL Image from gr.Image(type="pil")
+         print("Using uploaded image (PIL)")  # Debug print
+
+     elif input_type == "Enter URL" and image_url and image_url.strip():
+         input_value = image_url
+         print(f"Using URL: {input_value}")  # Debug print
+
+     elif input_type == "Enter Base64" and base64_string and base64_string.strip():
+         input_value = base64_string
+         print("Using Base64 string")  # Debug print
+
+     else:
+         print("No valid input provided based on selected type.")
+         return None  # No valid input
+
+     # If input_value is set (URL or Base64), use the local load_image
+     if input_value:
+         image = load_image(input_value)
+         if image is None:
+             print("Error: Could not load image from provided input.")
+             return None  # load_image failed
+
+     # Now 'image' should be a PIL Image or None
+     if image is None:
+         print("Image is None after loading/selection.")
+         return None
+
+     return image
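`load_image` keys off the string prefix: `http(s)://` fetches a URL, `data:image` decodes a base64 data URI, and anything else is treated as a file path. Building a data-URI input by hand (a sketch; `photo.jpg` is a hypothetical local file):

```python
import base64

from utils.image_utils import load_image

with open("photo.jpg", "rb") as f:
    encoded = base64.b64encode(f.read()).decode("ascii")

data_uri = f"data:image/jpeg;base64,{encoded}"
img = load_image(data_uri)  # -> PIL.Image.Image, or None on failure
print(img.size)
```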
utils/ui_utils.py ADDED
@@ -0,0 +1,44 @@
+ # Utility functions for UI components
+ import gradio as gr
+
+ def update_input_visibility(choice, upload_value, url_value, base64_value):
+     """
+     Updates the visibility of input components based on the selected input method.
+
+     Note: Gradio passes the current *values* of the wired inputs, not the
+     component objects, so only `choice` is used here; visibility changes
+     are returned as gr.update() payloads.
+
+     Args:
+         choice (str): The selected input method ("Upload File", "Enter URL", "Enter Base64").
+         upload_value: Current value of the file-upload component (unused).
+         url_value: Current value of the URL textbox (unused).
+         base64_value: Current value of the Base64 textbox (unused).
+
+     Returns:
+         tuple: gr.update() payloads setting the visibility of the upload, URL, and Base64 components.
+     """
+     if choice == "Upload File":
+         return (
+             gr.update(visible=True),
+             gr.update(visible=False),
+             gr.update(visible=False),
+         )
+     elif choice == "Enter URL":
+         return (
+             gr.update(visible=False),
+             gr.update(visible=True),
+             gr.update(visible=False),
+         )
+     elif choice == "Enter Base64":
+         return (
+             gr.update(visible=False),
+             gr.update(visible=False),
+             gr.update(visible=True),
+         )
+     else:  # Default or unexpected choice
+         return (
+             gr.update(visible=True),
+             gr.update(visible=False),
+             gr.update(visible=False),
+         )
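Since only `choice` matters, the helper is easy to exercise standalone; `gr.update(...)` returns a plain update payload that Gradio applies to the wired output components:

```python
from utils.ui_utils import update_input_visibility

# Selecting "Enter URL" should show only the URL textbox
upload_upd, url_upd, b64_upd = update_input_visibility("Enter URL", None, None, None)
print(url_upd)  # e.g. {'__type__': 'update', 'visible': True}
```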