pavank007 committed on
Commit 8ed9665 · verified · 1 Parent(s): 95e6fb6

Update app.py

Files changed (1)
  1. app.py +257 -158
app.py CHANGED
@@ -3,188 +3,287 @@ import numpy as np
  import torch
  from PIL import Image, ImageFilter
  import cv2
  from transformers import pipeline

  # Set device to GPU if available
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
- print(f"Using device: {device}")

- # Load models only once at startup to improve performance
- segmentation_model = "facebook/sam-vit-huge"
- depth_model = "depth-anything/Depth-Anything-V2-Small-hf"

- # Initialize pipelines
- segmentation_pipe = pipeline("image-segmentation", model=segmentation_model)
- depth_pipe = pipeline("depth-estimation", model=depth_model)

  def get_segmentation_mask(input_image):
-     """Get segmentation mask using the pre-loaded segmentation pipeline"""
-     # Resize image to 512x512 for consistent processing
-     input_image = input_image.resize((512, 512)).convert('RGB')
-
-     # Get the segmentation result
-     result = segmentation_pipe(input_image)
-
-     # Extract the first mask (assuming it's the most prominent object)
-     if len(result) > 0:
-         # For SAM-like models that return multiple masks
-         mask = result[0]['mask']
-         mask = np.array(mask) * 255 # Scale to [0, 255]
-     else:
-         # Fallback - create empty mask
-         mask = np.zeros((512, 512), dtype=np.uint8)
-
-     # Convert to PIL Image
-     mask_img = Image.fromarray(mask.astype(np.uint8))

-     return mask_img, input_image

  def apply_background_blur(original_image, mask_image, sigma=15):
      """Apply Gaussian blur to the background using a segmentation mask"""
-     # Ensure mask is binary (0 for background, 255 for foreground)
-     mask_array = np.array(mask_image)
-     _, binary_mask = cv2.threshold(mask_array, 127, 255, cv2.THRESH_BINARY)
-     mask_img = Image.fromarray(binary_mask)
-
-     # Create a blurred version of the original image
-     blurred_img = original_image.filter(ImageFilter.GaussianBlur(radius=sigma))
-
-     # Convert images to numpy arrays for easier manipulation
-     original_array = np.array(original_image)
-     blurred_array = np.array(blurred_img)
-     mask_array = np.array(mask_img)
-
-     # Create the composite image: foreground from original, background from blurred
-     result_array = np.zeros_like(original_array)
-
-     # Where mask is white (255), use original image; where mask is black (0), use blurred image
-     for c in range(3): # For each color channel (RGB)
-         result_array[:, :, c] = np.where(mask_array == 255,
-                                          original_array[:, :, c],
-                                          blurred_array[:, :, c])
-
-     # Convert back to PIL Image
-     result_img = Image.fromarray(result_array)

-     return result_img

  def get_depth_map(input_image):
-     """Get depth map using the pre-loaded depth estimation pipeline"""
-     # Ensure image is in RGB format and resized to 512x512
-     input_image = input_image.resize((512, 512)).convert('RGB')
-
-     # Get the depth map
-     result = depth_pipe(input_image)
-     depth_map = result["depth"]
-
-     # Convert to numpy array for further processing
-     depth_array = np.array(depth_map)

-     return depth_map, depth_array

  def apply_depth_based_blur(original_image, depth_array, max_blur=30):
      """Apply variable Gaussian blur based on depth"""
-     # Convert depth array to proper format if needed
-     if len(depth_array.shape) == 3 and depth_array.shape[2] > 1:
-         # If depth map has multiple channels, convert to grayscale
-         depth_array = np.mean(depth_array, axis=2)
-
-     # Normalize depth values to range [0, 1]
-     depth_min = depth_array.min()
-     depth_max = depth_array.max()
-     normalized_depth = (depth_array - depth_min) / (depth_max - depth_min)
-
-     # Create a series of increasingly blurred versions of the image
-     blurred_images = []
-     for blur_amount in range(max_blur + 1):
-         blurred_images.append(original_image.filter(ImageFilter.GaussianBlur(radius=blur_amount)))
-
-     # Convert to numpy arrays for easier processing
-     original_array = np.array(original_image)
-     result_array = np.zeros_like(original_array)
-
-     # For each pixel, determine the blur level based on depth
-     height, width = normalized_depth.shape
-     for y in range(height):
-         for x in range(width):
-             # Calculate blur radius proportional to depth
-             # Higher normalized_depth = farther object = more blur
-             blur_radius = int(normalized_depth[y, x] * max_blur)
-             result_array[y, x] = np.array(blurred_images[blur_radius])[y, x]

-     return Image.fromarray(result_array)

  def process_image(input_image, blur_sigma=15, max_depth_blur=30):
-     """Main function to process the image through all effects"""
-     if input_image is None:
-         return None, None, None, None
-
-     # Resize input image for consistent processing
-     input_image = Image.fromarray(input_image).convert('RGB')
-     input_image = input_image.resize((512, 512))
-
-     # Step 1: Get segmentation mask
-     mask, _ = get_segmentation_mask(input_image)
-
-     # Step 2: Apply background blur
-     blurred_background = apply_background_blur(input_image, mask, sigma=blur_sigma)
-
-     # Step 3: Get depth map
-     depth_map, depth_array = get_depth_map(input_image)
-
-     # Step 4: Apply depth-based blur
-     depth_blur = apply_depth_based_blur(input_image, depth_array, max_blur=max_depth_blur)
-
-     # Convert all PIL images to numpy arrays for Gradio
-     input_np = np.array(input_image)
-     mask_np = np.array(mask)
-     blurred_np = np.array(blurred_background)
-     depth_map_np = np.array(depth_map)
-     depth_blur_np = np.array(depth_blur)

-     return input_np, mask_np, blurred_np, depth_map_np, depth_blur_np

  # Create Gradio Interface
- with gr.Blocks(title="Image Blur Effects - EEE 515 Assignment 3") as demo:
-     gr.Markdown("# Image Blur Effects App")
-     gr.Markdown("Upload an image to apply segmentation-based blur and depth-based lens blur effects")
-
-     with gr.Row():
-         input_image = gr.Image(label="Upload Image", type="numpy")
-
-     with gr.Row():
-         blur_sigma = gr.Slider(minimum=1, maximum=30, value=15, step=1, label="Background Blur Strength (σ)")
-         depth_blur_max = gr.Slider(minimum=1, maximum=50, value=30, step=1, label="Max Depth Blur Strength")
-
-     with gr.Row():
-         process_btn = gr.Button("Process Image")
-
-     with gr.Tab("Segmentation Results"):
-         with gr.Row():
-             original_output = gr.Image(label="Original Image", type="numpy")
-             mask_output = gr.Image(label="Segmentation Mask", type="numpy")
-         with gr.Row():
-             blurred_output = gr.Image(label="Background Blur Effect", type="numpy")
-
-     with gr.Tab("Depth Results"):
-         with gr.Row():
-             depth_map_output = gr.Image(label="Depth Map", type="numpy")
-             depth_blur_output = gr.Image(label="Depth-Based Lens Blur", type="numpy")
-
-     process_btn.click(
-         fn=process_image,
-         inputs=[input_image, blur_sigma, depth_blur_max],
-         outputs=[original_output, mask_output, blurred_output, depth_map_output, depth_blur_output]
-     )
-
-     gr.Markdown("## How it works")
-     gr.Markdown("""
-     1. **Segmentation-Based Blur**: Uses a segmentation model to identify the foreground object,
-        then applies Gaussian blur only to the background.
-
-     2. **Depth-Based Lens Blur**: Uses a monocular depth estimation model to create a depth map,
-        then applies varying levels of blur based on the estimated depth.
-     """)

  # Launch the app
- demo.launch()
 
 
  import torch
  from PIL import Image, ImageFilter
  import cv2
+ import os
+ import sys
+ import traceback
  from transformers import pipeline

+ # Configure logging to console
+ import logging
+ logging.basicConfig(level=logging.INFO,
+                     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+                     stream=sys.stdout)
+ logger = logging.getLogger(__name__)
+
  # Set device to GPU if available
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ logger.info(f"Using device: {device}")
+
+ # Use smaller, more efficient models for Hugging Face Space
+ SEGMENTATION_MODEL = "facebook/sam-vit-base"
+ DEPTH_MODEL = "depth-anything/Depth-Anything-V2-Small-hf"

+ # Global variables for models
+ segmentation_pipe = None
+ depth_pipe = None

+ def load_segmentation_model():
+     """Load the segmentation model on demand"""
+     global segmentation_pipe
+     if segmentation_pipe is None:
+         try:
+             logger.info("Loading segmentation model...")
+             segmentation_pipe = pipeline("image-segmentation", model=SEGMENTATION_MODEL)
+             logger.info("Segmentation model loaded successfully")
+         except Exception as e:
+             logger.error(f"Error loading segmentation model: {e}")
+             logger.error(traceback.format_exc())
+             return None
+     return segmentation_pipe
+
+ def load_depth_model():
+     """Load the depth model on demand"""
+     global depth_pipe
+     if depth_pipe is None:
+         try:
+             logger.info("Loading depth estimation model...")
+             depth_pipe = pipeline("depth-estimation", model=DEPTH_MODEL)
+             logger.info("Depth estimation model loaded successfully")
+         except Exception as e:
+             logger.error(f"Error loading depth model: {e}")
+             logger.error(traceback.format_exc())
+             return None
+     return depth_pipe

  def get_segmentation_mask(input_image):
+     """Get segmentation mask using the segmentation pipeline"""
+     try:
+         # Load the model if not already loaded
+         model = load_segmentation_model()
+         if model is None:
+             logger.warning("Failed to load segmentation model, returning empty mask")
+             return Image.new('L', (512, 512), 0), input_image
+
+         # Process the image
+         input_pil = Image.fromarray(input_image).convert('RGB') if isinstance(input_image, np.ndarray) else input_image.convert('RGB')
+         input_pil = input_pil.resize((512, 512))
+
+         # Get segmentation results
+         results = model(input_pil)
+
+         # Extract mask (handling different model outputs)
+         if isinstance(results, list) and len(results) > 0:
+             if 'mask' in results[0]:
+                 mask = results[0]['mask']
+                 # Convert mask to proper format
+                 if isinstance(mask, Image.Image):
+                     mask_array = np.array(mask)
+                 else:
+                     mask_array = mask
+                 mask_array = (mask_array * 255).astype(np.uint8)
+             else:
+                 # Create composite mask from segmentation results
+                 mask_array = np.zeros((512, 512), dtype=np.uint8)
+                 for segment in results:
+                     if 'segmentation' in segment:
+                         mask_array = np.logical_or(mask_array, segment['segmentation']).astype(np.uint8) * 255
+         else:
+             # Create blank mask as fallback
+             mask_array = np.zeros((512, 512), dtype=np.uint8)
+
+         # Convert to PIL Image
+         mask_img = Image.fromarray(mask_array)
+
+         return mask_img, input_pil

+     except Exception as e:
+         logger.error(f"Error in segmentation: {e}")
+         logger.error(traceback.format_exc())
+         # Return a blank mask in case of error
+         if isinstance(input_image, np.ndarray):
+             return Image.new('L', (512, 512), 0), Image.fromarray(input_image).resize((512, 512))
+         else:
+             return Image.new('L', (512, 512), 0), input_image.resize((512, 512))

  def apply_background_blur(original_image, mask_image, sigma=15):
      """Apply Gaussian blur to the background using a segmentation mask"""
+     try:
+         # Convert to PIL Image if needed
+         if isinstance(original_image, np.ndarray):
+             original_image = Image.fromarray(original_image)
+         if isinstance(mask_image, np.ndarray):
+             mask_image = Image.fromarray(mask_image)
+
+         # Ensure consistent sizes
+         original_image = original_image.resize((512, 512)).convert('RGB')
+         mask_image = mask_image.resize((512, 512)).convert('L')
+
+         # Ensure mask is binary
+         mask_array = np.array(mask_image)
+         _, binary_mask = cv2.threshold(mask_array, 127, 255, cv2.THRESH_BINARY)
+
+         # Create blurred version of the original
+         blurred_img = original_image.filter(ImageFilter.GaussianBlur(radius=sigma))
+
+         # Convert to numpy for processing
+         original_array = np.array(original_image)
+         blurred_array = np.array(blurred_img)
+
+         # Create mask for all 3 channels
+         mask_3d = np.stack([binary_mask, binary_mask, binary_mask], axis=2)
+
+         # Combine original foreground with blurred background
+         result_array = np.where(mask_3d == 255, original_array, blurred_array)
+
+         return Image.fromarray(result_array.astype(np.uint8))

+     except Exception as e:
+         logger.error(f"Error in background blur: {e}")
+         logger.error(traceback.format_exc())
+         # Return original image in case of error
+         return original_image

  def get_depth_map(input_image):
+     """Get depth map using the depth estimation pipeline"""
+     try:
+         # Load model if not already loaded
+         model = load_depth_model()
+         if model is None:
+             logger.warning("Failed to load depth model, returning empty depth map")
+             return Image.new('L', (512, 512), 128), np.ones((512, 512)) * 0.5
+
+         # Convert to PIL if needed
+         if isinstance(input_image, np.ndarray):
+             input_image = Image.fromarray(input_image)
+
+         # Ensure consistent format
+         input_image = input_image.resize((512, 512)).convert('RGB')
+
+         # Get depth estimation
+         result = model(input_image)
+         depth_map = result["depth"]
+
+         # Convert to numpy for further processing
+         depth_array = np.array(depth_map)
+
+         return depth_map, depth_array

+     except Exception as e:
+         logger.error(f"Error in depth estimation: {e}")
+         logger.error(traceback.format_exc())
+         # Return default depth in case of error
+         return Image.new('L', (512, 512), 128), np.ones((512, 512)) * 0.5

  def apply_depth_based_blur(original_image, depth_array, max_blur=30):
      """Apply variable Gaussian blur based on depth"""
+     try:
+         # Convert to PIL if needed
+         if isinstance(original_image, np.ndarray):
+             original_image = Image.fromarray(original_image)
+
+         # Ensure consistent size
+         original_image = original_image.resize((512, 512)).convert('RGB')
+
+         # Handle depth array format
+         if len(depth_array.shape) == 3 and depth_array.shape[2] > 1:
+             depth_array = np.mean(depth_array, axis=2)
+
+         # Normalize depth values
+         depth_min = np.min(depth_array)
+         depth_max = np.max(depth_array)
+
+         if depth_max > depth_min:
+             normalized_depth = (depth_array - depth_min) / (depth_max - depth_min)
+         else:
+             # Handle case where depth is constant
+             normalized_depth = np.zeros_like(depth_array)
+
+         # Create progressively blurred versions of the image
+         blurred_images = []
+         for blur_amount in range(max_blur + 1):
+             blurred_images.append(original_image.filter(ImageFilter.GaussianBlur(radius=blur_amount)))
+
+         # Create output array
+         result_array = np.zeros((512, 512, 3), dtype=np.uint8)
+
+         # Apply variable blur based on depth
+         height, width = normalized_depth.shape
+         for y in range(height):
+             for x in range(width):
+                 blur_radius = int(normalized_depth[y, x] * max_blur)
+                 result_array[y, x] = np.array(blurred_images[blur_radius])[y, x]
+
+         return Image.fromarray(result_array)

+     except Exception as e:
+         logger.error(f"Error in depth-based blur: {e}")
+         logger.error(traceback.format_exc())
+         # Return original image in case of error
+         return original_image

  def process_image(input_image, blur_sigma=15, max_depth_blur=30):
+     """Process the image through all steps with error handling"""
+     try:
+         if input_image is None:
+             logger.warning("No input image provided")
+             return None, None, None, None, None
+
+         # Step 1: Get segmentation mask
+         mask, resized_image = get_segmentation_mask(input_image)
+
+         # Step 2: Apply background blur
+         blurred_background = apply_background_blur(resized_image, mask, sigma=blur_sigma)
+
+         # Step 3: Get depth map
+         depth_map, depth_array = get_depth_map(resized_image)
+
+         # Step 4: Apply depth-based blur
+         depth_blur = apply_depth_based_blur(resized_image, depth_array, max_blur=max_depth_blur)
+
+         # Convert to numpy arrays for Gradio
+         input_np = np.array(resized_image)
+         mask_np = np.array(mask)
+         blurred_np = np.array(blurred_background)
+         depth_map_np = np.array(depth_map)
+         depth_blur_np = np.array(depth_blur)
+
+         return input_np, mask_np, blurred_np, depth_map_np, depth_blur_np

+     except Exception as e:
+         logger.error(f"Error in image processing: {e}")
+         logger.error(traceback.format_exc())
+
+         # Create blank outputs in case of error
+         empty = np.zeros((512, 512, 3), dtype=np.uint8)
+         empty_mask = np.zeros((512, 512), dtype=np.uint8)
+
+         if input_image is not None and isinstance(input_image, np.ndarray):
+             img_resized = cv2.resize(input_image, (512, 512))
+             return img_resized, empty_mask, empty, empty_mask, empty
+         else:
+             return empty, empty_mask, empty, empty_mask, empty

  # Create Gradio Interface
+ demo = gr.Interface(
+     fn=process_image,
+     inputs=[
+         gr.Image(type="numpy", label="Upload Image"),
+         gr.Slider(minimum=1, maximum=30, value=15, step=1, label="Background Blur Strength (σ)"),
+         gr.Slider(minimum=1, maximum=50, value=30, step=1, label="Max Depth Blur Strength")
+     ],
+     outputs=[
+         gr.Image(type="numpy", label="Original Image"),
+         gr.Image(type="numpy", label="Segmentation Mask"),
+         gr.Image(type="numpy", label="Background Blur"),
+         gr.Image(type="numpy", label="Depth Map"),
+         gr.Image(type="numpy", label="Depth-Based Lens Blur")
+     ],
+     title="Image Blur Effects - EEE 515 Assignment 3",
+     description="Upload an image to apply segmentation-based blur and depth-based lens blur effects",
+     examples=[["beach.jpeg", 15, 30]],
+     allow_flagging="never"
+ )

  # Launch the app
+ if __name__ == "__main__":
+     demo.launch()
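
The inner loop of the new apply_depth_based_blur still visits all 512 × 512 pixels in Python and converts a blurred frame to a NumPy array on every iteration. The same idea (pick each output pixel from one of max_blur + 1 pre-blurred copies) can be expressed with NumPy fancy indexing; the sketch below is illustrative only, the helper name depth_blur_vectorized is not part of app.py, and it assumes the 512×512 RGB image and normalized depth map the app already produces.

import numpy as np
from PIL import Image, ImageFilter

# Hypothetical helper (not in app.py): vectorized variant of the per-pixel loop
def depth_blur_vectorized(image, normalized_depth, max_blur=30):
    """Pick each output pixel from one of (max_blur + 1) pre-blurred copies, indexed by depth."""
    # Stack of progressively blurred frames, shape (max_blur + 1, H, W, 3)
    stack = np.stack([
        np.array(image.filter(ImageFilter.GaussianBlur(radius=r)))
        for r in range(max_blur + 1)
    ])
    # Per-pixel blur level in [0, max_blur]
    levels = np.clip((normalized_depth * max_blur).astype(int), 0, max_blur)
    ys, xs = np.indices(levels.shape)
    # Fancy indexing selects one pre-blurred pixel per location, replacing the y/x loop
    return Image.fromarray(stack[levels, ys, xs].astype(np.uint8))

The stack costs roughly (max_blur + 1) × 512 × 512 × 3 bytes (about 24 MB at max_blur = 30), trading memory for the removal of ~260,000 Python-level iterations per processed image.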