nsathya5 committed on
Commit
fa47ceb
·
verified ·
1 Parent(s): d460634

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +192 -205
app.py CHANGED
@@ -1,238 +1,224 @@
1
  import gradio as gr
 
2
  import numpy as np
3
- import cv2
4
- from PIL import Image
5
  import matplotlib.pyplot as plt
 
 
 
 
 
 
 
 
6
 
7
- def apply_gaussian_blur(image, sigma=15):
8
- """Apply Gaussian blur with specified sigma value."""
9
- return cv2.GaussianBlur(image, (0, 0), sigma)
10
 
11
- def create_foreground_mask(image):
12
- """Create a foreground mask using simple computer vision techniques.
13
- Returns a mask where 1 is foreground and 0 is background."""
14
- # Convert to RGB if needed
15
- if len(image.shape) == 2:
16
- img_rgb = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
17
- elif image.shape[2] == 4:
18
- img_rgb = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
19
- else:
20
- img_rgb = image.copy()
 
 
 
 
 
 
 
 
 
 
21
 
22
- # Try to use GrabCut algorithm for foreground extraction
23
- try:
24
- # Create initial mask
25
- mask = np.zeros(image.shape[:2], np.uint8)
26
-
27
- # Set a rectangle focusing on the center as probable foreground
28
- h, w = image.shape[:2]
29
- # Adjust these values to focus more on the central figure
30
- rect_w = int(w * 0.4)
31
- rect_h = int(h * 0.7)
32
- rect_x = int((w - rect_w) / 2)
33
- rect_y = int((h - rect_h) / 2)
34
- rect = (rect_x, rect_y, rect_w, rect_h)
35
-
36
- # Background and foreground models
37
- bgd_model = np.zeros((1, 65), np.float64)
38
- fgd_model = np.zeros((1, 65), np.float64)
39
-
40
- # Apply GrabCut
41
- cv2.grabCut(img_rgb, mask, rect, bgd_model, fgd_model, 5, cv2.GC_INIT_WITH_RECT)
42
-
43
- # Create mask where certain (foreground) or probable (foreground) is 1
44
- # 0 and 2 are background, 1 and 3 are foreground
45
- mask2 = np.where((mask == 1) | (mask == 3), 1, 0).astype('float32')
46
-
47
- # If the mask is almost empty, fallback to a simpler method
48
- if mask2.sum() < (h * w * 0.05):
49
- raise Exception("GrabCut produced an empty mask")
50
-
51
- # Smooth the mask
52
- mask2 = cv2.GaussianBlur(mask2, (21, 21), 7)
53
-
54
- return mask2
55
-
56
- except Exception as e:
57
- print(f"GrabCut failed: {e}, using fallback method")
58
 
59
- # Try color-based segmentation as a fallback
60
- try:
61
- # Convert to HSV color space
62
- hsv = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2HSV)
63
-
64
- # Calculate the histogram of the central area
65
- # This assumes the foreground object is in the center
66
- center_y, center_x = h // 2, w // 2
67
- center_size = min(h, w) // 4
68
- center_y1 = max(0, center_y - center_size)
69
- center_y2 = min(h, center_y + center_size)
70
- center_x1 = max(0, center_x - center_size)
71
- center_x2 = min(w, center_x + center_size)
72
-
73
- center_hsv = hsv[center_y1:center_y2, center_x1:center_x2]
74
-
75
- # Calculate histogram of central region (assumed to be foreground)
76
- hist_hue = cv2.calcHist([center_hsv], [0], None, [180], [0, 180])
77
- hist_sat = cv2.calcHist([center_hsv], [1], None, [256], [0, 256])
78
-
79
- # Normalize histograms
80
- cv2.normalize(hist_hue, hist_hue, 0, 255, cv2.NORM_MINMAX)
81
- cv2.normalize(hist_sat, hist_sat, 0, 255, cv2.NORM_MINMAX)
82
-
83
- # Create a mask based on color similarity to center region
84
- h_channel = hsv[:,:,0]
85
- s_channel = hsv[:,:,1]
86
-
87
- h_score = hist_hue[h_channel.astype(int)]
88
- s_score = hist_sat[s_channel.astype(int)]
89
-
90
- # Combine scores
91
- total_score = (h_score.reshape(h, w) + s_score.reshape(h, w)) / 2
92
-
93
- # Normalize and threshold
94
- total_score = total_score / total_score.max()
95
- mask = (total_score > 0.4).astype(np.float32)
96
-
97
- # Smooth the mask
98
- mask = cv2.GaussianBlur(mask, (21, 21), 5)
99
-
100
- return mask
101
-
102
- except Exception as e:
103
- print(f"Color segmentation failed: {e}, using simple mask")
104
 
105
- # Final fallback: Create a simple radial mask focusing on center
106
- h, w = image.shape[:2]
107
- y, x = np.ogrid[:h, :w]
108
- center_y, center_x = h / 2, w / 2
 
 
 
109
 
110
- # Create a circular mask (foreground is in center)
111
- mask = ((x - center_x)**2 / (w/3)**2 + (y - center_y)**2 / (h/3)**2) <= 1
 
 
 
 
112
 
113
- # Convert to float and smooth edges
114
- mask = mask.astype(np.float32)
115
- mask = cv2.GaussianBlur(mask, (51, 51), 30)
116
 
117
- return mask
 
 
 
 
 
 
 
 
 
 
 
118
 
119
- def create_depth_map(image):
120
- """Create a depth map using simple techniques.
121
- Lower values in the depth map mean closer to camera."""
122
- # Convert to grayscale for processing
123
- if len(image.shape) > 2:
124
- gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
 
 
125
  else:
126
- gray = image.copy()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
128
- # 1. Use Sobel for edge detection (sharp edges = closer)
129
- sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
130
- sobely = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
131
- gradient_magnitude = np.sqrt(sobelx**2 + sobely**2)
132
- gradient_norm = gradient_magnitude / gradient_magnitude.max()
 
 
133
 
134
- # 2. Create radial gradient from center (center = closer)
135
- h, w = image.shape[:2]
136
- y, x = np.ogrid[:h, :w]
137
- center_y, center_x = h / 2, w / 2
138
- radial = ((x - center_x)**2 / (w/2)**2 + (y - center_y)**2 / (h/2)**2)
139
- radial = np.clip(radial, 0, 1)
140
 
141
- # 3. Combine (higher value = further from camera)
142
- # Edges and center have lower values (closer)
143
- depth = 0.7 * radial + 0.3 * (1 - gradient_norm)
 
 
 
 
144
 
145
- # 4. Smooth the depth map
146
- depth = cv2.GaussianBlur(depth, (21, 21), 5)
 
147
 
148
- return depth
 
 
 
 
 
149
 
 
150
  def process_image(input_image, blur_type="Gaussian Blur", blur_intensity=15):
151
- """Process the input image and return the results.
152
- Ensures the foreground is kept sharp while background is blurred."""
153
  try:
154
- # Convert from Gradio format to numpy
155
- if isinstance(input_image, np.ndarray):
156
- img = input_image.copy()
157
- else:
158
  img = np.array(input_image)
159
-
160
- # Handle grayscale or RGBA images
 
 
161
  if img.ndim == 2: # Grayscale
162
- img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
163
  elif img.shape[2] == 4: # RGBA
164
- img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
165
-
166
- # 1. Get foreground mask (1 = foreground, 0 = background)
167
- mask = create_foreground_mask(img)
168
-
169
- # Visualize the mask (foreground in red)
170
- mask_vis = np.zeros_like(img)
171
- mask_vis[:,:,0] = mask * 255 # Red channel
172
-
173
- # 2. Get depth map (lower value = closer to camera)
174
- depth_map = create_depth_map(img)
175
-
176
- # Normalize depth map for visualization
177
- depth_norm = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min() + 1e-8)
178
- depth_vis = plt.cm.viridis(depth_norm)[:, :, :3]
179
- depth_vis = (depth_vis * 255).astype(np.uint8)
180
 
181
- # Apply appropriate blur effect
182
- result = img.copy()
183
 
 
184
  if blur_type == "Gaussian Blur":
185
- # Apply standard Gaussian blur to the entire image
186
- blurred_img = apply_gaussian_blur(img, sigma=blur_intensity)
 
187
 
188
- # Keep the foreground sharp by using the mask:
189
- # result = foreground + blurred background
190
- # = mask * original + (1-mask) * blurred
191
- for c in range(3):
192
- result[:,:,c] = mask * img[:,:,c] + (1-mask) * blurred_img[:,:,c]
193
-
194
  else: # "Depth-based Lens Blur"
195
- # For depth-based blur, we apply blur based on depth values
196
- # First, combine mask with depth (ensure foreground stays sharp)
197
- combined_depth = depth_norm.copy()
198
- # Set foreground areas to 0 in depth map (closest)
199
- combined_depth = combined_depth * (1 - mask)
200
-
201
- # Create multiple blur levels based on depth
202
- num_levels = 5
203
- for i in range(num_levels):
204
- # Calculate sigma for this level (further = more blur)
205
- sigma = blur_intensity * (i + 1) / num_levels
206
- level_blurred = apply_gaussian_blur(img, sigma=sigma)
207
-
208
- # Calculate weight for this blur level
209
- depth_min = i / num_levels
210
- depth_max = (i + 1) / num_levels
211
- weight = (combined_depth >= depth_min) & (combined_depth < depth_max)
212
-
213
- # Apply this blur level where applicable
214
- for c in range(3):
215
- result[:,:,c] = np.where(weight, level_blurred[:,:,c], result[:,:,c])
216
-
217
- # Handle max depth level
218
- max_sigma = blur_intensity
219
- max_blurred = apply_gaussian_blur(img, sigma=max_sigma)
220
- max_weight = (combined_depth >= ((num_levels-1) / num_levels))
221
- for c in range(3):
222
- result[:,:,c] = np.where(max_weight, max_blurred[:,:,c], result[:,:,c])
223
-
224
- # Ensure foreground remains completely sharp
225
- for c in range(3):
226
- result[:,:,c] = mask * img[:,:,c] + (1-mask) * result[:,:,c]
227
-
228
- # Convert to uint8 for display
229
- result = result.astype(np.uint8)
230
- mask_vis = mask_vis.astype(np.uint8)
231
 
232
- return result, mask_vis, depth_vis
233
 
234
  except Exception as e:
235
  print(f"Error processing image: {e}")
 
 
236
  # Return original image if processing fails
237
  if isinstance(input_image, np.ndarray):
238
  return input_image, input_image, input_image
@@ -269,14 +255,14 @@ with gr.Blocks(title="Image Blur Effects") as demo:
269
  apply_button = gr.Button("Apply Effect")
270
 
271
  with gr.Row():
272
- segmentation_mask = gr.Image(label="Foreground Mask")
273
  depth_map = gr.Image(label="Depth Map")
274
 
275
  # Set up the click event
276
  apply_button.click(
277
  process_image,
278
  inputs=[input_image, blur_effect_type, blur_intensity],
279
- outputs=[output_image, segmentation_mask, depth_map]
280
  )
281
 
282
  gr.Markdown("## How to Use")
@@ -288,7 +274,8 @@ with gr.Blocks(title="Image Blur Effects") as demo:
288
  gr.Markdown("### Notes")
289
  gr.Markdown("- The red areas in the Foreground Mask show what will remain sharp")
290
  gr.Markdown("- Depth-based blur creates a more realistic effect with blur increasing with distance")
291
- gr.Markdown("- For best results, ensure your subject is clearly visible in the center of the image")
 
292
 
293
  # Launch the demo
294
  demo.launch()
 
1
  import gradio as gr
2
+ import torch
3
  import numpy as np
 
 
4
  import matplotlib.pyplot as plt
5
+ from PIL import Image
6
+ from transformers import DPTImageProcessor, DPTForDepthEstimation
7
+ from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation
8
+ from scipy.ndimage import gaussian_filter
9
+ import cv2
10
+ import os
11
+ import io
12
+ import time
13
 
14
# Both models are loaded once at import time so that every request reuses them
print("Loading models...")

# Hub checkpoints used by the app
SEGMENTATION_CHECKPOINT = "nvidia/segformer-b5-finetuned-ade-640-640"
DEPTH_CHECKPOINT = "Intel/dpt-large"

# Segmentation: image processor + Segformer model.
# A failure is only reported; the names stay undefined in that case.
try:
    seg_processor = SegformerImageProcessor.from_pretrained(SEGMENTATION_CHECKPOINT)
    seg_model = SegformerForSemanticSegmentation.from_pretrained(SEGMENTATION_CHECKPOINT)
    print("✓ Segmentation model loaded successfully")
except Exception as seg_error:
    print(f"! Error loading segmentation model: {seg_error}")

# Depth estimation: image processor + DPT model (same best-effort handling)
try:
    depth_processor = DPTImageProcessor.from_pretrained(DEPTH_CHECKPOINT)
    depth_model = DPTForDepthEstimation.from_pretrained(DEPTH_CHECKPOINT)
    print("✓ Depth model loaded successfully")
except Exception as depth_error:
    print(f"! Error loading depth model: {depth_error}")
32
+
33
+ # Function for image segmentation
34
+ def segment_image(image):
35
+ """Segment the image to extract person/foreground"""
36
+ print("Running image segmentation with Segformer...")
37
 
38
+ # Convert to PIL Image if needed
39
+ if not isinstance(image, Image.Image):
40
+ image = Image.fromarray(image)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
+ # Original dimensions
43
+ original_size = image.size
44
+ model_image = image.resize((512, 512))
45
+
46
+ # Process image with model
47
+ inputs = seg_processor(images=model_image, return_tensors="pt")
48
+
49
+ # Run inference
50
+ with torch.no_grad():
51
+ outputs = seg_model(**inputs)
52
+ logits = outputs.logits
53
+
54
+ # Extract person class (class 12 in ADE20K dataset)
55
+ person_class = 12
56
+ predicted_mask = torch.argmax(logits, dim=1)
57
+ binary_mask = (predicted_mask == person_class).cpu().numpy()[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
+ # If person not found, try to find any prominent foreground object
60
+ if binary_mask.sum() < 100: # If almost no pixels were classified as person
61
+ # Try other common foreground classes
62
+ for cls in [13, 14, 15, 16, 17]: # Try vehicles, animals, etc.
63
+ cls_mask = (predicted_mask == cls).cpu().numpy()[0]
64
+ if cls_mask.sum() > binary_mask.sum():
65
+ binary_mask = cls_mask
66
 
67
+ # Improve mask with morphological operations
68
+ mask_small = Image.fromarray((binary_mask * 255).astype(np.uint8))
69
+ mask_cv = np.array(mask_small)
70
+ kernel = np.ones((5, 5), np.uint8)
71
+ mask_cv = cv2.morphologyEx(mask_cv, cv2.MORPH_CLOSE, kernel)
72
+ mask_cv = cv2.morphologyEx(mask_cv, cv2.MORPH_OPEN, kernel)
73
 
74
+ # Apply Gaussian blur to smooth the edges
75
+ mask_cv = cv2.GaussianBlur(mask_cv, (9, 9), 0)
76
+ _, mask_cv = cv2.threshold(mask_cv, 128, 255, cv2.THRESH_BINARY)
77
 
78
+ # Resize back to original image size
79
+ mask_small = Image.fromarray(mask_cv)
80
+ mask_image = mask_small.resize(original_size, Image.BICUBIC)
81
+
82
+ # Create binary mask
83
+ mask_array = np.array(mask_image) > 0
84
+
85
+ # Create colored mask for visualization
86
+ mask_rgb = np.zeros((mask_array.shape[0], mask_array.shape[1], 3), dtype=np.uint8)
87
+ mask_rgb[:,:,0] = mask_array * 255 # Red channel for visualization
88
+
89
+ return mask_array, mask_rgb
90
 
91
+ # Function to apply Gaussian blur to background
92
def apply_background_blur(image, mask, sigma=15):
    """Apply Gaussian blur to the background while keeping the foreground sharp.

    Args:
        image: H x W x C image (or H x W grayscale) as a numpy array or any
            object ``np.asarray`` can convert, such as a PIL image.
        mask: Foreground mask, H x W or H x W x K (only channel 0 is used).
            Values greater than zero mark foreground.
        sigma: Standard deviation of the Gaussian applied along both spatial
            axes; 0 leaves the image unchanged.

    Returns:
        A new array with the shape and dtype of the input image: original
        pixels where the mask is set, blurred pixels elsewhere. The input is
        not modified.
    """
    print(f"Applying background blur with sigma={sigma}...")

    image_array = np.asarray(image)
    mask_array = np.asarray(mask)

    # Ensure mask is a 2-D boolean array
    if mask_array.ndim == 3:
        binary_mask = mask_array[:, :, 0] > 0
    else:
        binary_mask = mask_array > 0

    # Blur in floating point. With an integer input, gaussian_filter keeps its
    # intermediate per-axis passes in the integer output type, which truncates
    # them and biases the result. Channels are not mixed (sigma 0 on that axis).
    extra_axes = image_array.ndim - 2
    blurred = gaussian_filter(
        image_array.astype(np.float32),
        sigma=(sigma, sigma) + (0,) * extra_axes,
    )
    if np.issubdtype(image_array.dtype, np.integer):
        limits = np.iinfo(image_array.dtype)
        blurred = np.clip(np.rint(blurred), limits.min, limits.max)
    blurred = blurred.astype(image_array.dtype)

    # Combine original foreground with blurred background
    selector = binary_mask.reshape(binary_mask.shape + (1,) * extra_axes)
    return np.where(selector, image_array, blurred)
119
+
120
+ # Function for depth estimation and depth-based blur
121
def apply_depth_based_blur(image, mask=None, max_sigma=15):
    """Apply depth-based blur using the DPT depth model.

    The image is blended, per pixel, between itself and a copy blurred with
    ``max_sigma``; the blend weight is one minus the normalised model output,
    so pixels with a high model output stay sharp.

    Args:
        image: RGB input, either a PIL image or an array accepted by
            ``Image.fromarray``.
        mask: Optional foreground mask at the input image's size (H x W, or
            H x W x K with channel 0 used). Values greater than zero mark
            pixels that must not be blurred.
        max_sigma: Gaussian sigma of the fully blurred layer, applied at the
            512 x 512 working resolution.

    Returns:
        A tuple ``(depth_map_image, depth_blur_image)`` of uint8 RGB arrays at
        the input image's size: the viridis-coloured depth map and the
        blurred result.
    """
    print(f"Running depth estimation and applying depth-based blur with max_sigma={max_sigma}...")

    # Convert to PIL Image if needed; the blending below assumes three channels
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)
    image = image.convert("RGB")

    original_size = image.size  # PIL order: (width, height)
    model_size = (512, 512)
    model_image = image.resize(model_size, Image.LANCZOS)

    # Run depth estimation
    inputs = depth_processor(images=model_image, return_tensors="pt")
    with torch.no_grad():
        outputs = depth_model(**inputs)
    depth = outputs.predicted_depth.squeeze().cpu().numpy().astype(np.float32)

    # Normalise to [0, 1]; a constant prediction would otherwise divide by zero
    depth_span = float(depth.max() - depth.min())
    if depth_span > 0:
        depth_map = (depth - depth.min()) / depth_span
    else:
        depth_map = np.zeros_like(depth)

    # Bring the depth map to the working resolution. Lanczos resampling can
    # overshoot, so clip to keep the blend weights inside [0, 1].
    depth_pil = Image.fromarray(depth_map)
    depth_map_resized = np.clip(
        np.array(depth_pil.resize(model_size, Image.LANCZOS)), 0.0, 1.0
    )

    # Invert depth map (closer objects should be less blurred)
    blur_weight = 1.0 - depth_map_resized

    # If a mask is provided, force zero blur on foreground pixels
    foreground = None
    if mask is not None:
        foreground = np.asarray(mask)
        if foreground.ndim == 3:
            foreground = foreground[:, :, 0]
        # Normalise to bool first: multiplying a 0/255 uint8 mask by 255
        # would overflow.
        foreground = foreground > 0
        mask_pil = Image.fromarray(foreground.astype(np.uint8) * 255)
        mask_resized = np.array(mask_pil.resize(model_size, Image.LANCZOS)) > 128
        blur_weight = np.where(mask_resized, 0.0, blur_weight)

    # Blend the sharp and fully blurred images in floating point; sigma 0 on
    # the last axis keeps the colour channels independent.
    original_array = np.asarray(model_image, dtype=np.float32)
    max_blurred = gaussian_filter(original_array, sigma=(max_sigma, max_sigma, 0))
    weight = blur_weight[:, :, np.newaxis]
    blended = (1.0 - weight) * original_array + weight * max_blurred
    result_array = np.clip(np.rint(blended), 0, 255).astype(np.uint8)

    # Resize back to original image size
    depth_blur_image = np.array(
        Image.fromarray(result_array).resize(original_size, Image.LANCZOS)
    )

    # The round trip through the working resolution softens every pixel, so
    # paste the untouched original back wherever the mask marks foreground.
    if foreground is not None and foreground.shape == depth_blur_image.shape[:2]:
        depth_blur_image = np.where(
            foreground[:, :, np.newaxis], np.asarray(image), depth_blur_image
        )

    # Create colored depth map for visualization
    depth_map_colored = plt.cm.viridis(depth_map)[:, :, :3]
    depth_map_viz = Image.fromarray((depth_map_colored * 255).astype(np.uint8))
    depth_map_image = depth_map_viz.resize(original_size, Image.LANCZOS)

    return np.array(depth_map_image), depth_blur_image
183
 
184
+ # Main processing function
185
  def process_image(input_image, blur_type="Gaussian Blur", blur_intensity=15):
186
+ """Process the input image with the selected blur effect"""
 
187
  try:
188
+ # Convert from Gradio format
189
+ if not isinstance(input_image, np.ndarray):
 
 
190
  img = np.array(input_image)
191
+ else:
192
+ img = input_image.copy()
193
+
194
+ # Ensure RGB format
195
  if img.ndim == 2: # Grayscale
196
+ img = np.stack([img] * 3, axis=2)
197
  elif img.shape[2] == 4: # RGBA
198
+ img = img[:, :, :3] # Drop alpha channel
199
+
200
+ # Convert to PIL for processing
201
+ pil_img = Image.fromarray(img)
 
 
 
 
 
 
 
 
 
 
 
 
202
 
203
+ # Step 1: Get segmentation mask
204
+ mask_array, mask_viz = segment_image(pil_img)
205
 
206
+ # Step 2: Apply appropriate blur effect
207
  if blur_type == "Gaussian Blur":
208
+ # Apply regular Gaussian blur
209
+ result = apply_background_blur(pil_img, mask_array, sigma=blur_intensity)
210
+ depth_viz = np.zeros_like(img) # Placeholder for depth map
211
 
 
 
 
 
 
 
212
  else: # "Depth-based Lens Blur"
213
+ # Apply depth-based blur
214
+ depth_viz, result = apply_depth_based_blur(pil_img, mask_array, max_sigma=blur_intensity)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
 
216
+ return result, mask_viz, depth_viz
217
 
218
  except Exception as e:
219
  print(f"Error processing image: {e}")
220
+ import traceback
221
+ traceback.print_exc()
222
  # Return original image if processing fails
223
  if isinstance(input_image, np.ndarray):
224
  return input_image, input_image, input_image
 
255
  apply_button = gr.Button("Apply Effect")
256
 
257
  with gr.Row():
258
+ foreground_mask = gr.Image(label="Foreground Mask")
259
  depth_map = gr.Image(label="Depth Map")
260
 
261
  # Set up the click event
262
  apply_button.click(
263
  process_image,
264
  inputs=[input_image, blur_effect_type, blur_intensity],
265
+ outputs=[output_image, foreground_mask, depth_map]
266
  )
267
 
268
  gr.Markdown("## How to Use")
 
274
  gr.Markdown("### Notes")
275
  gr.Markdown("- The red areas in the Foreground Mask show what will remain sharp")
276
  gr.Markdown("- Depth-based blur creates a more realistic effect with blur increasing with distance")
277
+ gr.Markdown("- For best results, use images with clear foreground subjects")
278
+ gr.Markdown("- Created for EEE 515 Assignment (Problem 2, Part 6)")
279
 
280
  # Launch the demo
281
  demo.launch()