File size: 11,985 Bytes
7f69a30
fa47ceb
7f69a30
1ec7e98
fa47ceb
 
 
 
 
 
 
 
7f69a30
fa47ceb
 
7f69a30
fa47ceb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d460634
fa47ceb
 
 
d460634
fa47ceb
 
a9e0802
 
 
 
fa47ceb
 
 
 
 
 
 
 
 
 
 
 
 
7f69a30
fa47ceb
 
 
 
 
 
 
7f69a30
a9e0802
 
 
 
fa47ceb
 
 
7f69a30
a9e0802
 
fa47ceb
7f69a30
a9e0802
 
 
fa47ceb
a9e0802
 
fa47ceb
df258b2
a9e0802
df258b2
a9e0802
df258b2
 
fa47ceb
a9e0802
7f69a30
2885205
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fa47ceb
 
 
 
 
 
 
 
d460634
fa47ceb
 
 
 
 
 
 
 
 
a9e0802
 
fa47ceb
 
 
 
 
 
 
 
2885205
 
 
 
fa47ceb
 
 
 
 
 
 
a9e0802
2885205
 
 
 
 
fa47ceb
 
2885205
7f69a30
2885205
fa47ceb
2885205
 
 
 
 
 
 
 
 
 
fa47ceb
2885205
d460634
a9e0802
2885205
d460634
a9e0802
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7f69a30
2885205
7f69a30
fa47ceb
d460634
fa47ceb
1ec7e98
fa47ceb
 
d460634
fa47ceb
 
 
 
1ec7e98
fa47ceb
1ec7e98
fa47ceb
 
 
 
1ec7e98
fa47ceb
 
d460634
2885205
 
 
 
d460634
fa47ceb
 
d460634
fa47ceb
2885205
1ec7e98
fa47ceb
1ec7e98
 
 
fa47ceb
 
1ec7e98
 
 
 
 
 
7f69a30
1ec7e98
 
 
 
 
 
7f69a30
1ec7e98
 
 
7f69a30
1ec7e98
 
 
 
 
 
 
 
 
 
 
 
 
7f69a30
 
1ec7e98
7f69a30
 
fa47ceb
1ec7e98
7f69a30
1ec7e98
 
 
 
fa47ceb
7f69a30
 
d460634
1ec7e98
 
 
 
d460634
 
df258b2
2885205
 
 
fa47ceb
7f69a30
1ec7e98
7f69a30
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
import gradio as gr
import torch
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from transformers import DPTImageProcessor, DPTForDepthEstimation
from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation
from scipy.ndimage import gaussian_filter
import cv2
import os
import io
import time

# Load both models once at import time so every request reuses the same
# weights instead of reloading them per inference.
print("Loading models...")

# Segmentation model (Segformer checkpoint named "...-finetuned-ade-640-640").
# NOTE: a failed load is only reported, not re-raised. In that case
# seg_processor / seg_model are never assigned, so segment_image() will raise
# NameError when it is first called.
try:
    seg_processor = SegformerImageProcessor.from_pretrained("nvidia/segformer-b5-finetuned-ade-640-640")
    seg_model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b5-finetuned-ade-640-640")
    print("✓ Segmentation model loaded successfully")
except Exception as e:
    print(f"! Error loading segmentation model: {e}")

# Depth estimation model (DPT large). Same best-effort policy as above:
# on failure depth_processor / depth_model stay undefined.
try:
    depth_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
    depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
    print("✓ Depth model loaded successfully")
except Exception as e:
    print(f"! Error loading depth model: {e}")

# Function for image segmentation
def segment_image(image):
    """Build a boolean foreground mask for *image* with the Segformer model.

    Args:
        image: A PIL image, or an array accepted by ``Image.fromarray``.

    Returns:
        A ``(mask, mask_viz)`` tuple. ``mask`` is a 2-D boolean array at the
        input resolution, True where foreground was detected. ``mask_viz`` is
        an ``(H, W, 3)`` uint8 rendering of the same mask, white on black.
    """
    print("Running image segmentation with Segformer...")

    pil_image = image if isinstance(image, Image.Image) else Image.fromarray(image)
    full_size = pil_image.size  # (width, height); the mask is scaled back to this

    # The model is always fed a fixed 640x640 version of the picture.
    network_input = pil_image.resize((640, 640), Image.LANCZOS)
    encoded = seg_processor(images=network_input, return_tensors="pt")

    with torch.no_grad():
        class_scores = seg_model(**encoded).logits

    # Winning class per pixel for the single image in the batch.
    label_map = torch.argmax(class_scores, dim=1).cpu().numpy()[0]

    # Class 12 is used as the "person" label (ADE20K indexing).
    foreground = label_map == 12

    # With fewer than 100 person pixels, fall back to whichever of classes
    # 13-17 covers more pixels than the current mask.
    # NOTE(review): these were described as "vehicles, animals, etc." --
    # confirm against the checkpoint's id2label mapping.
    if foreground.sum() < 100:
        for candidate_class in (13, 14, 15, 16, 17):
            candidate = label_map == candidate_class
            if candidate.sum() > foreground.sum():
                foreground = candidate

    # OpenCV works on 0/255 uint8 masks.
    mask_u8 = np.where(foreground, 255, 0).astype(np.uint8)

    # Close small holes first, then open to drop isolated specks.
    structuring = np.ones((5, 5), np.uint8)
    for operation in (cv2.MORPH_CLOSE, cv2.MORPH_OPEN):
        mask_u8 = cv2.morphologyEx(mask_u8, operation, structuring)

    # Light blur followed by a threshold rounds off jagged edges.
    mask_u8 = cv2.GaussianBlur(mask_u8, (7, 7), 0)
    mask_u8 = cv2.threshold(mask_u8, 128, 255, cv2.THRESH_BINARY)[1]

    # Scale back to the input resolution (Lanczos resampling) and re-binarize.
    upscaled = Image.fromarray(mask_u8).resize(full_size, Image.LANCZOS)
    mask_array = np.array(upscaled) > 128

    # White-on-black preview: the same 0/255 plane repeated for R, G and B.
    white_plane = np.where(mask_array, 255, 0).astype(np.uint8)
    mask_viz = np.repeat(white_plane[:, :, np.newaxis], 3, axis=2)

    return mask_array, mask_viz

# Function to get depth map
def get_depth_map(image):
    """Estimate a normalized depth map for *image* with the DPT model.

    Args:
        image: A PIL image, or an array accepted by ``Image.fromarray``.

    Returns:
        A ``(depth_viz, depth_map)`` tuple. ``depth_viz`` is an RGB uint8
        array at the input resolution, colored with the viridis colormap.
        ``depth_map`` is the model output min-max scaled to ``[0, 1]``; it is
        NOT resized to the input resolution, so callers must resize it.
        A constant prediction yields an all-zero ``depth_map``.
    """
    print("Running depth estimation...")

    # Convert to PIL Image if needed
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)

    # (width, height) used to scale the visualization back up
    original_size = image.size

    # Fixed-size input for depth estimation
    model_size = (640, 640)
    model_image = image.resize(model_size, Image.LANCZOS)

    inputs = depth_processor(images=model_image, return_tensors="pt")

    with torch.no_grad():
        predicted_depth = depth_model(**inputs).predicted_depth

    depth = predicted_depth.squeeze().cpu().numpy()

    # Min-max normalize. Guard the degenerate case of a constant (or
    # non-finite) prediction, which would otherwise divide by zero and fill
    # the map with NaN.
    depth_low = depth.min()
    depth_span = depth.max() - depth_low
    if depth_span > 0:
        depth_map = (depth - depth_low) / depth_span
    else:
        depth_map = np.zeros_like(depth)

    # viridis returns RGBA floats in [0, 1]; keep RGB and convert to uint8
    depth_map_colored = plt.cm.viridis(depth_map)[:, :, :3]
    depth_map_viz = Image.fromarray((depth_map_colored * 255).astype(np.uint8))
    depth_map_viz_resized = depth_map_viz.resize(original_size, Image.LANCZOS)

    # Return both visualization and raw (model-resolution) depth map
    return np.array(depth_map_viz_resized), depth_map

# Function to apply Gaussian blur to background
def apply_background_blur(image, mask, sigma=15):
    """Blur the background of *image* while keeping masked pixels sharp.

    Args:
        image: A PIL image or a numpy array, either ``(H, W)`` or
            ``(H, W, C)``.
        mask: Foreground mask with the same height and width as ``image``.
            Any value > 0 marks foreground. For a 3-D mask only the first
            channel is used.
        sigma: Gaussian standard deviation. A non-positive value disables
            blurring.

    Returns:
        A new numpy array with the same shape and dtype as the input pixels.
        Foreground pixels are copied from the input, all others from the
        blurred image.
    """
    print(f"Applying background blur with sigma={sigma}...")

    image_array = np.array(image) if isinstance(image, Image.Image) else np.asarray(image)

    # Reduce the mask to a 2-D boolean plane
    mask = np.asarray(mask)
    binary_mask = (mask[:, :, 0] if mask.ndim == 3 else mask) > 0

    # With a (0, 0) kernel OpenCV derives the kernel size from sigma, so
    # sigma must be positive; treat anything else as "no blur".
    if sigma <= 0:
        return image_array.copy()

    blurred = cv2.GaussianBlur(image_array, (0, 0), sigma)

    # Broadcast the mask over the channel axis so any channel count works
    # (grayscale, RGB, RGBA) instead of assuming exactly three channels.
    keep_sharp = binary_mask[:, :, np.newaxis] if image_array.ndim == 3 else binary_mask
    return np.where(keep_sharp, image_array, blurred)

# Function for depth-based blur
def apply_depth_based_blur(image, mask, depth_map, max_sigma=15):
    """Blur *image* progressively according to a normalized depth map.

    The depth map is inverted (``1 - depth``), so larger ``depth_map`` values
    are treated as nearer and receive less blur. The inverted range
    ``[0, 1]`` is split into 8 equal bands; band ``i`` is blurred with
    ``max_sigma * (i + 1) / 8``.

    Args:
        image: A PIL image, or an array accepted by ``Image.fromarray``.
        mask: Optional foreground mask at image resolution (values > 0 are
            foreground; for a 3-D mask only the first channel is used).
            Foreground pixels are never blurred. ``None`` disables masking.
        depth_map: 2-D array of depth values, expected in ``[0, 1]``. It is
            resized to the image resolution when the shapes differ.
        max_sigma: Gaussian sigma applied to the farthest band. A
            non-positive value disables blurring.

    Returns:
        A new numpy array with the same shape as the input pixels.
    """
    print(f"Applying depth-based blur with max_sigma={max_sigma}...")

    # Convert to PIL Image if needed
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)

    img_array = np.array(image)
    result = img_array.copy()

    # OpenCV needs a positive sigma when the kernel size is (0, 0)
    if max_sigma <= 0:
        return result

    original_size = image.size  # (width, height)

    # Bring the depth map to the image resolution if needed
    depth_map = np.asarray(depth_map)
    if depth_map.shape[:2] != original_size[::-1]:
        depth_map = cv2.resize(depth_map, original_size, interpolation=cv2.INTER_CUBIC)

    # Invert so that larger values mean "more blur"
    blur_amount = 1.0 - depth_map

    # Pixels without a usable depth value are left untouched
    blurrable = np.isfinite(blur_amount)

    # Cubic resampling can overshoot [0, 1]; clip so every pixel lands in a
    # band instead of silently falling outside all of them.
    blur_amount = np.clip(np.where(blurrable, blur_amount, 0.0), 0.0, 1.0)

    # Exclude the foreground explicitly. Merely zeroing its blur amount is
    # not enough, because 0 belongs to the first band and would be blurred.
    if mask is not None:
        mask_plane = np.asarray(mask)
        if mask_plane.ndim == 3:
            mask_plane = mask_plane[:, :, 0]
        blurrable &= ~(mask_plane > 0)

    # Assign each pixel to a band; the minimum puts blur_amount == 1.0 (the
    # farthest pixels) into the last band rather than into none.
    num_levels = 8
    level_index = np.minimum((blur_amount * num_levels).astype(np.intp), num_levels - 1)

    for level in range(num_levels):
        selected = blurrable & (level_index == level)
        if not selected.any():
            continue  # skip the blur when no pixel falls in this band

        level_sigma = max_sigma * (level + 1) / num_levels
        level_blurred = cv2.GaussianBlur(img_array, (0, 0), level_sigma)

        # Boolean indexing on (H, W) copies all channels of selected pixels
        result[selected] = level_blurred[selected]

    return result

# Main processing function
def process_image(input_image, blur_type="Gaussian Blur", blur_intensity=15):
    """Run segmentation, depth estimation and the selected blur on an image.

    Args:
        input_image: Image from the UI as a numpy array (or anything
            ``np.array`` can convert). ``None`` means no image was supplied.
        blur_type: ``"Gaussian Blur"`` for a uniform background blur; any
            other value selects the depth-based blur.
        blur_intensity: Sigma for the Gaussian blur, or the maximum sigma
            for the depth-based blur.

    Returns:
        A ``(result, mask_viz, depth_viz)`` tuple of images. When
        ``input_image`` is ``None`` all three are ``None``. When processing
        fails the error is printed and the input image is returned in all
        three slots.
    """
    # Nothing uploaded yet: return empty outputs instead of pushing None
    # through the pipeline and handing object arrays back to the UI.
    if input_image is None:
        print("No input image provided; nothing to process.")
        return None, None, None

    try:
        # Work on a private numpy copy of the input
        if not isinstance(input_image, np.ndarray):
            img = np.array(input_image)
        else:
            img = input_image.copy()

        # Ensure RGB format
        if img.ndim == 2:  # Grayscale
            img = np.stack([img] * 3, axis=2)
        elif img.shape[2] == 4:  # RGBA
            img = img[:, :, :3]  # Drop alpha channel

        # Convert to PIL for processing
        pil_img = Image.fromarray(img)

        # Step 1: Get segmentation mask
        mask_array, mask_viz = segment_image(pil_img)

        # Step 2: Always get depth map (it is displayed for both blur types)
        depth_viz, depth_map = get_depth_map(pil_img)

        # Step 3: Apply appropriate blur effect
        if blur_type == "Gaussian Blur":
            result = apply_background_blur(pil_img, mask_array, sigma=blur_intensity)
        else:  # "Depth-based Lens Blur"
            result = apply_depth_based_blur(pil_img, mask_array, depth_map, max_sigma=blur_intensity)

        return result, mask_viz, depth_viz

    except Exception as e:
        # Top-level UI boundary: report the failure and fall back to showing
        # the unprocessed input in every output slot.
        print(f"Error processing image: {e}")
        import traceback
        traceback.print_exc()
        if isinstance(input_image, np.ndarray):
            return input_image, input_image, input_image
        else:
            img = np.array(input_image)
            return img, img, img

# Build the Gradio UI: input/output images, blur controls, and previews of
# the intermediate foreground mask and depth map.
with gr.Blocks(title="Image Blur Effects") as demo:
    gr.Markdown("# Image Blur Effects App")
    gr.Markdown("Upload an image to apply two types of blur effects:")
    gr.Markdown("1. **Gaussian Blur**: Blurs the background while keeping the foreground sharp")
    gr.Markdown("2. **Depth-based Lens Blur**: Applies varying blur intensities based on estimated depth")
    
    with gr.Row():
        input_image = gr.Image(label="Input Image", type="numpy")
        output_image = gr.Image(label="Output Image")
    
    with gr.Row():
        blur_effect_type = gr.Radio(
            ["Gaussian Blur", "Depth-based Lens Blur"], 
            label="Blur Effect Type",
            value="Gaussian Blur"
        )
        blur_intensity = gr.Slider(
            minimum=1, 
            maximum=30, 
            value=15, 
            step=1, 
            label="Blur Intensity"
        )
    
    with gr.Row():
        apply_button = gr.Button("Apply Effect")
    
    with gr.Row():
        foreground_mask = gr.Image(label="Foreground Mask")
        depth_map = gr.Image(label="Depth Map")
    
    # process_image returns (result, mask visualization, depth visualization),
    # matching the order of the outputs below.
    apply_button.click(
        process_image,
        inputs=[input_image, blur_effect_type, blur_intensity],
        outputs=[output_image, foreground_mask, depth_map]
    )
    
    gr.Markdown("## How to Use")
    gr.Markdown("1. Upload your image")
    gr.Markdown("2. Select blur type (Gaussian or Depth-based)")
    gr.Markdown("3. Adjust blur intensity")
    gr.Markdown("4. Click 'Apply Effect'")
    gr.Markdown("")
    gr.Markdown("### Notes")
    gr.Markdown("- The white areas in the Foreground Mask show what will remain sharp")
    # The depth preview is colored with viridis (high values are yellow) and
    # the depth-based blur treats high depth values as nearer, so yellow
    # marks close regions and blue marks far ones.
    gr.Markdown("- The Depth Map shows estimated distances (yellow=close, blue=far)")
    gr.Markdown("- Gaussian Blur applies uniform blur to the background")
    gr.Markdown("- Depth-based Blur varies blur intensity based on distance")
    gr.Markdown("- Created for EEE 515 Assignment (Problem 2, Part 6)")

# Start the web server for the demo
demo.launch()