Spaces:
Sleeping
Sleeping
File size: 11,985 Bytes
7f69a30 fa47ceb 7f69a30 1ec7e98 fa47ceb 7f69a30 fa47ceb 7f69a30 fa47ceb d460634 fa47ceb d460634 fa47ceb a9e0802 fa47ceb 7f69a30 fa47ceb 7f69a30 a9e0802 fa47ceb 7f69a30 a9e0802 fa47ceb 7f69a30 a9e0802 fa47ceb a9e0802 fa47ceb df258b2 a9e0802 df258b2 a9e0802 df258b2 fa47ceb a9e0802 7f69a30 2885205 fa47ceb d460634 fa47ceb a9e0802 fa47ceb 2885205 fa47ceb a9e0802 2885205 fa47ceb 2885205 7f69a30 2885205 fa47ceb 2885205 fa47ceb 2885205 d460634 a9e0802 2885205 d460634 a9e0802 7f69a30 2885205 7f69a30 fa47ceb d460634 fa47ceb 1ec7e98 fa47ceb d460634 fa47ceb 1ec7e98 fa47ceb 1ec7e98 fa47ceb 1ec7e98 fa47ceb d460634 2885205 d460634 fa47ceb d460634 fa47ceb 2885205 1ec7e98 fa47ceb 1ec7e98 fa47ceb 1ec7e98 7f69a30 1ec7e98 7f69a30 1ec7e98 7f69a30 1ec7e98 7f69a30 1ec7e98 7f69a30 fa47ceb 1ec7e98 7f69a30 1ec7e98 fa47ceb 7f69a30 d460634 1ec7e98 d460634 df258b2 2885205 fa47ceb 7f69a30 1ec7e98 7f69a30 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 |
import gradio as gr
import torch
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from transformers import DPTImageProcessor, DPTForDepthEstimation
from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation
from scipy.ndimage import gaussian_filter
import cv2
import os
import io
import time
# Checkpoints are loaded once at import time so every request reuses the same
# in-memory models instead of reloading them per inference.
print("Loading models...")

# Semantic segmentation: Segformer checkpoint named below.
try:
    seg_processor = SegformerImageProcessor.from_pretrained(
        "nvidia/segformer-b5-finetuned-ade-640-640"
    )
    seg_model = SegformerForSemanticSegmentation.from_pretrained(
        "nvidia/segformer-b5-finetuned-ade-640-640"
    )
    print("β Segmentation model loaded successfully")
except Exception as seg_error:
    # A failed load is only reported; any name not yet assigned above stays
    # undefined, so later calls that use it will fail at that point.
    print(f"! Error loading segmentation model: {seg_error}")

# Monocular depth estimation: DPT checkpoint named below.
try:
    depth_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
    depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
    print("β Depth model loaded successfully")
except Exception as depth_error:
    # Same best-effort policy as for the segmentation model.
    print(f"! Error loading depth model: {depth_error}")
# Function for image segmentation
def segment_image(image):
    """Segment the image to extract the person/foreground region.

    Args:
        image: A PIL image, or an array accepted by ``Image.fromarray``.

    Returns:
        A tuple ``(mask_array, mask_viz)``:
        ``mask_array`` is a 2-D boolean array at the input's resolution
        (True = foreground); ``mask_viz`` is a 3-channel uint8 rendering of
        the same mask (white foreground on black).
    """
    print("Running image segmentation with Segformer...")

    # Convert to PIL Image if needed
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)

    # PIL reports size as (width, height)
    original_size = image.size

    # Resize to the resolution the checkpoint was fine-tuned at
    model_size = (640, 640)
    model_image = image.resize(model_size, Image.LANCZOS)

    inputs = seg_processor(images=model_image, return_tensors="pt")

    with torch.no_grad():
        logits = seg_model(**inputs).logits
        # Segformer emits logits at a quarter of the network input resolution.
        # Upsample them before the argmax so the pixel-count threshold and the
        # kernel sizes used below operate on the full-resolution grid.
        logits = torch.nn.functional.interpolate(
            logits,
            size=inputs["pixel_values"].shape[-2:],
            mode="bilinear",
            align_corners=False,
        )

    # Per-pixel class ids, converted to NumPy once and reused for every class test
    class_map = torch.argmax(logits, dim=1)[0].cpu().numpy()

    # Person class (index 12 in the ADE20K label map used by this checkpoint)
    person_class = 12
    binary_mask = class_map == person_class

    # If almost no pixels were classified as person, fall back to the largest
    # of a few other candidate classes.
    if binary_mask.sum() < 100:
        # NOTE(review): the original comment described ids 13-17 as
        # "vehicles, animals, etc."; in the ADE20K-150 label map they look
        # like earth/door/table/mountain/plant. Confirm against
        # seg_model.config.id2label before changing the list.
        for cls in (13, 14, 15, 16, 17):
            cls_mask = class_map == cls
            if cls_mask.sum() > binary_mask.sum():
                binary_mask = cls_mask

    # Convert to uint8 for OpenCV processing
    mask_cv = binary_mask.astype(np.uint8) * 255

    # Close small holes, then remove small specks
    kernel = np.ones((5, 5), np.uint8)
    mask_cv = cv2.morphologyEx(mask_cv, cv2.MORPH_CLOSE, kernel)
    mask_cv = cv2.morphologyEx(mask_cv, cv2.MORPH_OPEN, kernel)

    # Smooth the outline with a light blur, then re-binarise
    mask_cv = cv2.GaussianBlur(mask_cv, (7, 7), 0)
    _, mask_cv = cv2.threshold(mask_cv, 128, 255, cv2.THRESH_BINARY)

    # Resize back to the original image size (Lanczos resampling) and
    # threshold again to get a clean boolean mask
    mask_resized = Image.fromarray(mask_cv).resize(original_size, Image.LANCZOS)
    mask_array = np.array(mask_resized) > 128

    # White-on-black visualisation: the same plane replicated in R, G and B
    mask_plane = mask_array.astype(np.uint8) * 255
    mask_viz = np.stack([mask_plane, mask_plane, mask_plane], axis=-1)

    return mask_array, mask_viz
# Function to get depth map
def get_depth_map(image):
    """Estimate a depth map for the image with the DPT model.

    Args:
        image: A PIL image, or an array accepted by ``Image.fromarray``.

    Returns:
        A tuple ``(depth_viz, depth_map)``:
        ``depth_viz`` is a uint8 RGB array (viridis colormap) resized to the
        input image's size; ``depth_map`` is the raw prediction min-max
        normalised to [0, 1] at the model's output resolution (it is NOT
        resized here).
    """
    print("Running depth estimation...")

    # Convert to PIL Image if needed
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)

    # PIL reports size as (width, height)
    original_size = image.size

    model_size = (640, 640)
    model_image = image.resize(model_size, Image.LANCZOS)

    inputs = depth_processor(images=model_image, return_tensors="pt")

    with torch.no_grad():
        predicted_depth = depth_model(**inputs).predicted_depth

    depth = predicted_depth.squeeze().cpu().numpy()

    # Min-max normalise. A constant (or NaN) prediction has no usable range;
    # dividing by it would fill the map with NaN and make the uint8 cast
    # below undefined, so fall back to an all-zero map in that case.
    depth_low = depth.min()
    depth_range = depth.max() - depth_low
    if depth_range > 0:
        depth_map = (depth - depth_low) / depth_range
    else:
        depth_map = np.zeros_like(depth)

    # Colourise for display (drop the alpha channel returned by the colormap)
    depth_map_colored = plt.cm.viridis(depth_map)[:, :, :3]
    depth_map_viz = Image.fromarray((depth_map_colored * 255).astype(np.uint8))
    depth_map_viz_resized = depth_map_viz.resize(original_size, Image.LANCZOS)

    # Return both visualization and raw depth map
    return np.array(depth_map_viz_resized), depth_map
# Function to apply Gaussian blur to background
def apply_background_blur(image, mask, sigma=15):
    """Blur everything outside the mask and keep masked pixels untouched.

    Args:
        image: PIL image or image array.
        mask: Array whose positive entries mark pixels to keep sharp; for a
            3-D mask only the first channel is consulted.
        sigma: Gaussian sigma passed to OpenCV (kernel size is derived
            from it).

    Returns:
        Array with the original pixels where the mask is set and the blurred
        pixels elsewhere.
    """
    print(f"Applying background blur with sigma={sigma}...")

    image_array = np.array(image) if isinstance(image, Image.Image) else image

    # Collapse the mask to a single boolean plane
    mask_plane = mask[:, :, 0] if mask.ndim == 3 else mask
    keep_sharp = mask_plane > 0

    # Blur the full frame once; the sharp foreground is composited back below
    blurred = cv2.GaussianBlur(image_array, (0, 0), sigma)

    # Composite the first three channels plane by plane
    composited = np.stack(
        [
            np.where(keep_sharp, image_array[:, :, channel], blurred[:, :, channel])
            for channel in range(3)
        ],
        axis=2,
    )

    output = blurred.copy()
    output[:, :, :3] = composited
    return output
# Function for depth-based blur
def apply_depth_based_blur(image, mask, depth_map, max_sigma=15):
    """Blur the image progressively according to the provided depth map.

    The depth map is inverted (``1 - depth``) to obtain a per-pixel blur
    amount, so larger depth-map values receive less blur. The blur amount is
    quantised into 8 levels whose sigma grows linearly up to ``max_sigma``.

    Args:
        image: PIL image, or an array accepted by ``Image.fromarray``.
        mask: Optional foreground mask (positive = keep sharp); for a 3-D
            mask only the first channel is consulted. ``None`` disables
            foreground protection.
        depth_map: 2-D array, expected to be normalised to [0, 1]; it is
            resized to the image size when the shapes differ.
        max_sigma: Gaussian sigma used for the strongest blur level.

    Returns:
        Image array with depth-dependent blur applied; foreground pixels are
        copied from the input unchanged.
    """
    print(f"Applying depth-based blur with max_sigma={max_sigma}...")

    # Convert to PIL Image if needed
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)

    # PIL reports (width, height); NumPy shapes are (height, width)
    original_size = image.size
    img_array = np.array(image)

    # Resize depth map to match image size if needed
    if depth_map.shape[:2] != original_size[::-1]:
        depth_map_resized = cv2.resize(
            depth_map, original_size, interpolation=cv2.INTER_CUBIC
        )
    else:
        depth_map_resized = depth_map

    # Invert so that larger values mean "more blur". Cubic interpolation can
    # overshoot outside [0, 1] and NaNs would break the integer binning
    # below, so sanitise and clamp first.
    blur_amount = np.clip(
        np.nan_to_num(1.0 - depth_map_resized, nan=0.0), 0.0, 1.0
    )

    # Boolean foreground plane (or None when no mask was supplied)
    foreground = None
    if mask is not None:
        mask_arr = np.asarray(mask)
        if mask_arr.ndim == 3:
            foreground = mask_arr[:, :, 0] > 0
        else:
            foreground = mask_arr > 0

    # Quantise the blur amount into levels. The top edge (exactly 1.0) is
    # folded into the last level; a half-open [lo, hi) test would leave the
    # farthest pixels completely unblurred.
    num_levels = 8
    level_index = np.minimum(
        (blur_amount * num_levels).astype(np.int32), num_levels - 1
    )

    result = np.copy(img_array)
    for i in range(num_levels):
        level_mask = level_index == i
        # Foreground pixels are excluded from every level. Merely zeroing
        # their blur amount would drop them into level 0, which still blurs
        # with sigma = max_sigma / num_levels.
        if foreground is not None:
            level_mask = level_mask & ~foreground
        if not level_mask.any():
            continue  # skip the (expensive) blur when no pixel needs it

        # Sigma grows linearly with the level
        level_sigma = max_sigma * (i + 1) / num_levels
        level_blurred = cv2.GaussianBlur(img_array, (0, 0), level_sigma)
        result[level_mask] = level_blurred[level_mask]

    return result
# Main processing function
def process_image(input_image, blur_type="Gaussian Blur", blur_intensity=15):
    """Process the input image with the selected blur effect.

    Args:
        input_image: Image from the UI (NumPy array or anything
            ``np.array`` can convert); ``None`` when nothing was uploaded.
        blur_type: ``"Gaussian Blur"`` for a uniform background blur; any
            other value selects the depth-based blur.
        blur_intensity: Sigma (or maximum sigma) forwarded to the blur
            function.

    Returns:
        A tuple ``(result, mask_viz, depth_viz)``. When ``input_image`` is
        ``None`` all three are ``None``. If processing raises, the error is
        printed and the input image is returned in all three slots.
    """
    # Nothing uploaded: return empty outputs instead of letting the failure
    # path hand an object-dtype ``np.array(None)`` to the image components.
    if input_image is None:
        print("No input image provided; nothing to process.")
        return None, None, None

    try:
        # Convert from Gradio format (copy so the caller's array is not mutated)
        if not isinstance(input_image, np.ndarray):
            img = np.array(input_image)
        else:
            img = input_image.copy()

        # Ensure RGB format
        if img.ndim == 2:  # Grayscale
            img = np.stack([img] * 3, axis=2)
        elif img.shape[2] == 4:  # RGBA
            img = img[:, :, :3]  # Drop alpha channel

        # Convert to PIL for processing
        pil_img = Image.fromarray(img)

        # Step 1: Get segmentation mask
        mask_array, mask_viz = segment_image(pil_img)

        # Step 2: Always get depth map (it is displayed for both blur types)
        depth_viz, depth_map = get_depth_map(pil_img)

        # Step 3: Apply appropriate blur effect
        if blur_type == "Gaussian Blur":
            result = apply_background_blur(pil_img, mask_array, sigma=blur_intensity)
        else:  # "Depth-based Lens Blur"
            result = apply_depth_based_blur(
                pil_img, mask_array, depth_map, max_sigma=blur_intensity
            )

        return result, mask_viz, depth_viz
    except Exception as e:
        print(f"Error processing image: {e}")
        import traceback
        traceback.print_exc()
        # Return original image in every slot if processing fails
        if isinstance(input_image, np.ndarray):
            return input_image, input_image, input_image
        img = np.array(input_image)
        return img, img, img
# Create Gradio interface
with gr.Blocks(title="Image Blur Effects") as demo:
    # Header and short description of the two effects
    gr.Markdown("# Image Blur Effects App")
    gr.Markdown("Upload an image to apply two types of blur effects:")
    gr.Markdown("1. **Gaussian Blur**: Blurs the background while keeping the foreground sharp")
    gr.Markdown("2. **Depth-based Lens Blur**: Applies varying blur intensities based on estimated depth")

    # Input on the left, processed result on the right
    with gr.Row():
        input_image = gr.Image(label="Input Image", type="numpy")
        output_image = gr.Image(label="Output Image")

    # Effect selection and strength
    with gr.Row():
        blur_effect_type = gr.Radio(
            ["Gaussian Blur", "Depth-based Lens Blur"],
            label="Blur Effect Type",
            value="Gaussian Blur",
        )
        blur_intensity = gr.Slider(
            minimum=1,
            maximum=30,
            value=15,
            step=1,
            label="Blur Intensity",
        )

    with gr.Row():
        apply_button = gr.Button("Apply Effect")

    # Intermediate results shown for inspection
    with gr.Row():
        foreground_mask = gr.Image(label="Foreground Mask")
        depth_map = gr.Image(label="Depth Map")

    # Set up the click event; the output order matches process_image's
    # (result, mask_viz, depth_viz) return tuple
    apply_button.click(
        process_image,
        inputs=[input_image, blur_effect_type, blur_intensity],
        outputs=[output_image, foreground_mask, depth_map],
    )

    gr.Markdown("## How to Use")
    gr.Markdown("1. Upload your image")
    gr.Markdown("2. Select blur type (Gaussian or Depth-based)")
    gr.Markdown("3. Adjust blur intensity")
    gr.Markdown("4. Click 'Apply Effect'")
    gr.Markdown("")
    gr.Markdown("### Notes")
    gr.Markdown("- The white areas in the Foreground Mask show what will remain sharp")
    # Legend corrected: the blur code treats larger depth-map values as
    # nearer (it blurs by 1 - depth), and viridis maps larger values to
    # yellow, so yellow marks close regions and blue/purple far ones.
    gr.Markdown("- The Depth Map shows estimated distances (yellow=close, blue=far)")
    gr.Markdown("- Gaussian Blur applies uniform blur to the background")
    gr.Markdown("- Depth-based Blur varies blur intensity based on distance")
    gr.Markdown("- Created for EEE 515 Assignment (Problem 2, Part 6)")

# Launch the demo
demo.launch()