import gradio as gr
import torch
import numpy as np
from PIL import Image
import cv2
from transformers import pipeline
import os

# Load models
def load_models():
    # Load segmentation model
    segmenter = pipeline("image-segmentation", model="facebook/maskformer-swin-base-ade")
    # Load depth estimation model
    depth_estimator = pipeline("depth-estimation", model="Intel/dpt-large")
    return segmenter, depth_estimator
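
# Optional: if a GPU is available, both pipelines accept a `device` argument.
# A minimal sketch (not required for the app; the defaults above run on CPU):
#
#     device = 0 if torch.cuda.is_available() else -1
#     segmenter = pipeline("image-segmentation",
#                          model="facebook/maskformer-swin-base-ade", device=device)
#     depth_estimator = pipeline("depth-estimation", model="Intel/dpt-large", device=device)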

# Create binary mask
def create_binary_mask(segmentation_results, image_np, target_class="person"):
    # Initialize empty mask with black background
    mask = np.zeros((image_np.shape[0], image_np.shape[1]), dtype=np.uint8)
    # Look for segments with target class
    found = False
    for segment in segmentation_results:
        if target_class.lower() in segment['label'].lower():
            # Convert segment mask to numpy array
            segment_mask = np.array(segment['mask'])
            # Convert grayscale to binary (255 for white)
            binary_mask = np.where(segment_mask > 0.5, 255, 0).astype(np.uint8)
            # Add to overall mask
            mask = cv2.bitwise_or(mask, binary_mask)
            found = True
    # If target class not found, use the largest segment
    if not found:
        largest_area = 0
        largest_mask = None
        for segment in segmentation_results:
            segment_mask = np.array(segment['mask'])
            binary_mask = np.where(segment_mask > 0.5, 255, 0).astype(np.uint8)
            area = np.sum(binary_mask > 0)
            if area > largest_area:
                largest_area = area
                largest_mask = binary_mask
        if largest_mask is not None:
            mask = largest_mask
    return mask
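
# The structure this function expects is the transformers image-segmentation
# pipeline output: a list with one dict per detected segment, for example
# (values shown are illustrative):
#
#     [{"score": 0.98, "label": "person", "mask": <PIL.Image mode=L>}, ...]
#
# "mask" is a single-channel PIL image, so np.array(segment['mask']) above
# yields a 2-D uint8 array with 255 inside the segment and 0 elsewhere.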

# Apply Gaussian blur to background
def apply_gaussian_blur_to_background(image_np, mask, sigma=15):
    # Create a blurred version of the entire image
    blurred_image = cv2.GaussianBlur(image_np, (0, 0), sigma)
    # Ensure mask is in correct format (single channel)
    if len(mask.shape) == 3 and mask.shape[2] == 3:
        mask_gray = cv2.cvtColor(mask, cv2.COLOR_RGB2GRAY)
    else:
        mask_gray = mask.copy()
    # Normalize mask to range 0-1
    if mask_gray.max() > 1:
        mask_gray = mask_gray / 255.0
    # Expand mask dimensions for elementwise multiplication
    mask_3channel = np.stack([mask_gray] * 3, axis=2)
    # Combine original foreground with blurred background
    result = image_np * mask_3channel + blurred_image * (1 - mask_3channel)
    result = result.astype(np.uint8)
    return result
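
# Optional refinement (a sketch, not wired in above): feathering the mask before
# compositing avoids a hard cutout edge around the subject. Inside the function,
# right after the 0-1 normalization, the mask could be softened with e.g.:
#
#     mask_gray = cv2.GaussianBlur(mask_gray.astype(np.float32), (21, 21), 0)
#
# so the foreground/background blend transitions smoothly over a few pixels.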

# Normalize depth map
def normalize_depth_map(depth_map):
    """Normalize depth map to range [0, 1] and invert it so nearer pixels get smaller values"""
    depth_min = depth_map.min()
    depth_max = depth_map.max()
    if depth_min == depth_max:
        return np.zeros_like(depth_map)
    normalized_depth = (depth_map - depth_min) / (depth_max - depth_min)
    # DPT outputs larger values for nearer pixels, so invert the map: after this,
    # small values mean "close to the camera" and large values mean "far away".
    return 1.0 - normalized_depth
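
# Worked example: DPT-style outputs assign larger values to nearer pixels.
# For raw values [10, 55, 100] (far, mid, near), min-max scaling gives
# [0.0, 0.5, 1.0]; the final inversion turns these into [1.0, 0.5, 0.0], so the
# nearest pixel ends up at 0.0 and downstream code can treat small values as
# "in focus".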

# Apply depth-based blur (earlier implementation, kept commented out for reference)
# def apply_depth_based_blur(image, depth_map, max_blur=25):
#     """Apply variable Gaussian blur based on depth with enhanced effect"""
#     # Create output image
#     result = np.zeros_like(image)
#     # Normalize depth map
#     normalized_depth = normalize_depth_map(depth_map)
#     # Enhance depth contrast to make the effect more noticeable
#     # Apply gamma correction to increase contrast between foreground and background
#     gamma = 0.5  # Values less than 1 will enhance contrast
#     normalized_depth = np.power(normalized_depth, gamma)
#     # Apply blur with intensity proportional to depth
#     for blur_size in range(1, max_blur + 1, 2):  # Odd numbers for kernel size
#         # Create a mask for pixels that should receive this blur level
#         if blur_size == 1:
#             mask = (normalized_depth <= blur_size / max_blur).astype(np.float32)
#         else:
#             lower_bound = (blur_size - 2) / max_blur
#             upper_bound = blur_size / max_blur
#             mask = ((normalized_depth > lower_bound) & (normalized_depth <= upper_bound)).astype(np.float32)
#         # Skip if no pixels in this range
#         if not np.any(mask):
#             continue
#         # Apply Gaussian blur with current kernel size
#         if blur_size > 1:  # No need to blur with kernel size 1
#             try:
#                 blurred = cv2.GaussianBlur(image, (blur_size, blur_size), 0)
#                 # Add blurred result to output image
#                 mask_3d = np.stack([mask] * 3, axis=2)
#                 result += (blurred * mask_3d).astype(np.uint8)
#             except Exception as e:
#                 print(f"Error applying blur with size {blur_size}: {e}")
#                 continue
#         else:
#             # For blur_size=1, just copy the original pixels
#             mask_3d = np.stack([mask] * 3, axis=2)
#             result += (image * mask_3d).astype(np.uint8)
#     return result

# Apply depth-based blur (current implementation)
def apply_depth_based_blur(image, depth_map, max_blur=25):
    """Apply variable Gaussian blur based on depth, keeping the foreground in focus"""
    # Start with a copy of the original image
    result = image.copy().astype(float)
    # Normalize depth map (after normalization, nearer pixels have lower values)
    normalized_depth = normalize_depth_map(depth_map)
    # Pixels at or below this normalized depth are treated as foreground and kept sharp
    foreground_threshold = 0.3  # Adjust this value based on your depth map
    # Create increasingly blurred versions of the image
    blurred_images = []
    blur_strengths = []
    # Generate progressively blurred versions
    for blur_size in range(3, max_blur + 1, 4):  # Use a larger step for efficiency
        blur_strengths.append(blur_size)
        blurred = cv2.GaussianBlur(image, (blur_size, blur_size), 0)
        blurred_images.append(blurred)
    # Apply the appropriate blur level based on depth
    for y in range(image.shape[0]):
        for x in range(image.shape[1]):
            depth_val = normalized_depth[y, x]
            # Keep foreground sharp
            if depth_val <= foreground_threshold:
                continue
            # Apply blur based on depth: the further away, the stronger the blur
            relative_depth = (depth_val - foreground_threshold) / (1 - foreground_threshold)
            blur_index = min(int(relative_depth * len(blurred_images)), len(blurred_images) - 1)
            # Copy the pixel from the appropriate blurred image
            result[y, x] = blurred_images[blur_index][y, x]
    return result.astype(np.uint8)
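
# Performance note: the per-pixel loop above is easy to follow but slow on large
# images. A vectorized sketch of the same per-depth-level selection (assuming the
# same local variables as in the function) could look like:
#
#     levels = np.clip(
#         ((normalized_depth - foreground_threshold)
#          / (1 - foreground_threshold) * len(blurred_images)).astype(int),
#         0, len(blurred_images) - 1)
#     for i, blurred in enumerate(blurred_images):
#         sel = (normalized_depth > foreground_threshold) & (levels == i)
#         result[sel] = blurred[sel]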

# Process function for Gradio
def process_image(input_image, blur_effect_type, blur_strength, target_class, show_depth_map=False):
    try:
        # Load models if not already loaded
        if not hasattr(process_image, "models_loaded"):
            process_image.segmenter, process_image.depth_estimator = load_models()
            process_image.models_loaded = True
        # Convert to numpy array
        image_np = np.array(input_image)
        # Process based on selected effect
        if blur_effect_type == "Gaussian Background Blur":
            # Segment the image
            segmentation_results = process_image.segmenter(input_image)
            # Create binary mask
            binary_mask = create_binary_mask(segmentation_results, image_np, target_class)
            # Apply Gaussian blur to background
            result = apply_gaussian_blur_to_background(image_np, binary_mask, sigma=blur_strength)
            return result
        elif blur_effect_type == "Depth-Based Lens Blur":
            # Resize for depth estimation
            depth_input = cv2.resize(image_np, (512, 512))
            # Convert to PIL image for the depth estimator
            depth_input_pil = Image.fromarray(depth_input)
            # Get depth map
            depth_result = process_image.depth_estimator(depth_input_pil)
            depth_map = np.array(depth_result["depth"])
            # If show_depth_map is True, return a visualization of the depth map
            if show_depth_map:
                # Normalize depth map for visualization
                depth_vis = normalize_depth_map(depth_map)
                # Convert to a colormap for better visualization (OpenCV colormaps are
                # BGR, so convert to RGB before handing the image back to Gradio)
                depth_colormap = cv2.applyColorMap((depth_vis * 255).astype(np.uint8), cv2.COLORMAP_PLASMA)
                depth_colormap = cv2.cvtColor(depth_colormap, cv2.COLOR_BGR2RGB)
                return depth_colormap
            # Apply depth-based blur
            result = apply_depth_based_blur(depth_input, depth_map, max_blur=blur_strength)
            # Resize back to original dimensions if needed
            if image_np.shape[:2] != (512, 512):
                result = cv2.resize(result, (image_np.shape[1], image_np.shape[0]))
            return result
        else:
            return image_np  # Return original if no effect selected
    except Exception as e:
        print(f"Error in process_image: {e}")
        # Return the original image if there's an error
        return input_image
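
# Direct (non-UI) usage sketch, assuming a local file named "photo.jpg" exists:
#
#     img = Image.open("photo.jpg").convert("RGB")
#     out = process_image(img, "Gaussian Background Blur", 15, "person")
#     Image.fromarray(out).save("blurred.jpg")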

# Create Gradio interface
demo = gr.Blocks(title="Image Blur Effects")

with demo:
    gr.Markdown("# Image Blur Effects using Segmentation and Depth Estimation")
    gr.Markdown("Upload an image to apply different blur effects. For best results, use an image with a clear foreground subject.")
    with gr.Row():
        input_image = gr.Image(label="Input Image", type="pil")
        output_image = gr.Image(label="Output Image")
    with gr.Row():
        blur_effect_type = gr.Radio(
            ["Gaussian Background Blur", "Depth-Based Lens Blur"],
            label="Blur Effect Type",
            value="Gaussian Background Blur"
        )
        blur_strength = gr.Slider(
            minimum=5,
            maximum=45,
            step=2,
            value=15,
            label="Blur Strength"
        )
        target_class = gr.Textbox(
            label="Target Class (for segmentation)",
            value="person",
            placeholder="e.g., person, cat, dog"
        )
    process_btn = gr.Button("Apply Effect")
    process_btn.click(
        fn=process_image,
        inputs=[input_image, blur_effect_type, blur_strength, target_class],
        outputs=output_image
    )
    error_output = gr.Textbox(label="Error Information", visible=False)
    gr.Markdown("""
    ## How to use:
    1. Upload an image with a clear foreground subject
    2. Choose a blur effect type:
       - **Gaussian Background Blur**: Blurs the background while keeping the foreground sharp
       - **Depth-Based Lens Blur**: Creates a realistic lens blur effect based on depth estimation
    3. Adjust the blur strength
    4. For Gaussian Background Blur, specify the target class to identify the foreground (e.g., person, cat, dog)
    5. Click "Apply Effect"
    """)

# Initialize models at startup so the first request does not pay the loading cost
process_image.segmenter, process_image.depth_estimator = load_models()
process_image.models_loaded = True

# Launch the app
demo.launch(show_error=True)
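
# Likely dependencies for running this script (an assumption; pin versions as needed):
#
#     gradio
#     torch
#     transformers
#     opencv-python-headless
#     Pillow
#     numpy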