Spaces:

pavank007
/

image-blurr-effects

Sleeping

App Files Files Community

pavank007 committed on Mar 31

Commit

848f3c0

verified ·

1 Parent(s): ab2ada6

Update app.py

Browse files

Files changed (1) hide show

app.py +174 -166

app.py CHANGED Viewed

@@ -1,214 +1,222 @@
 import gradio as gr
 import torch
-import cv2
 import numpy as np
 from PIL import Image
-import requests
-from io import BytesIO
-from transformers import AutoFeatureExtractor, AutoModelForSemanticSegmentation
-from transformers import AutoImageProcessor, AutoModelForDepthEstimation
-import torch.nn.functional as F
-# Define device
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # Load segmentation model
-segmentation_model_name = "facebook/mask2former-swin-tiny-coco-instance"
-seg_feature_extractor = AutoFeatureExtractor.from_pretrained(segmentation_model_name)
-seg_model = AutoModelForSemanticSegmentation.from_pretrained(segmentation_model_name).to(device)
 # Load depth estimation model
-depth_model_name = "intel-isl/MiDaS-small"
-depth_processor = AutoImageProcessor.from_pretrained(depth_model_name)
-depth_model = AutoModelForDepthEstimation.from_pretrained(depth_model_name).to(device)
-def apply_segmentation(input_image):
-    # Convert to PIL Image if needed
-    if not isinstance(input_image, Image.Image):
-        input_image = Image.fromarray(input_image)
-    # Resize to 512x512 for consistent processing
-    input_image = input_image.resize((512, 512))
-    # Prepare image for the model
-    inputs = seg_feature_extractor(images=input_image, return_tensors="pt").to(device)
-    # Forward pass
-    with torch.no_grad():
-        outputs = seg_model(**inputs)
-    # Process output to get binary mask (foreground=1, background=0)
-    logits = outputs.logits
-    predicted_mask = torch.argmax(logits, dim=1)
-    # Convert to numpy for processing
-    mask = predicted_mask[0].cpu().numpy()
-    # Focus on person class (typically class 0 or 1 depending on the model)
-    mask = (mask > 0).astype(np.uint8) * 255
-    return np.array(input_image), mask
-def apply_depth_estimation(input_image):
-    # Convert to PIL Image if needed
-    if not isinstance(input_image, Image.Image):
-        input_image = Image.fromarray(input_image)
-    # Resize to 512x512 for consistent processing
-    input_image = input_image.resize((512, 512))
-    # Prepare image for the model
-    inputs = depth_processor(images=input_image, return_tensors="pt").to(device)
-    # Forward pass
     with torch.no_grad():
         outputs = depth_model(**inputs)
-    # Process depth map
-    depth_map = outputs.predicted_depth
-    depth_map = torch.nn.functional.interpolate(
-        depth_map.unsqueeze(1),
-        size=(512, 512),
         mode="bicubic",
         align_corners=False,
-    ).squeeze()
-    # Normalize depth map to 0-1 range
-    depth_min = torch.min(depth_map)
-    depth_max = torch.max(depth_map)
-    depth_map = (depth_map - depth_min) / (depth_max - depth_min)
-    # Convert to numpy
-    depth_map = depth_map.cpu().numpy()
-    # Convert depth to heatmap for visualization
-    depth_map_vis = (depth_map * 255).astype(np.uint8)
-    depth_map_vis = cv2.applyColorMap(depth_map_vis, cv2.COLORMAP_INFERNO)
-    return np.array(input_image), depth_map, depth_map_vis
-def apply_gaussian_blur(image, mask, sigma=15):
-    # Make a copy of the image
-    result = image.copy()
-    # Ensure mask is binary (0 or 1)
-    if mask.max() > 1:
-        mask = mask / 255.0
-    # Expand mask to 3 channels if needed
-    if len(mask.shape) == 2:
-        mask = np.expand_dims(mask, axis=2)
-        mask = np.repeat(mask, 3, axis=2)
-    # Blur the entire image
-    blurred = cv2.GaussianBlur(image, (0, 0), sigma)
-    # Combine original image (foreground) with blurred image (background) using the mask
-    result = image * mask + blurred * (1 - mask)
-    return result.astype(np.uint8)
-def apply_depth_blur(image, depth_map, max_sigma=30):
-    # Make a copy of the image
-    result = np.zeros_like(image)
-    # Ensure depth map values are between 0-1
     depth_map = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min())
-    # Apply variable blur based on depth
-    for sigma in range(1, max_sigma + 1):
-        # Create a mask for this depth level
-        depth_mask = ((depth_map >= (sigma - 1) / max_sigma) &
-                      (depth_map < sigma / max_sigma)).astype(np.float32)
-        # Expand mask to 3 channels if needed
-        if len(depth_mask.shape) == 2:
-            depth_mask = np.expand_dims(depth_mask, axis=2)
-            depth_mask = np.repeat(depth_mask, 3, axis=2)
-        # Apply blur with current sigma
-        current_blur = cv2.GaussianBlur(image, (0, 0), sigma)
-        # Add to result
-        result += (current_blur * depth_mask).astype(np.uint8)
-    # Handle remaining pixels (if any)
-    remaining_mask = (depth_map >= 1.0).astype(np.float32)
-    if len(remaining_mask.shape) == 2:
-        remaining_mask = np.expand_dims(remaining_mask, axis=2)
-        remaining_mask = np.repeat(remaining_mask, 3, axis=2)
-    max_blur = cv2.GaussianBlur(image, (0, 0), max_sigma)
-    result += (max_blur * remaining_mask).astype(np.uint8)
-    return result
-def process_image(input_image, blur_type, blur_strength):
-    # Convert to numpy array if needed
-    if isinstance(input_image, str):
-        # Load from URL if it's a string
-        response = requests.get(input_image)
-        input_image = Image.open(BytesIO(response.content))
-    # Resize to 512x512 for consistent processing
-    input_image = Image.fromarray(input_image).resize((512, 512))
-    input_image_np = np.array(input_image)
-    # Process based on selected blur type
-    if blur_type == "Gaussian Background Blur":
-        # Apply segmentation
-        _, mask = apply_segmentation(input_image)
-        # Apply Gaussian blur with chosen strength
-        result = apply_gaussian_blur(input_image_np, mask, sigma=blur_strength)
-        return input_image_np, result
-    elif blur_type == "Depth-based Lens Blur":
-        # Apply depth estimation
-        _, depth_map, depth_vis = apply_depth_estimation(input_image)
-        # Apply depth-based blur with chosen max strength
-        result = apply_depth_blur(input_image_np, depth_map, max_sigma=blur_strength)
-        return input_image_np, result
-# Create Gradio Interface
-with gr.Blocks(title="Image Blur Effects Demo") as app:
-    gr.Markdown("# Image Blur Effects Demo")
-    gr.Markdown("Upload an image to apply different blur effects using deep learning models")
     with gr.Row():
-        input_image = gr.Image(label="Input Image", type="numpy")
-        output_image = gr.Image(label="Output Image", type="numpy")
-    with gr.Row():
-        blur_type = gr.Radio(
-            choices=["Gaussian Background Blur", "Depth-based Lens Blur"],
-            label="Blur Effect Type",
-            value="Gaussian Background Blur"
-        )
-        blur_strength = gr.Slider(
-            minimum=1, maximum=50, value=15, step=1,
-            label="Blur Strength"
-        )
-    submit_button = gr.Button("Apply Effect")
-    submit_button.click(
         fn=process_image,
-        inputs=[input_image, blur_type, blur_strength],
-        outputs=[input_image, output_image]
     )
     gr.Markdown("""
     ## How it works
-    1. **Gaussian Background Blur**: Uses a segmentation model to detect the foreground object and applies blur to the background
-    2. **Depth-based Lens Blur**: Uses a depth estimation model to create a variable blur effect where objects further away are more blurred
-    Both models are from Hugging Face Transformers library.
     """)
-# Launch the app
-app.launch()

 import gradio as gr
 import torch
 import numpy as np
+import cv2
 from PIL import Image
+from transformers import AutoImageProcessor, AutoModelForSemanticSegmentation
+from transformers import DPTImageProcessor, DPTForDepthEstimation
+import warnings
+warnings.filterwarnings("ignore")
 # Load segmentation model
+# The processor/model pairs below are created once, at module import time, and are
+# used as module-level globals by get_segmentation_mask() and get_depth_map().
+# No device is specified here, so whatever default device from_pretrained picks is used.
+# NOTE(review): Mask2Former checkpoints are normally loaded through
+# AutoModelForUniversalSegmentation / Mask2FormerForUniversalSegmentation -- confirm
+# that AutoModelForSemanticSegmentation actually resolves this checkpoint.
+seg_processor = AutoImageProcessor.from_pretrained("facebook/mask2former-swin-tiny-coco-instance")
+seg_model = AutoModelForSemanticSegmentation.from_pretrained("facebook/mask2former-swin-tiny-coco-instance")
 # Load depth estimation model
+# DPT processor and model used by get_depth_map() to predict a per-pixel depth tensor.
+depth_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
+depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
+def apply_gaussian_blur(image, mask, sigma=15):
+    """Blend a sharp foreground over a Gaussian-blurred copy of ``image``.
+
+    Args:
+        image: Image array to process (the caller passes an RGB array).
+        mask: Foreground mask. Values with a maximum <= 1.0 are scaled to
+            0-255; anything else is taken as already being on a 0-255 scale.
+        sigma: Standard deviation handed to ``cv2.GaussianBlur``.
+
+    Returns:
+        A ``uint8`` array where mask-on pixels come from ``image`` and
+        mask-off pixels come from the blurred copy.
+    """
+    # Put the mask on a 0-255 uint8 scale regardless of how it arrived
+    mask_u8 = (mask * 255 if mask.max() <= 1.0 else mask).astype(np.uint8)
+    # Bring the mask to the image's height/width when they disagree
+    if mask_u8.shape[:2] != image.shape[:2]:
+        mask_u8 = cv2.resize(mask_u8, (image.shape[1], image.shape[0]))
+    # A single-channel mask is replicated across three colour channels
+    if mask_u8.ndim == 2:
+        mask_u8 = np.repeat(mask_u8[:, :, np.newaxis], 3, axis=2)
+    # Per-pixel blend weight in [0, 1]: 1 keeps the original, 0 takes the blur
+    weight = mask_u8 / 255.0
+    # Kernel size (0, 0) lets OpenCV derive the kernel from sigma
+    background = cv2.GaussianBlur(image, (0, 0), sigma)
+    composite = image * weight + background * (1 - weight)
+    return composite.astype(np.uint8)
+def apply_depth_blur(image, depth_map, max_sigma=25):
+    """Apply a Gaussian blur whose strength varies with the depth map.
+
+    The normalised depth range is cut into bands; band ``sigma`` covers
+    ``[(sigma - 1) / max_sigma, (sigma + 1) / max_sigma]`` and its pixels are
+    taken from a copy of ``image`` blurred with that sigma. Each pixel is
+    assigned to exactly one band (the first one that contains it), so values
+    lying exactly on a shared band boundary are no longer added twice, which
+    previously wrapped around in uint8.
+
+    NOTE(review): larger depth values receive stronger blur. Confirm that this
+    matches the depth model's convention (some models predict inverse depth,
+    where larger means nearer).
+
+    Args:
+        image: Image array to blur.
+        depth_map: Per-pixel depth. Values above 1.0 are rescaled by the
+            maximum; otherwise the map is used as-is.
+        max_sigma: Largest blur sigma; also sets the number of depth bands.
+
+    Returns:
+        An array shaped like ``image``. Pixels that fall in no band are copied
+        from ``image`` unchanged.
+    """
+    # Normalize depth map to range [0, 1]
+    if depth_map.max() > 1.0:
+        depth_norm = depth_map / depth_map.max()
+    else:
+        depth_norm = depth_map
+    # Resize depth map to match image dimensions if needed
+    if depth_norm.shape[:2] != image.shape[:2]:
+        depth_norm = cv2.resize(depth_norm, (image.shape[1], image.shape[0]))
+    result = np.zeros_like(image)
+    # Tracks pixels already written, so overlapping band edges cannot double count
+    covered = np.zeros(depth_norm.shape, dtype=bool)
+    for sigma in range(1, int(max_sigma) + 1, 2):
+        lower_bound = (sigma - 1) / max_sigma
+        upper_bound = (sigma + 1) / max_sigma
+        band = (depth_norm >= lower_bound) & (depth_norm <= upper_bound) & ~covered
+        # Skip the (expensive) blur when no pixel lives at this depth
+        if not band.any():
+            continue
+        blurred = cv2.GaussianBlur(image, (0, 0), sigma)
+        # Boolean indexing copies whole pixels, whatever the channel count
+        result[band] = blurred[band]
+        covered |= band
+    # Anything outside every band keeps its original, unblurred value
+    missing = ~covered
+    result[missing] = image[missing]
+    return result
+def get_segmentation_mask(image_pil):
+    """Return a binary mask of the pixels the segmentation model labels as class 0.
+
+    Args:
+        image_pil: PIL image to segment.
+
+    Returns:
+        A 2-D numpy array at the image's (height, width) resolution holding 1
+        where the predicted label is 0 and 0 elsewhere.
+    """
+    inputs = seg_processor(images=image_pil, return_tensors="pt")
+    # Inference only: skip autograd bookkeeping, the same way get_depth_map does
+    with torch.no_grad():
+        outputs = seg_model(**inputs)
+    # PIL reports (width, height); target_sizes wants (height, width)
+    predicted_mask = seg_processor.post_process_semantic_segmentation(
+        outputs, target_sizes=[image_pil.size[::-1]]
+    )[0]
+    mask_np = predicted_mask.cpu().numpy()
+    # Assumes label id 0 is "person" for this checkpoint -- TODO confirm against
+    # the model config's id2label mapping
+    person_mask = np.zeros_like(mask_np)
+    person_mask[mask_np == 0] = 1
+    return person_mask
+def get_depth_map(image_pil):
+    """Predict a depth map for ``image_pil`` and min-max normalise it to [0, 1].
+
+    Args:
+        image_pil: PIL image to run through the depth model.
+
+    Returns:
+        A numpy array at the image's (height, width) resolution with values
+        in [0, 1]. A completely flat prediction yields an all-zero map instead
+        of NaNs.
+    """
+    # Process the image with the depth estimation model
+    inputs = depth_processor(images=image_pil, return_tensors="pt")
     with torch.no_grad():
         outputs = depth_model(**inputs)
+        predicted_depth = outputs.predicted_depth
+    # Interpolate to original size; PIL size is (width, height), hence the reversal
+    prediction = torch.nn.functional.interpolate(
+        predicted_depth.unsqueeze(1),
+        size=image_pil.size[::-1],
         mode="bicubic",
         align_corners=False,
+    )
+    # Convert to numpy array
+    depth_map = prediction.squeeze().cpu().numpy()
+    # A flat prediction would make the normalisation below divide 0 by 0
+    if depth_map.max() == depth_map.min():
+        return np.zeros_like(depth_map)
+    # Normalize depth map
     depth_map = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min())
+    return depth_map
+def process_image(input_image, blur_sigma=15, depth_blur_sigma=25):
+    """Run segmentation and depth estimation on an image and build both blur effects.
+
+    Args:
+        input_image: PIL image or numpy array from the UI; ``None`` when
+            nothing has been uploaded.
+        blur_sigma: Sigma for the segmentation-based background blur.
+        depth_blur_sigma: Maximum sigma for the depth-based blur.
+
+    Returns:
+        A five-element list: original image, segmentation mask visualisation,
+        background-blur result, depth-map visualisation (RGB) and depth-blur
+        result. Every element is ``None`` when there is no input or when
+        processing fails.
+    """
+    # Nothing uploaded yet: return empty outputs without running the models
+    if input_image is None:
+        return [None, None, None, None, None]
+    try:
+        # Keep both a PIL view (for the models) and a numpy view (for OpenCV)
+        if isinstance(input_image, np.ndarray):
+            pil_image = Image.fromarray(input_image)
+        else:
+            pil_image = input_image
+            input_image = np.array(pil_image)
+        # Get segmentation mask
+        seg_mask = get_segmentation_mask(pil_image)
+        # Get depth map
+        depth_map = get_depth_map(pil_image)
+        # Apply gaussian blur to background
+        gaussian_result = apply_gaussian_blur(input_image, seg_mask, sigma=blur_sigma)
+        # Apply depth-based blur
+        depth_result = apply_depth_blur(input_image, depth_map, max_sigma=depth_blur_sigma)
+        # Display depth map as an image
+        depth_visualization = (depth_map * 255).astype(np.uint8)
+        depth_visualization = cv2.applyColorMap(depth_visualization, cv2.COLORMAP_INFERNO)
+        # applyColorMap produces BGR; the UI shows arrays as RGB, so swap channels
+        depth_visualization = cv2.cvtColor(depth_visualization, cv2.COLOR_BGR2RGB)
+        # Display segmentation mask
+        seg_visualization = (seg_mask * 255).astype(np.uint8)
+        return [
+            input_image,
+            seg_visualization,
+            gaussian_result,
+            depth_visualization,
+            depth_result
+        ]
+    except Exception as e:
+        # Best-effort UI handler: report the failure and clear all outputs
+        print(f"Error processing image: {e}")
+        return [None, None, None, None, None]
+# Create Gradio interface
+# Layout: one row with two columns -- controls on the left, tabbed results on the right.
+with gr.Blocks(title="Image Blur Effects with Segmentation and Depth Estimation") as demo:
+    gr.Markdown("# Image Blur Effects App")
+    gr.Markdown("This app demonstrates two types of blur effects: background blur using segmentation and depth-based lens blur.")
     with gr.Row():
+        # Left column: image upload, the two blur-strength sliders and the trigger button
+        with gr.Column():
+            input_image = gr.Image(label="Upload an image", type="pil")
+            blur_sigma = gr.Slider(minimum=1, maximum=50, value=15, step=1, label="Background Blur Intensity")
+            depth_blur_sigma = gr.Slider(minimum=1, maximum=50, value=25, step=1, label="Depth Blur Max Intensity")
+            process_btn = gr.Button("Process Image")
+        # Right column: one tab per element of the list returned by process_image
+        with gr.Column():
+            with gr.Tab("Original Image"):
+                output_original = gr.Image(label="Original Image")
+            with gr.Tab("Segmentation Mask"):
+                output_segmentation = gr.Image(label="Segmentation Mask")
+            with gr.Tab("Background Blur"):
+                output_gaussian = gr.Image(label="Background Blur Result")
+            with gr.Tab("Depth Map"):
+                output_depth = gr.Image(label="Depth Map")
+            with gr.Tab("Depth-based Lens Blur"):
+                output_depth_blur = gr.Image(label="Depth-based Lens Blur Result")
+    # The outputs list order must match the order of the list process_image returns
+    process_btn.click(
         fn=process_image,
+        inputs=[input_image, blur_sigma, depth_blur_sigma],
+        outputs=[output_original, output_segmentation, output_gaussian, output_depth, output_depth_blur]
     )
     gr.Markdown("""
     ## How it works
+    1. **Background Blur**: Uses a segmentation model to identify foreground objects and blurs only the background
+    2. **Depth-based Lens Blur**: Uses a depth estimation model to apply variable blur based on estimated distance
+    Try uploading a photo of a person or object against a background to see the effects!
     """)
+# Called at module top level with no __main__ guard, so the app starts whenever this file runs
+demo.launch()