Spaces:

nsathya5
/

Background-Effects

Sleeping

App Files Files Community

nsathya5 committed on Mar 30

Commit

1ec7e98

verified ·

1 Parent(s): e3801a2

Update app.py

Browse files

Files changed (1) hide show

app.py +221 -163

app.py CHANGED Viewed

@@ -3,192 +3,250 @@ import torch
 import numpy as np
 import cv2
 from PIL import Image
-from transformers import SegformerForSemanticSegmentation, SegformerFeatureExtractor
-from transformers import DPTForDepthEstimation, DPTFeatureExtractor
-from scipy.ndimage import gaussian_filter
-# Load segmentation model
-segmentation_model_name = "nvidia/segformer-b0-finetuned-ade-512-512"
-segmentation_feature_extractor = SegformerFeatureExtractor.from_pretrained(segmentation_model_name)
-segmentation_model = SegformerForSemanticSegmentation.from_pretrained(segmentation_model_name)
-# Load depth estimation model
-depth_model_name = "Intel/dpt-large"
-depth_feature_extractor = DPTFeatureExtractor.from_pretrained(depth_model_name)
-depth_model = DPTForDepthEstimation.from_pretrained(depth_model_name)
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-segmentation_model.to(device)
-depth_model.to(device)
-def apply_segmentation(image):
-    # Preprocess
-    inputs = segmentation_feature_extractor(images=image, return_tensors="pt")
-    inputs = {k: v.to(device) for k, v in inputs.items()}
-    # Forward pass
-    with torch.no_grad():
-        outputs = segmentation_model(**inputs)
-        logits = outputs.logits
-    # Convert logits to segmentation mask
-    segmentation_mask = torch.argmax(logits, dim=1)[0].cpu().numpy()
-    # Create a binary mask for foreground (assume person class is usually indexed as 15)
-    # For simplicity, consider everything as foreground (non-zero values)
-    binary_mask = (segmentation_mask > 0).astype(np.uint8) * 255
-    # Resize binary mask to match input image if needed
-    if binary_mask.shape[:2] != image.size[::-1]:
-        binary_mask = cv2.resize(binary_mask, image.size[::-1])
-    return binary_mask
-def apply_depth_estimation(image):
-    # Preprocess
-    inputs = depth_feature_extractor(images=image, return_tensors="pt")
-    inputs = {k: v.to(device) for k, v in inputs.items()}
-    # Forward pass
-    with torch.no_grad():
-        outputs = depth_model(**inputs)
-        predicted_depth = outputs.predicted_depth
-    # Convert to numpy
-    depth_map = predicted_depth[0].cpu().numpy()
-    # Normalize depth map for visualization
-    depth_map = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min())
-    depth_map = (depth_map * 255).astype(np.uint8)
-    # Resize depth map to match input image if needed
-    if depth_map.shape[:2] != image.size[::-1]:
-        depth_map = cv2.resize(depth_map, image.size[::-1])
-    return depth_map
-def apply_gaussian_blur(image, mask, sigma=15):
-    # Convert PIL image to numpy array
-    img_np = np.array(image)
-    # Create blurred version of the entire image
-    blurred = gaussian_filter(img_np, sigma=(sigma, sigma, 0))
-    # Make sure mask has the right shape
-    if len(mask.shape) == 2:
-        mask = np.expand_dims(mask, -1)
-    if mask.shape[2] == 1:
-        mask = np.repeat(mask, 3, axis=2)
-    # Normalize mask to [0, 1]
-    normalized_mask = mask.astype(float) / 255
-    # Combine original and blurred images based on mask
-    result = img_np * normalized_mask + blurred * (1 - normalized_mask)
-    result = result.astype(np.uint8)
-    return Image.fromarray(result)
-def apply_depth_blur(image, depth_map, max_sigma=30):
-    # Convert PIL image to numpy array
-    img_np = np.array(image)
-    # Make sure depth map has the right shape
-    if len(depth_map.shape) == 3 and depth_map.shape[2] > 1:
-        depth_map = cv2.cvtColor(depth_map, cv2.COLOR_RGB2GRAY)
-    # Create normalized depth map for blur strength calculation
-    # Invert depth map so that closer objects have smaller sigma values
-    depth_normalized = 1.0 - (depth_map.astype(float) / 255.0)
-    # For each pixel, apply Gaussian blur with varying sigma
-    height, width = img_np.shape[:2]
-    result = np.zeros_like(img_np)
-    # This is a simplified approach - ideally we'd use a more efficient method
-    # Create multiple blurred versions of the image with different sigma values
-    blur_layers = []
-    sigma_values = np.linspace(1, max_sigma, 10)
-    for sigma in sigma_values:
-        blur_layers.append(gaussian_filter(img_np, sigma=(sigma, sigma, 0)))
-    # Interpolate between blur layers based on depth
-    for y in range(height):
-        for x in range(width):
-            # Map depth value to index in blur_layers
-            depth_val = depth_normalized[y, x]
-            idx = int(depth_val * (len(blur_layers) - 1))
-            # Ensure valid index
-            idx = max(0, min(idx, len(blur_layers) - 1))
-            result[y, x] = blur_layers[idx][y, x]
-    return Image.fromarray(result)
-def process_image(input_image, blur_type, blur_intensity):
-    # Apply segmentation
-    mask = apply_segmentation(input_image)
-    # Apply depth estimation
-    depth_map = apply_depth_estimation(input_image)
-    # Apply appropriate blur effect
-    if blur_type == "Gaussian Blur":
-        output_image = apply_gaussian_blur(input_image, mask, sigma=blur_intensity)
-    else:  # "Depth-based Lens Blur"
-        output_image = apply_depth_blur(input_image, depth_map, max_sigma=blur_intensity)
-    # Create visualization of segmentation mask
-    mask_visualized = Image.fromarray(mask).convert("RGB")
-    # Create visualization of depth map
-    depth_visualized = Image.fromarray(depth_map).convert("RGB")
-    return output_image, mask_visualized, depth_visualized
-# Create the Gradio interface
-with gr.Blocks(title="Background Blur Effects") as demo:
-    gr.Markdown("# Background Blur Effects using Vision Transformers")
-    gr.Markdown("""
-        This application demonstrates two types of background blur effects:
-        1. **Gaussian Blur**: Simple blur applied to the background using segmentation
-        2. **Depth-based Lens Blur**: Realistic lens blur effect based on depth estimation
-    """)
     with gr.Row():
-        with gr.Column():
-            input_image = gr.Image(label="Input Image", type="pil")
-            blur_type = gr.Radio(
-                ["Gaussian Blur", "Depth-based Lens Blur"],
-                label="Blur Effect Type",
-                value="Gaussian Blur"
-            )
-            blur_intensity = gr.Slider(
-                minimum=1, maximum=30, value=15,
-                label="Blur Intensity", step=1
-            )
-            submit_btn = gr.Button("Apply Effect")
-        with gr.Column():
-            output_image = gr.Image(label="Output Image", type="pil")
     with gr.Row():
-        segmentation_mask = gr.Image(label="Segmentation Mask", type="pil")
-        depth_map = gr.Image(label="Depth Map", type="pil")
-    submit_btn.click(
-        process_image,
-        inputs=[input_image, blur_type, blur_intensity],
         outputs=[output_image, segmentation_mask, depth_map]
     )
-    gr.Markdown("""
-        ### How it works
-        1. **Segmentation**: Identifies foreground objects using SegFormer
-        2. **Depth Estimation**: Generates a depth map using DPT
-        3. **Blur Application**: Applies blur effects based on segmentation and depth information
-        *Created for EEE 515 Assignment 3*
-    """)
-# Launch the app
 demo.launch()

 import numpy as np
 import cv2
 from PIL import Image
+import matplotlib.pyplot as plt
+from transformers import AutoFeatureExtractor, SegformerForSemanticSegmentation
+from transformers import DPTFeatureExtractor, DPTForDepthEstimation
+# Load a smaller segmentation model.
+# Both names are pre-bound to None so that the `is not None` guards used by
+# the helper functions see a defined value even when loading fails
+# (otherwise the guard itself raises NameError).
+seg_processor = None
+seg_model = None
+try:
+    seg_processor = AutoFeatureExtractor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
+    seg_model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
+    print("✓ Segmentation model loaded successfully")
+except Exception as e:
+    print(f"! Error loading segmentation model: {e}")
+    # Drop any half-loaded state; the fallback implementation will be used
+    seg_processor = None
+    seg_model = None
+# Load a smaller depth estimation model (same None-on-failure convention)
+depth_processor = None
+depth_model = None
+try:
+    depth_processor = DPTFeatureExtractor.from_pretrained("Intel/dpt-hybrid-midas")
+    depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-hybrid-midas")
+    print("✓ Depth model loaded successfully")
+except Exception as e:
+    print(f"! Error loading depth model: {e}")
+    # Drop any half-loaded state; the fallback implementation will be used
+    depth_processor = None
+    depth_model = None
+def apply_gaussian_blur(image, sigma=15):
+    """Apply Gaussian blur with specified sigma value.
+
+    Args:
+        image: Image as a NumPy array accepted by cv2.GaussianBlur.
+        sigma: Standard deviation of the Gaussian kernel. The kernel size is
+            passed as (0, 0), so OpenCV derives it from sigma.
+
+    Returns:
+        The blurred image, or an unmodified copy when sigma is not positive.
+    """
+    # With ksize=(0, 0) OpenCV needs a positive sigma to derive a kernel size
+    if sigma <= 0:
+        return image.copy()
+    return cv2.GaussianBlur(image, (0, 0), sigma)
+def get_foreground_mask(image):
+    """Return a float foreground mask (1 = foreground) for the image.
+
+    The segmentation model is tried first. If it is unavailable or inference
+    fails, a soft centred ellipse is returned instead.
+
+    Args:
+        image: NumPy array (HxW, HxWx3 or HxWx4) or a PIL image.
+
+    Returns:
+        HxW float32 array with values in [0, 1].
+    """
+    # PIL input has no .shape, which both the upsampling step and the
+    # fallback below rely on, so convert it to an RGB array up front.
+    if not isinstance(image, np.ndarray):
+        image = np.array(image.convert('RGB'))
+    h, w = image.shape[:2]
+    try:
+        # Try using the model first
+        if seg_model is not None and seg_processor is not None:
+            # Convert to RGB if needed
+            if image.ndim == 2:
+                image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
+            elif image.shape[2] == 4:
+                image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
+            pil_image = Image.fromarray(image)
+            # Prepare image for the model
+            inputs = seg_processor(images=pil_image, return_tensors="pt")
+            # Run inference
+            with torch.no_grad():
+                outputs = seg_model(**inputs)
+            # Upsample the logits back to the input resolution
+            upsampled_logits = torch.nn.functional.interpolate(
+                outputs.logits,
+                size=(h, w),
+                mode="bilinear",
+                align_corners=False,
+            )
+            # Move the label map to NumPy once instead of once per class test
+            pred_seg = upsampled_logits.argmax(dim=1)[0].cpu().numpy()
+            # In ADE20K dataset, person is class 12
+            mask = (pred_seg == 12).astype(np.float32)
+            # If person isn't detected, fall back to the largest of a few other classes
+            if mask.sum() < 100:  # Almost no pixels were classified as person
+                # NOTE(review): in the ADE20K label list these indices look like
+                # earth/door/table rather than vehicle/animal -- confirm against
+                # seg_model.config.id2label before relying on them.
+                for cls in [13, 14, 15]:
+                    cls_mask = (pred_seg == cls).astype(np.float32)
+                    if cls_mask.sum() > mask.sum():
+                        mask = cls_mask
+            return mask
+    except Exception as e:
+        print(f"Error in segmentation: {e}")
+    # Fallback: Use a simple method - assume center of image is foreground
+    y, x = np.ogrid[:h, :w]
+    center_y, center_x = h / 2, w / 2
+    # Create an elliptical mask (foreground is in center)
+    mask = ((x - center_x)**2 / (w/3)**2 + (y - center_y)**2 / (h/3)**2) <= 1
+    # Convert to float and smooth edges
+    mask = mask.astype(np.float32)
+    mask = cv2.GaussianBlur(mask, (51, 51), 30)
+    return mask
+def get_depth_map(image):
+    """Get a relative depth map for the image using the model or a fallback.
+
+    Both paths return an HxW float array in which LARGER values mean FARTHER
+    from the camera, so callers can blur proportionally to the value.
+
+    Args:
+        image: NumPy array (HxW, HxWx3 or HxWx4) or a PIL image.
+
+    Returns:
+        HxW float array of relative (unnormalised) depth.
+    """
+    # PIL input has no .shape, which both the resize step and the fallback
+    # below rely on, so convert it to an RGB array up front.
+    if not isinstance(image, np.ndarray):
+        image = np.array(image.convert('RGB'))
+    h, w = image.shape[:2]
+    try:
+        # Try using the model first
+        if depth_model is not None and depth_processor is not None:
+            # Convert to RGB if needed
+            if image.ndim == 2:
+                image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
+            elif image.shape[2] == 4:
+                image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
+            pil_image = Image.fromarray(image)
+            # Prepare image for the model
+            inputs = depth_processor(images=pil_image, return_tensors="pt")
+            # Run inference
+            with torch.no_grad():
+                outputs = depth_model(**inputs)
+            predicted_depth = outputs.predicted_depth
+            # Interpolate to original size; index [0, 0] instead of squeeze()
+            # so images with a dimension of 1 keep their 2-D shape
+            resized = torch.nn.functional.interpolate(
+                predicted_depth.unsqueeze(1),
+                size=(h, w),
+                mode="bicubic",
+                align_corners=False,
+            )[0, 0].cpu().numpy()
+            # NOTE(review): MiDaS-style DPT checkpoints predict inverse depth
+            # (larger = nearer). Flip it so the model path matches the
+            # fallback's "larger = farther" convention -- confirm against the
+            # Intel/dpt-hybrid-midas model card.
+            depth_map = resized.max() - resized
+            return depth_map
+    except Exception as e:
+        print(f"Error in depth estimation: {e}")
+    # Fallback: Create a simple depth map based on distance from center
+    y, x = np.ogrid[:h, :w]
+    center_y, center_x = h / 2, w / 2
+    # Create a radial gradient (closer to center = closer distance)
+    depth = ((x - center_x)**2 / (w/2)**2 + (y - center_y)**2 / (h/2)**2)
+    depth = np.clip(depth, 0, 1)
+    return depth
+def process_image(input_image, blur_type="gaussian", blur_sigma=15):
+    """Process the input image and return the results.
+
+    Args:
+        input_image: Image from Gradio (NumPy array or PIL image), or None
+            when nothing has been uploaded.
+        blur_type: Any value starting with "gaussian" (case-insensitive), e.g.
+            "gaussian" or the UI label "Gaussian Blur", selects the
+            segmentation-based background blur. Every other value selects the
+            depth-based blur.
+        blur_sigma: Gaussian sigma; for depth-based blur it is the sigma of
+            the strongest of the five blur levels.
+
+    Returns:
+        Tuple (result, mask_color, depth_vis) of RGB uint8 arrays. On failure
+        the input image is returned three times; for a None input, three Nones.
+    """
+    # Nothing uploaded yet: leave all three outputs empty
+    if input_image is None:
+        return None, None, None
+    try:
+        # Convert from Gradio format
+        img = np.array(input_image)
+        if img.ndim == 2:  # Grayscale
+            img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
+        elif img.shape[2] == 4:  # RGBA
+            img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
+        # 1. Get segmentation mask
+        mask = get_foreground_mask(img)
+        mask_vis = (np.clip(mask, 0, 1) * 255).astype(np.uint8)
+        # applyColorMap returns BGR while the UI displays RGB, so swap the
+        # channel order to show the real JET palette
+        mask_color = cv2.cvtColor(
+            cv2.applyColorMap(mask_vis, cv2.COLORMAP_JET), cv2.COLOR_BGR2RGB
+        )
+        # 2. Get depth map
+        depth_map = get_depth_map(img)
+        depth_norm = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min() + 1e-8)
+        depth_vis = plt.cm.viridis(depth_norm)[:, :, :3]
+        depth_vis = (depth_vis * 255).astype(np.uint8)
+        # The UI radio sends its label ("Gaussian Blur"), not the short key
+        # "gaussian", so match on a case-insensitive prefix to accept both
+        use_gaussian = str(blur_type).strip().lower().startswith("gaussian")
+        if use_gaussian:
+            # Apply regular Gaussian blur
+            blurred_img = apply_gaussian_blur(img, sigma=blur_sigma)
+            # Combine original foreground with blurred background; the mask
+            # gets a trailing axis so it broadcasts over the colour channels
+            alpha = mask[:, :, np.newaxis]
+            result = alpha * img + (1 - alpha) * blurred_img
+        else:  # depth-based blur
+            result = img.copy()
+            # Apply varying levels of blur based on depth
+            # For simplicity, we'll use 5 levels of blur
+            for i in range(1, 6):
+                sigma = blur_sigma * i / 5  # Increasing sigma value
+                level_blurred = apply_gaussian_blur(img, sigma=sigma)
+                # Pixels whose normalised depth falls into this level's band
+                in_band = (depth_norm > (i - 1) / 5) & (depth_norm <= i / 5)
+                result[in_band] = level_blurred[in_band]
+        # Convert to uint8
+        result = np.clip(result, 0, 255).astype(np.uint8)
+        return result, mask_color, depth_vis
+    except Exception as e:
+        print(f"Error processing image: {e}")
+        # Return original image if processing fails
+        if isinstance(input_image, np.ndarray):
+            return input_image, input_image, input_image
+        img = np.array(input_image)
+        return img, img, img
+# Create Gradio interface
+with gr.Blocks(title="Image Blur Effects") as demo:
+    gr.Markdown("# Image Blur Effects App")
+    gr.Markdown("Upload an image to apply two types of blur effects:")
+    gr.Markdown("1. **Gaussian Blur**: Blurs the background while keeping the foreground sharp")
+    gr.Markdown("2. **Depth-based Lens Blur**: Applies varying blur intensities based on estimated depth")
+    with gr.Row():
+        input_image = gr.Image(label="Input Image", type="numpy")
+        output_image = gr.Image(label="Output Image")
+    with gr.Row():
+        blur_effect_type = gr.Radio(
+            ["Gaussian Blur", "Depth-based Lens Blur"],
+            label="Blur Effect Type",
+            value="Gaussian Blur"
+        )
+        blur_intensity = gr.Slider(
+            minimum=1,
+            maximum=30,
+            value=15,
+            step=1,
+            label="Blur Intensity"
+        )
     with gr.Row():
+        apply_button = gr.Button("Apply Effect")
     with gr.Row():
+        segmentation_mask = gr.Image(label="Segmentation Mask")
+        depth_map = gr.Image(label="Depth Map")
+    # Set up the click event
+    apply_button.click(
+        process_image,
+        inputs=[input_image, blur_effect_type, blur_intensity],
         outputs=[output_image, segmentation_mask, depth_map]
     )
+    # Examples section
+    gr.Markdown("## How to use")
+    gr.Markdown("1. Upload your image")
+    gr.Markdown("2. Select blur type (Gaussian or Depth-based)")
+    gr.Markdown("3. Adjust blur intensity")
+    gr.Markdown("4. Click 'Apply Effect'")
+# Launch the demo
 demo.launch()