Spaces:

nsathya5
/

Background-Effects

Running

App Files Files Community

nsathya5 committed on Mar 30

Commit

fa47ceb

verified ·

1 Parent(s): d460634

Update app.py

Browse files

Files changed (1) hide show

app.py +192 -205

app.py CHANGED Viewed

@@ -1,238 +1,224 @@
 import gradio as gr
 import numpy as np
-import cv2
-from PIL import Image
 import matplotlib.pyplot as plt
-def apply_gaussian_blur(image, sigma=15):
-    """Apply Gaussian blur with specified sigma value."""
-    return cv2.GaussianBlur(image, (0, 0), sigma)
-def create_foreground_mask(image):
-    """Create a foreground mask using simple computer vision techniques.
-    Returns a mask where 1 is foreground and 0 is background."""
-    # Convert to RGB if needed
-    if len(image.shape) == 2:
-        img_rgb = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
-    elif image.shape[2] == 4:
-        img_rgb = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
-    else:
-        img_rgb = image.copy()
-    # Try to use GrabCut algorithm for foreground extraction
-    try:
-        # Create initial mask
-        mask = np.zeros(image.shape[:2], np.uint8)
-        # Set a rectangle focusing on the center as probable foreground
-        h, w = image.shape[:2]
-        # Adjust these values to focus more on the central figure
-        rect_w = int(w * 0.4)
-        rect_h = int(h * 0.7)
-        rect_x = int((w - rect_w) / 2)
-        rect_y = int((h - rect_h) / 2)
-        rect = (rect_x, rect_y, rect_w, rect_h)
-        # Background and foreground models
-        bgd_model = np.zeros((1, 65), np.float64)
-        fgd_model = np.zeros((1, 65), np.float64)
-        # Apply GrabCut
-        cv2.grabCut(img_rgb, mask, rect, bgd_model, fgd_model, 5, cv2.GC_INIT_WITH_RECT)
-        # Create mask where certain (foreground) or probable (foreground) is 1
-        # 0 and 2 are background, 1 and 3 are foreground
-        mask2 = np.where((mask == 1) | (mask == 3), 1, 0).astype('float32')
-        # If the mask is almost empty, fallback to a simpler method
-        if mask2.sum() < (h * w * 0.05):
-            raise Exception("GrabCut produced an empty mask")
-        # Smooth the mask
-        mask2 = cv2.GaussianBlur(mask2, (21, 21), 7)
-        return mask2
-    except Exception as e:
-        print(f"GrabCut failed: {e}, using fallback method")
-    # Try color-based segmentation as a fallback
-    try:
-        # Convert to HSV color space
-        hsv = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2HSV)
-        # Calculate the histogram of the central area
-        # This assumes the foreground object is in the center
-        center_y, center_x = h // 2, w // 2
-        center_size = min(h, w) // 4
-        center_y1 = max(0, center_y - center_size)
-        center_y2 = min(h, center_y + center_size)
-        center_x1 = max(0, center_x - center_size)
-        center_x2 = min(w, center_x + center_size)
-        center_hsv = hsv[center_y1:center_y2, center_x1:center_x2]
-        # Calculate histogram of central region (assumed to be foreground)
-        hist_hue = cv2.calcHist([center_hsv], [0], None, [180], [0, 180])
-        hist_sat = cv2.calcHist([center_hsv], [1], None, [256], [0, 256])
-        # Normalize histograms
-        cv2.normalize(hist_hue, hist_hue, 0, 255, cv2.NORM_MINMAX)
-        cv2.normalize(hist_sat, hist_sat, 0, 255, cv2.NORM_MINMAX)
-        # Create a mask based on color similarity to center region
-        h_channel = hsv[:,:,0]
-        s_channel = hsv[:,:,1]
-        h_score = hist_hue[h_channel.astype(int)]
-        s_score = hist_sat[s_channel.astype(int)]
-        # Combine scores
-        total_score = (h_score.reshape(h, w) + s_score.reshape(h, w)) / 2
-        # Normalize and threshold
-        total_score = total_score / total_score.max()
-        mask = (total_score > 0.4).astype(np.float32)
-        # Smooth the mask
-        mask = cv2.GaussianBlur(mask, (21, 21), 5)
-        return mask
-    except Exception as e:
-        print(f"Color segmentation failed: {e}, using simple mask")
-    # Final fallback: Create a simple radial mask focusing on center
-    h, w = image.shape[:2]
-    y, x = np.ogrid[:h, :w]
-    center_y, center_x = h / 2, w / 2
-    # Create a circular mask (foreground is in center)
-    mask = ((x - center_x)**2 / (w/3)**2 + (y - center_y)**2 / (h/3)**2) <= 1
-    # Convert to float and smooth edges
-    mask = mask.astype(np.float32)
-    mask = cv2.GaussianBlur(mask, (51, 51), 30)
-    return mask
-def create_depth_map(image):
-    """Create a depth map using simple techniques.
-    Lower values in the depth map mean closer to camera."""
-    # Convert to grayscale for processing
-    if len(image.shape) > 2:
-        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
     else:
-        gray = image.copy()
-    # 1. Use Sobel for edge detection (sharp edges = closer)
-    sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
-    sobely = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
-    gradient_magnitude = np.sqrt(sobelx**2 + sobely**2)
-    gradient_norm = gradient_magnitude / gradient_magnitude.max()
-    # 2. Create radial gradient from center (center = closer)
-    h, w = image.shape[:2]
-    y, x = np.ogrid[:h, :w]
-    center_y, center_x = h / 2, w / 2
-    radial = ((x - center_x)**2 / (w/2)**2 + (y - center_y)**2 / (h/2)**2)
-    radial = np.clip(radial, 0, 1)
-    # 3. Combine (higher value = further from camera)
-    # Edges and center have lower values (closer)
-    depth = 0.7 * radial + 0.3 * (1 - gradient_norm)
-    # 4. Smooth the depth map
-    depth = cv2.GaussianBlur(depth, (21, 21), 5)
-    return depth
 def process_image(input_image, blur_type="Gaussian Blur", blur_intensity=15):
-    """Process the input image and return the results.
-    Ensures the foreground is kept sharp while background is blurred."""
     try:
-        # Convert from Gradio format to numpy
-        if isinstance(input_image, np.ndarray):
-            img = input_image.copy()
-        else:
             img = np.array(input_image)
-        # Handle grayscale or RGBA images
         if img.ndim == 2:  # Grayscale
-            img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
         elif img.shape[2] == 4:  # RGBA
-            img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
-        # 1. Get foreground mask (1 = foreground, 0 = background)
-        mask = create_foreground_mask(img)
-        # Visualize the mask (foreground in red)
-        mask_vis = np.zeros_like(img)
-        mask_vis[:,:,0] = mask * 255  # Red channel
-        # 2. Get depth map (lower value = closer to camera)
-        depth_map = create_depth_map(img)
-        # Normalize depth map for visualization
-        depth_norm = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min() + 1e-8)
-        depth_vis = plt.cm.viridis(depth_norm)[:, :, :3]
-        depth_vis = (depth_vis * 255).astype(np.uint8)
-        # Apply appropriate blur effect
-        result = img.copy()
         if blur_type == "Gaussian Blur":
-            # Apply standard Gaussian blur to the entire image
-            blurred_img = apply_gaussian_blur(img, sigma=blur_intensity)
-            # Keep the foreground sharp by using the mask:
-            # result = foreground + blurred background
-            # = mask * original + (1-mask) * blurred
-            for c in range(3):
-                result[:,:,c] = mask * img[:,:,c] + (1-mask) * blurred_img[:,:,c]
         else:  # "Depth-based Lens Blur"
-            # For depth-based blur, we apply blur based on depth values
-            # First, combine mask with depth (ensure foreground stays sharp)
-            combined_depth = depth_norm.copy()
-            # Set foreground areas to 0 in depth map (closest)
-            combined_depth = combined_depth * (1 - mask)
-            # Create multiple blur levels based on depth
-            num_levels = 5
-            for i in range(num_levels):
-                # Calculate sigma for this level (further = more blur)
-                sigma = blur_intensity * (i + 1) / num_levels
-                level_blurred = apply_gaussian_blur(img, sigma=sigma)
-                # Calculate weight for this blur level
-                depth_min = i / num_levels
-                depth_max = (i + 1) / num_levels
-                weight = (combined_depth >= depth_min) & (combined_depth < depth_max)
-                # Apply this blur level where applicable
-                for c in range(3):
-                    result[:,:,c] = np.where(weight, level_blurred[:,:,c], result[:,:,c])
-            # Handle max depth level
-            max_sigma = blur_intensity
-            max_blurred = apply_gaussian_blur(img, sigma=max_sigma)
-            max_weight = (combined_depth >= ((num_levels-1) / num_levels))
-            for c in range(3):
-                result[:,:,c] = np.where(max_weight, max_blurred[:,:,c], result[:,:,c])
-            # Ensure foreground remains completely sharp
-            for c in range(3):
-                result[:,:,c] = mask * img[:,:,c] + (1-mask) * result[:,:,c]
-        # Convert to uint8 for display
-        result = result.astype(np.uint8)
-        mask_vis = mask_vis.astype(np.uint8)
-        return result, mask_vis, depth_vis
     except Exception as e:
         print(f"Error processing image: {e}")
         # Return original image if processing fails
         if isinstance(input_image, np.ndarray):
             return input_image, input_image, input_image
@@ -269,14 +255,14 @@ with gr.Blocks(title="Image Blur Effects") as demo:
         apply_button = gr.Button("Apply Effect")
     with gr.Row():
-        segmentation_mask = gr.Image(label="Foreground Mask")
         depth_map = gr.Image(label="Depth Map")
     # Set up the click event
     apply_button.click(
         process_image,
         inputs=[input_image, blur_effect_type, blur_intensity],
-        outputs=[output_image, segmentation_mask, depth_map]
     )
     gr.Markdown("## How to Use")
@@ -288,7 +274,8 @@ with gr.Blocks(title="Image Blur Effects") as demo:
     gr.Markdown("### Notes")
     gr.Markdown("- The red areas in the Foreground Mask show what will remain sharp")
     gr.Markdown("- Depth-based blur creates a more realistic effect with blur increasing with distance")
-    gr.Markdown("- For best results, ensure your subject is clearly visible in the center of the image")
 # Launch the demo
 demo.launch()

 import gradio as gr
+import torch
 import numpy as np
 import matplotlib.pyplot as plt
+from PIL import Image
+from transformers import DPTImageProcessor, DPTForDepthEstimation
+from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation
+from scipy.ndimage import gaussian_filter
+import cv2
+import os
+import io
+import time
# Load models globally to avoid reloading for each inference
print("Loading models...")

# Checkpoint ids are named once so that each processor/model pair is always
# loaded from the same checkpoint.
SEG_CHECKPOINT = "nvidia/segformer-b5-finetuned-ade-640-640"
DEPTH_CHECKPOINT = "Intel/dpt-large"

# Pre-bind every handle so the names exist even when loading fails; code that
# uses them can then test for None instead of hitting a NameError.
seg_processor = None
seg_model = None
depth_processor = None
depth_model = None

# Load segmentation model (best effort: a failure is reported, not raised)
try:
    seg_processor = SegformerImageProcessor.from_pretrained(SEG_CHECKPOINT)
    seg_model = SegformerForSemanticSegmentation.from_pretrained(SEG_CHECKPOINT)
    print("✓ Segmentation model loaded successfully")
except Exception as e:
    print(f"! Error loading segmentation model: {e}")

# Load depth estimation model (best effort: a failure is reported, not raised)
try:
    depth_processor = DPTImageProcessor.from_pretrained(DEPTH_CHECKPOINT)
    depth_model = DPTForDepthEstimation.from_pretrained(DEPTH_CHECKPOINT)
    print("✓ Depth model loaded successfully")
except Exception as e:
    print(f"! Error loading depth model: {e}")
+# Function for image segmentation
def segment_image(image):
    """Segment the image to extract person/foreground.

    Args:
        image: A PIL image, or an array accepted by ``Image.fromarray``.

    Returns:
        A ``(mask_array, mask_rgb)`` tuple, both at the input's resolution:
        ``mask_array`` is a 2-D boolean array that is True on foreground
        pixels, and ``mask_rgb`` is an ``(H, W, 3)`` uint8 image whose red
        channel is 255 on those same pixels (for display).
    """
    print("Running image segmentation with Segformer...")

    # Convert to PIL Image if needed
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)
    # Normalize to RGB so grayscale or RGBA inputs do not reach the model
    # with an unexpected channel count.
    image = image.convert("RGB")

    original_size = image.size  # PIL order: (width, height)
    model_image = image.resize((512, 512))

    # Run inference without tracking gradients
    inputs = seg_processor(images=model_image, return_tensors="pt")
    with torch.no_grad():
        logits = seg_model(**inputs).logits

    # Per-pixel class ids for the single image in the batch, converted to
    # NumPy once instead of once per candidate class.
    class_map = torch.argmax(logits, dim=1)[0].cpu().numpy()

    # Person class (id 12 in the ADE20K label set used by this checkpoint)
    person_class = 12
    binary_mask = class_map == person_class

    # If almost no pixels were classified as person, fall back to whichever
    # of the candidate classes covers the most pixels.
    # NOTE(review): these ids were originally described as "vehicles,
    # animals"; in the ADE20K-150 label list they appear to be earth, door,
    # table, mountain and plant - verify against seg_model.config.id2label.
    if binary_mask.sum() < 100:
        for cls in (13, 14, 15, 16, 17):
            cls_mask = class_map == cls
            if cls_mask.sum() > binary_mask.sum():
                binary_mask = cls_mask

    # Clean up the mask: close small holes, then remove small specks
    mask_cv = binary_mask.astype(np.uint8) * 255
    kernel = np.ones((5, 5), np.uint8)
    mask_cv = cv2.morphologyEx(mask_cv, cv2.MORPH_CLOSE, kernel)
    mask_cv = cv2.morphologyEx(mask_cv, cv2.MORPH_OPEN, kernel)

    # Smooth the outline, then re-binarize
    mask_cv = cv2.GaussianBlur(mask_cv, (9, 9), 0)
    _, mask_cv = cv2.threshold(mask_cv, 128, 255, cv2.THRESH_BINARY)

    # Resize back to the original image size and binarize again, since the
    # bicubic resampling produces intermediate values along the edges.
    mask_image = Image.fromarray(mask_cv).resize(original_size, Image.BICUBIC)
    mask_array = np.array(mask_image) > 0

    # Red-channel visualization of the foreground
    mask_rgb = np.zeros((mask_array.shape[0], mask_array.shape[1], 3), dtype=np.uint8)
    mask_rgb[mask_array, 0] = 255

    return mask_array, mask_rgb
+# Function to apply Gaussian blur to background
def apply_background_blur(image, mask, sigma=15):
    """Apply Gaussian blur to background while keeping foreground sharp.

    Args:
        image: PIL image or array-like of shape ``(H, W)`` or ``(H, W, C)``.
            Any channel count is accepted.
        mask: Foreground mask on the same ``H x W`` grid, as an array or
            PIL image. Values greater than zero (or True) mark foreground.
            For a 3-D mask only the first channel is read.
        sigma: Standard deviation of the Gaussian kernel, applied along the
            two spatial axes only.

    Returns:
        A new ``numpy.ndarray`` with the image's shape and dtype: original
        pixels where the mask is set, blurred pixels everywhere else.

    Raises:
        ValueError: If the mask's height/width differ from the image's.
    """
    print(f"Applying background blur with sigma={sigma}...")

    # np.asarray accepts both PIL images and arrays
    image_array = np.asarray(image)

    # Ensure mask is a 2-D boolean array
    mask_array = np.asarray(mask)
    if mask_array.ndim == 3:
        mask_array = mask_array[:, :, 0]
    binary_mask = mask_array > 0

    if binary_mask.shape != image_array.shape[:2]:
        raise ValueError(
            f"mask shape {binary_mask.shape} does not match "
            f"image shape {image_array.shape[:2]}"
        )

    # Blur height and width only; a sigma of 0 on any trailing channel axis
    # keeps the channels independent, exactly like filtering each one alone.
    per_axis_sigma = (sigma, sigma) + (0,) * (image_array.ndim - 2)
    blurred = gaussian_filter(image_array, sigma=per_axis_sigma)

    # Broadcast the 2-D mask across the channel axis when there is one
    if image_array.ndim == 3:
        binary_mask = binary_mask[:, :, np.newaxis]

    # Combine original foreground with blurred background
    return np.where(binary_mask, image_array, blurred)
+# Function for depth estimation and depth-based blur
def apply_depth_based_blur(image, mask=None, max_sigma=15):
    """Apply depth-based blur using DPT model.

    The image is blended with a maximally blurred copy of itself. The
    per-pixel blend weight is one minus the normalized model output, so
    pixels with a high predicted value stay sharp. Pixels inside ``mask``
    are forced to weight 0 (no blur).

    Args:
        image: A PIL image, or an array accepted by ``Image.fromarray``.
        mask: Optional foreground mask as an array or PIL image. Values
            greater than zero (or True) mark foreground. For a 3-D mask only
            the first channel is read.
        max_sigma: Gaussian sigma of the fully blurred copy, measured in
            pixels of the 512x512 working image.

    Returns:
        A ``(depth_map_image, depth_blur_image)`` tuple of uint8 RGB arrays,
        both resized to the input's resolution: a viridis rendering of the
        normalized depth map, and the blurred result.

    NOTE(review): blending happens on a 512x512 copy that is then resized
    back, so the "sharp" foreground is resampled twice for larger inputs.
    """
    print(f"Running depth estimation and applying depth-based blur with max_sigma={max_sigma}...")

    # Convert to PIL Image if needed; force RGB because the blend below
    # works on exactly three channels.
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)
    image = image.convert("RGB")

    original_size = image.size  # PIL order: (width, height)
    model_size = (512, 512)
    model_image = image.resize(model_size, Image.LANCZOS)

    # Run depth inference without tracking gradients
    inputs = depth_processor(images=model_image, return_tensors="pt")
    with torch.no_grad():
        predicted_depth = depth_model(**inputs).predicted_depth

    # Normalize to [0, 1]; a perfectly flat prediction would otherwise
    # divide by zero and fill the weights with NaN.
    depth = predicted_depth.squeeze().cpu().numpy().astype(np.float32)
    depth_min = float(depth.min())
    depth_span = float(depth.max()) - depth_min
    if depth_span > 0:
        depth_map = (depth - depth_min) / depth_span
    else:
        depth_map = np.zeros_like(depth)

    # Resize the depth map to the working resolution
    depth_pil = Image.fromarray(depth_map)
    depth_map_resized = np.array(depth_pil.resize(model_size, Image.LANCZOS))

    # Invert so that high depth-map values get little blur. Lanczos
    # resampling can overshoot [0, 1], so clamp to keep the blend valid.
    blur_weight = np.clip(1.0 - depth_map_resized, 0.0, 1.0)

    # If a mask is provided, ensure the foreground is not blurred at all
    if mask is not None:
        mask_array = np.asarray(mask)
        if mask_array.ndim == 3:
            mask_array = mask_array[:, :, 0]
        # Binarize before scaling so a uint8 0/255 mask cannot overflow
        mask_pil = Image.fromarray((mask_array > 0).astype(np.uint8) * 255)
        mask_resized = np.array(mask_pil.resize(model_size, Image.LANCZOS)) > 128
        blur_weight = np.where(mask_resized, 0.0, blur_weight)

    # Blend in float: sharp where the weight is 0, fully blurred where it
    # is 1. Sigma 0 on the channel axis keeps the channels independent.
    original_array = np.asarray(model_image, dtype=np.float32)
    max_blurred = gaussian_filter(original_array, sigma=(max_sigma, max_sigma, 0))
    weight = blur_weight[:, :, np.newaxis]
    blended = (1.0 - weight) * original_array + weight * max_blurred
    result_array = np.clip(np.rint(blended), 0, 255).astype(np.uint8)

    # Resize back to original image size
    depth_blur = Image.fromarray(result_array)
    depth_blur_image = depth_blur.resize(original_size, Image.LANCZOS)

    # Create colored depth map for visualization (alpha channel dropped)
    depth_map_colored = plt.cm.viridis(depth_map)[:, :, :3]
    depth_map_viz = Image.fromarray((depth_map_colored * 255).astype(np.uint8))
    depth_map_image = depth_map_viz.resize(original_size, Image.LANCZOS)

    return np.array(depth_map_image), np.array(depth_blur_image)
+# Main processing function
 def process_image(input_image, blur_type="Gaussian Blur", blur_intensity=15):
+    """Process the input image with the selected blur effect"""
     try:
+        # Convert from Gradio format
+        if not isinstance(input_image, np.ndarray):
             img = np.array(input_image)
+        else:
+            img = input_image.copy()
+        # Ensure RGB format
         if img.ndim == 2:  # Grayscale
+            img = np.stack([img] * 3, axis=2)
         elif img.shape[2] == 4:  # RGBA
+            img = img[:, :, :3]  # Drop alpha channel
+        # Convert to PIL for processing
+        pil_img = Image.fromarray(img)
+        # Step 1: Get segmentation mask
+        mask_array, mask_viz = segment_image(pil_img)
+        # Step 2: Apply appropriate blur effect
         if blur_type == "Gaussian Blur":
+            # Apply regular Gaussian blur
+            result = apply_background_blur(pil_img, mask_array, sigma=blur_intensity)
+            depth_viz = np.zeros_like(img)  # Placeholder for depth map
         else:  # "Depth-based Lens Blur"
+            # Apply depth-based blur
+            depth_viz, result = apply_depth_based_blur(pil_img, mask_array, max_sigma=blur_intensity)
+        return result, mask_viz, depth_viz
     except Exception as e:
         print(f"Error processing image: {e}")
+        import traceback
+        traceback.print_exc()
         # Return original image if processing fails
         if isinstance(input_image, np.ndarray):
             return input_image, input_image, input_image
         apply_button = gr.Button("Apply Effect")
     with gr.Row():
+        foreground_mask = gr.Image(label="Foreground Mask")
         depth_map = gr.Image(label="Depth Map")
     # Set up the click event
     apply_button.click(
         process_image,
         inputs=[input_image, blur_effect_type, blur_intensity],
+        outputs=[output_image, foreground_mask, depth_map]
     )
     gr.Markdown("## How to Use")
     gr.Markdown("### Notes")
     gr.Markdown("- The red areas in the Foreground Mask show what will remain sharp")
     gr.Markdown("- Depth-based blur creates a more realistic effect with blur increasing with distance")
+    gr.Markdown("- For best results, use images with clear foreground subjects")
+    gr.Markdown("- Created for EEE 515 Assignment (Problem 2, Part 6)")
 # Launch the demo
 demo.launch()