Update app.py
app.py
CHANGED
@@ -1,83 +1,108 @@
 import gradio as gr
-import torch
 import numpy as np
 import cv2
 from PIL import Image
 import matplotlib.pyplot as plt
-from transformers import AutoFeatureExtractor, SegformerForSemanticSegmentation
-from transformers import DPTFeatureExtractor, DPTForDepthEstimation
-
-# Load a smaller segmentation model
-try:
-    seg_processor = AutoFeatureExtractor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
-    seg_model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
-    print("✓ Segmentation model loaded successfully")
-except Exception as e:
-    print(f"! Error loading segmentation model: {e}")
-    # Fallback implementation will be used
-
-# Load a smaller depth estimation model
-try:
-    depth_processor = DPTFeatureExtractor.from_pretrained("Intel/dpt-hybrid-midas")
-    depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-hybrid-midas")
-    print("✓ Depth model loaded successfully")
-except Exception as e:
-    print(f"! Error loading depth model: {e}")
-    # Fallback implementation will be used
 
 def apply_gaussian_blur(image, sigma=15):
     """Apply Gaussian blur with specified sigma value."""
     return cv2.GaussianBlur(image, (0, 0), sigma)
 
-def get_foreground_mask(image):
-    """…
+def create_foreground_mask(image):
+    """Create a foreground mask using simple computer vision techniques.
+    Returns a mask where 1 is foreground and 0 is background."""
+    # Convert to RGB if needed
+    if len(image.shape) == 2:
+        img_rgb = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
+    elif image.shape[2] == 4:
+        img_rgb = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
+    else:
+        img_rgb = image.copy()
+
+    # Try to use GrabCut algorithm for foreground extraction
     try:
-        # …
-        )
-
-        # Get mask (consider classes that are typically foreground, e.g., person)
-        # In ADE20K dataset, person is class 12
-        pred_seg = upsampled_logits.argmax(dim=1)[0]
-        mask = (pred_seg == 12).float().cpu().numpy()  # Person class
-
-        # If person isn't detected, try other common foreground classes
-        if mask.sum() < 100:  # If almost no pixels were classified as person
-            for cls in [13, 14, 15]:  # Try other classes like vehicle, animal, etc.
-                cls_mask = (pred_seg == cls).float().cpu().numpy()
-                if cls_mask.sum() > mask.sum():
-                    mask = cls_mask
-
+        # Create initial mask
+        mask = np.zeros(image.shape[:2], np.uint8)
+
+        # Set a rectangle focusing on the center as probable foreground
+        h, w = image.shape[:2]
+        # Adjust these values to focus more on the central figure
+        rect_w = int(w * 0.4)
+        rect_h = int(h * 0.7)
+        rect_x = int((w - rect_w) / 2)
+        rect_y = int((h - rect_h) / 2)
+        rect = (rect_x, rect_y, rect_w, rect_h)
+
+        # Background and foreground models
+        bgd_model = np.zeros((1, 65), np.float64)
+        fgd_model = np.zeros((1, 65), np.float64)
+
+        # Apply GrabCut
+        cv2.grabCut(img_rgb, mask, rect, bgd_model, fgd_model, 5, cv2.GC_INIT_WITH_RECT)
+
+        # Create mask where certain (foreground) or probable (foreground) is 1
+        # 0 and 2 are background, 1 and 3 are foreground
+        mask2 = np.where((mask == 1) | (mask == 3), 1, 0).astype('float32')
+
+        # If the mask is almost empty, fall back to a simpler method
+        if mask2.sum() < (h * w * 0.05):
+            raise Exception("GrabCut produced an empty mask")
 
+        # Smooth the mask
+        mask2 = cv2.GaussianBlur(mask2, (21, 21), 7)
+
+        return mask2
 
     except Exception as e:
-        print(f"…
+        print(f"GrabCut failed: {e}, using fallback method")
+
+        # Try color-based segmentation as a fallback
+        try:
+            # Convert to HSV color space
+            hsv = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2HSV)
+
+            # Calculate the histogram of the central area
+            # This assumes the foreground object is in the center
+            center_y, center_x = h // 2, w // 2
+            center_size = min(h, w) // 4
+            center_y1 = max(0, center_y - center_size)
+            center_y2 = min(h, center_y + center_size)
+            center_x1 = max(0, center_x - center_size)
+            center_x2 = min(w, center_x + center_size)
+
+            center_hsv = hsv[center_y1:center_y2, center_x1:center_x2]
+
+            # Calculate histogram of central region (assumed to be foreground)
+            hist_hue = cv2.calcHist([center_hsv], [0], None, [180], [0, 180])
+            hist_sat = cv2.calcHist([center_hsv], [1], None, [256], [0, 256])
+
+            # Normalize histograms
+            cv2.normalize(hist_hue, hist_hue, 0, 255, cv2.NORM_MINMAX)
+            cv2.normalize(hist_sat, hist_sat, 0, 255, cv2.NORM_MINMAX)
+
+            # Create a mask based on color similarity to center region
+            h_channel = hsv[:,:,0]
+            s_channel = hsv[:,:,1]
+
+            h_score = hist_hue[h_channel.astype(int)]
+            s_score = hist_sat[s_channel.astype(int)]
+
+            # Combine scores
+            total_score = (h_score.reshape(h, w) + s_score.reshape(h, w)) / 2
+
+            # Normalize and threshold
+            total_score = total_score / total_score.max()
+            mask = (total_score > 0.4).astype(np.float32)
+
+            # Smooth the mask
+            mask = cv2.GaussianBlur(mask, (21, 21), 5)
+
+            return mask
+
+        except Exception as e:
+            print(f"Color segmentation failed: {e}, using simple mask")
 
-    # …
+    # Final fallback: Create a simple radial mask focusing on center
     h, w = image.shape[:2]
     y, x = np.ogrid[:h, :w]
     center_y, center_x = h / 2, w / 2
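The raw labels 1 and 3 tested in the mask2 line above are OpenCV's GrabCut classes. A minimal equivalent sketch using the named constants (the helper name is ours, not part of the commit):

    import cv2
    import numpy as np

    def grabcut_labels_to_mask(mask: np.ndarray) -> np.ndarray:
        # Keep definite (cv2.GC_FGD == 1) and probable (cv2.GC_PR_FGD == 3) foreground
        fg = (mask == cv2.GC_FGD) | (mask == cv2.GC_PR_FGD)
        return fg.astype(np.float32)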
@@ -91,107 +116,120 @@ def get_foreground_mask(image):
 
     return mask
 
-def …
-    """…
-    …
-            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
-        elif image.shape[2] == 4:
-            image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
-            pil_image = Image.fromarray(image)
-        else:
-            pil_image = image.convert('RGB')
-
-        # Prepare image for the model
-        inputs = depth_processor(images=pil_image, return_tensors="pt")
-
-        # Run inference
-        with torch.no_grad():
-            outputs = depth_model(**inputs)
-
-        predicted_depth = outputs.predicted_depth
-
-        # Interpolate to original size if needed
-        depth_map = torch.nn.functional.interpolate(
-            predicted_depth.unsqueeze(1),
-            size=(image.shape[0], image.shape[1]),
-            mode="bicubic",
-            align_corners=False,
-        ).squeeze().cpu().numpy()
-
-        return depth_map
-
-    except Exception as e:
-        print(f"Error in depth estimation: {e}")
-
-    # …
+def create_depth_map(image):
+    """Create a depth map using simple techniques.
+    Lower values in the depth map mean closer to camera."""
+    # Convert to grayscale for processing
+    if len(image.shape) > 2:
+        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
+    else:
+        gray = image.copy()
+
+    # 1. Use Sobel for edge detection (sharp edges = closer)
+    sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
+    sobely = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
+    gradient_magnitude = np.sqrt(sobelx**2 + sobely**2)
+    gradient_norm = gradient_magnitude / gradient_magnitude.max()
+
+    # 2. Create radial gradient from center (center = closer)
     h, w = image.shape[:2]
     y, x = np.ogrid[:h, :w]
     center_y, center_x = h / 2, w / 2
-
-    # …
-    depth = (…
-    depth = np.clip(depth, 0, 1)
+    radial = ((x - center_x)**2 / (w/2)**2 + (y - center_y)**2 / (h/2)**2)
+    radial = np.clip(radial, 0, 1)
+
+    # 3. Combine (higher value = further from camera)
+    # Edges and center have lower values (closer)
+    depth = 0.7 * radial + 0.3 * (1 - gradient_norm)
+
+    # 4. Smooth the depth map
+    depth = cv2.GaussianBlur(depth, (21, 21), 5)
 
     return depth
 
-def process_image(input_image, blur_type="…
-    """Process the input image and return the results.
+def process_image(input_image, blur_type="Gaussian Blur", blur_intensity=15):
+    """Process the input image and return the results.
+    Ensures the foreground is kept sharp while background is blurred."""
     try:
-        # Convert from Gradio format
-        …
+        # Convert from Gradio format to numpy
+        if isinstance(input_image, np.ndarray):
+            img = input_image.copy()
+        else:
+            img = np.array(input_image)
+
+        # Handle grayscale or RGBA images
         if img.ndim == 2:  # Grayscale
             img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
         elif img.shape[2] == 4:  # RGBA
             img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
 
-        # 1. Get …
-        mask = …
-        mask_vis = (mask * 255).astype(np.uint8)
-        mask_color = cv2.applyColorMap(mask_vis, cv2.COLORMAP_JET)
-
-        # …
+        # 1. Get foreground mask (1 = foreground, 0 = background)
+        mask = create_foreground_mask(img)
+
+        # Visualize the mask (foreground in red)
+        mask_vis = np.zeros_like(img)
+        mask_vis[:,:,0] = mask * 255  # Red channel
+
+        # 2. Get depth map (lower value = closer to camera)
+        depth_map = create_depth_map(img)
+
+        # Normalize depth map for visualization
         depth_norm = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min() + 1e-8)
         depth_vis = plt.cm.viridis(depth_norm)[:, :, :3]
         depth_vis = (depth_vis * 255).astype(np.uint8)
 
         # Apply appropriate blur effect
-        …
+        result = img.copy()
+
+        if blur_type == "Gaussian Blur":
+            # Apply standard Gaussian blur to the entire image
+            blurred_img = apply_gaussian_blur(img, sigma=blur_intensity)
 
-        # …
-        result = …
+            # Keep the foreground sharp by using the mask:
+            # result = foreground + blurred background
+            #        = mask * original + (1-mask) * blurred
+            for c in range(3):
                 result[:,:,c] = mask * img[:,:,c] + (1-mask) * blurred_img[:,:,c]
 
-        else:  # …
-            # …
-            …
+        else:  # "Depth-based Lens Blur"
+            # For depth-based blur, we apply blur based on depth values
+            # First, combine mask with depth (ensure foreground stays sharp)
+            combined_depth = depth_norm.copy()
+            # Set foreground areas to 0 in depth map (closest)
+            combined_depth = combined_depth * (1 - mask)
 
-            # …
-            for i in range(…
+            # Create multiple blur levels based on depth
+            num_levels = 5
+            for i in range(num_levels):
+                # Calculate sigma for this level (further = more blur)
+                sigma = blur_intensity * (i + 1) / num_levels
                 level_blurred = apply_gaussian_blur(img, sigma=sigma)
 
                 # Calculate weight for this blur level
-                …
+                depth_min = i / num_levels
+                depth_max = (i + 1) / num_levels
+                weight = (combined_depth >= depth_min) & (combined_depth < depth_max)
 
                 # Apply this blur level where applicable
                 for c in range(3):
                     result[:,:,c] = np.where(weight, level_blurred[:,:,c], result[:,:,c])
+
+            # Handle max depth level
+            max_sigma = blur_intensity
+            max_blurred = apply_gaussian_blur(img, sigma=max_sigma)
+            max_weight = (combined_depth >= ((num_levels-1) / num_levels))
+            for c in range(3):
+                result[:,:,c] = np.where(max_weight, max_blurred[:,:,c], result[:,:,c])
+
+            # Ensure foreground remains completely sharp
+            for c in range(3):
+                result[:,:,c] = mask * img[:,:,c] + (1-mask) * result[:,:,c]
 
-        # Convert to uint8
+        # Convert to uint8 for display
         result = result.astype(np.uint8)
+        mask_vis = mask_vis.astype(np.uint8)
 
-        return result, …
+        return result, mask_vis, depth_vis
 
     except Exception as e:
         print(f"Error processing image: {e}")
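The depth-based branch above splits the normalized depth range [0, 1) into num_levels equal bands and blurs each band with a proportionally larger sigma. A standalone sketch of that banding logic (the sample depth values are made up for illustration):

    import numpy as np

    num_levels = 5
    blur_intensity = 15
    combined_depth = np.array([0.05, 0.30, 0.55, 0.90])  # made-up sample depths

    for i in range(num_levels):
        depth_min = i / num_levels
        depth_max = (i + 1) / num_levels
        in_band = (combined_depth >= depth_min) & (combined_depth < depth_max)
        sigma = blur_intensity * (i + 1) / num_levels
        print(f"band [{depth_min:.1f}, {depth_max:.1f}) -> sigma {sigma:.1f}: {in_band}")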
@@ -231,7 +269,7 @@ with gr.Blocks(title="Image Blur Effects") as demo:
         apply_button = gr.Button("Apply Effect")
 
     with gr.Row():
-        segmentation_mask = gr.Image(label="…
+        segmentation_mask = gr.Image(label="Foreground Mask")
         depth_map = gr.Image(label="Depth Map")
 
     # Set up the click event
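The outputs wired in the next hunk must match process_image's return order (result, mask_vis, depth_vis). The click wiring itself is unchanged context not shown in this diff; it presumably follows the standard Gradio pattern, sketched below with assumed input component names:

    # Hypothetical wiring; the input component names are not visible in this diff
    apply_button.click(
        fn=process_image,
        inputs=[input_image, blur_type, blur_intensity],
        outputs=[output_image, segmentation_mask, depth_map],
    )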
@@ -241,12 +279,16 @@ with gr.Blocks(title="Image Blur Effects") as demo:
         outputs=[output_image, segmentation_mask, depth_map]
     )
 
-
-    gr.Markdown("## How to use")
+    gr.Markdown("## How to Use")
     gr.Markdown("1. Upload your image")
     gr.Markdown("2. Select blur type (Gaussian or Depth-based)")
     gr.Markdown("3. Adjust blur intensity")
     gr.Markdown("4. Click 'Apply Effect'")
+    gr.Markdown("")
+    gr.Markdown("### Notes")
+    gr.Markdown("- The red areas in the Foreground Mask show what will remain sharp")
+    gr.Markdown("- Depth-based blur creates a more realistic effect with blur increasing with distance")
+    gr.Markdown("- For best results, ensure your subject is clearly visible in the center of the image")
 
 # Launch the demo
 demo.launch()
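A minimal way to smoke-test the new pipeline without launching the Gradio UI (a sketch; test.jpg and result.png are assumed filenames, not part of this commit):

    import cv2

    img = cv2.cvtColor(cv2.imread("test.jpg"), cv2.COLOR_BGR2RGB)  # assumed local file
    result, mask_vis, depth_vis = process_image(img, blur_type="Depth-based Lens Blur", blur_intensity=25)
    cv2.imwrite("result.png", cv2.cvtColor(result, cv2.COLOR_RGB2BGR))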