Spaces:

pavank007
/

image-blurr-effects

Sleeping

App Files Files Community

pavank007 committed on Mar 31

Commit

5d770da

verified ·

1 Parent(s): 23bfbfd

Update app.py

Browse files

Files changed (1) hide show

app.py +190 -265

app.py CHANGED Viewed

@@ -1,289 +1,214 @@
 import gradio as gr
-import numpy as np
 import torch
-from PIL import Image, ImageFilter
 import cv2
-import os
-import sys
-import traceback
-from transformers import pipeline
-# Configure logging to console
-import logging
-logging.basicConfig(level=logging.INFO,
-                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-                    stream=sys.stdout)
-logger = logging.getLogger(__name__)
-# Set device to GPU if available
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-logger.info(f"Using device: {device}")
-# Use smaller, more efficient models for Hugging Face Space
-SEGMENTATION_MODEL = "facebook/sam-vit-base"
-DEPTH_MODEL = "depth-anything/Depth-Anything-V2-Small-hf"
-# Global variables for models
-segmentation_pipe = None
-depth_pipe = None
-def load_segmentation_model():
-    """Load the segmentation model on demand"""
-    global segmentation_pipe
-    if segmentation_pipe is None:
-        try:
-            logger.info("Loading segmentation model...")
-            segmentation_pipe = pipeline("image-segmentation", model=SEGMENTATION_MODEL)
-            logger.info("Segmentation model loaded successfully")
-        except Exception as e:
-            logger.error(f"Error loading segmentation model: {e}")
-            logger.error(traceback.format_exc())
-            return None
-    return segmentation_pipe
-def load_depth_model():
-    """Load the depth model on demand"""
-    global depth_pipe
-    if depth_pipe is None:
-        try:
-            logger.info("Loading depth estimation model...")
-            depth_pipe = pipeline("depth-estimation", model=DEPTH_MODEL)
-            logger.info("Depth estimation model loaded successfully")
-        except Exception as e:
-            logger.error(f"Error loading depth model: {e}")
-            logger.error(traceback.format_exc())
-            return None
-    return depth_pipe
-def get_segmentation_mask(input_image):
-    """Get segmentation mask using the segmentation pipeline"""
-    try:
-        # Load the model if not already loaded
-        model = load_segmentation_model()
-        if model is None:
-            logger.warning("Failed to load segmentation model, returning empty mask")
-            return Image.new('L', (512, 512), 0), input_image
-        # Process the image
-        input_pil = Image.fromarray(input_image).convert('RGB') if isinstance(input_image, np.ndarray) else input_image.convert('RGB')
-        input_pil = input_pil.resize((512, 512))
-        # Get segmentation results
-        results = model(input_pil)
-        # Extract mask (handling different model outputs)
-        if isinstance(results, list) and len(results) > 0:
-            if 'mask' in results[0]:
-                mask = results[0]['mask']
-                # Convert mask to proper format
-                if isinstance(mask, Image.Image):
-                    mask_array = np.array(mask)
-                else:
-                    mask_array = mask
-                mask_array = (mask_array * 255).astype(np.uint8)
-            else:
-                # Create composite mask from segmentation results
-                mask_array = np.zeros((512, 512), dtype=np.uint8)
-                for segment in results:
-                    if 'segmentation' in segment:
-                        mask_array = np.logical_or(mask_array, segment['segmentation']).astype(np.uint8) * 255
-        else:
-            # Create blank mask as fallback
-            mask_array = np.zeros((512, 512), dtype=np.uint8)
-        # Convert to PIL Image
-        mask_img = Image.fromarray(mask_array)
-        return mask_img, input_pil
-    except Exception as e:
-        logger.error(f"Error in segmentation: {e}")
-        logger.error(traceback.format_exc())
-        # Return a blank mask in case of error
-        if isinstance(input_image, np.ndarray):
-            return Image.new('L', (512, 512), 0), Image.fromarray(input_image).resize((512, 512))
-        else:
-            return Image.new('L', (512, 512), 0), input_image.resize((512, 512))
-def apply_background_blur(original_image, mask_image, sigma=15):
-    """Apply Gaussian blur to the background using a segmentation mask"""
-    try:
-        # Convert to PIL Image if needed
-        if isinstance(original_image, np.ndarray):
-            original_image = Image.fromarray(original_image)
-        if isinstance(mask_image, np.ndarray):
-            mask_image = Image.fromarray(mask_image)
-        # Ensure consistent sizes
-        original_image = original_image.resize((512, 512)).convert('RGB')
-        mask_image = mask_image.resize((512, 512)).convert('L')
-        # Ensure mask is binary
-        mask_array = np.array(mask_image)
-        _, binary_mask = cv2.threshold(mask_array, 127, 255, cv2.THRESH_BINARY)
-        # Create blurred version of the original
-        blurred_img = original_image.filter(ImageFilter.GaussianBlur(radius=sigma))
-        # Convert to numpy for processing
-        original_array = np.array(original_image)
-        blurred_array = np.array(blurred_img)
-        # Create mask for all 3 channels
-        mask_3d = np.stack([binary_mask, binary_mask, binary_mask], axis=2)
-        # Combine original foreground with blurred background
-        result_array = np.where(mask_3d == 255, original_array, blurred_array)
-        return Image.fromarray(result_array.astype(np.uint8))
-    except Exception as e:
-        logger.error(f"Error in background blur: {e}")
-        logger.error(traceback.format_exc())
-        # Return original image in case of error
-        return original_image
-def get_depth_map(input_image):
-    """Get depth map using the depth estimation pipeline"""
-    try:
-        # Load model if not already loaded
-        model = load_depth_model()
-        if model is None:
-            logger.warning("Failed to load depth model, returning empty depth map")
-            return Image.new('L', (512, 512), 128), np.ones((512, 512)) * 0.5
-        # Convert to PIL if needed
-        if isinstance(input_image, np.ndarray):
-            input_image = Image.fromarray(input_image)
-        # Ensure consistent format
-        input_image = input_image.resize((512, 512)).convert('RGB')
-        # Get depth estimation
-        result = model(input_image)
-        depth_map = result["depth"]
-        # Convert to numpy for further processing
-        depth_array = np.array(depth_map)
-        return depth_map, depth_array
-    except Exception as e:
-        logger.error(f"Error in depth estimation: {e}")
-        logger.error(traceback.format_exc())
-        # Return default depth in case of error
-        return Image.new('L', (512, 512), 128), np.ones((512, 512)) * 0.5
-def apply_depth_based_blur(original_image, depth_array, max_blur=30):
-    """Apply variable Gaussian blur based on depth"""
-    try:
-        # Convert to PIL if needed
-        if isinstance(original_image, np.ndarray):
-            original_image = Image.fromarray(original_image)
-        # Ensure consistent size
-        original_image = original_image.resize((512, 512)).convert('RGB')
-        # Handle depth array format
-        if len(depth_array.shape) == 3 and depth_array.shape[2] > 1:
-            depth_array = np.mean(depth_array, axis=2)
-        # Normalize depth values
-        depth_min = np.min(depth_array)
-        depth_max = np.max(depth_array)
-        if depth_max > depth_min:
-            normalized_depth = (depth_array - depth_min) / (depth_max - depth_min)
-        else:
-            # Handle case where depth is constant
-            normalized_depth = np.zeros_like(depth_array)
-        # Create progressively blurred versions of the image
-        blurred_images = []
-        for blur_amount in range(max_blur + 1):
-            blurred_images.append(original_image.filter(ImageFilter.GaussianBlur(radius=blur_amount)))
-        # Create output array
-        result_array = np.zeros((512, 512, 3), dtype=np.uint8)
-        # Apply variable blur based on depth
-        height, width = normalized_depth.shape
-        for y in range(height):
-            for x in range(width):
-                blur_radius = int(normalized_depth[y, x] * max_blur)
-                result_array[y, x] = np.array(blurred_images[blur_radius])[y, x]
-        return Image.fromarray(result_array)
-    except Exception as e:
-        logger.error(f"Error in depth-based blur: {e}")
-        logger.error(traceback.format_exc())
-        # Return original image in case of error
-        return original_image
-def process_image(input_image, blur_sigma=15, max_depth_blur=30):
-    """Process the image through all steps with error handling"""
-    try:
-        if input_image is None:
-            logger.warning("No input image provided")
-            return None, None, None, None, None
-        # Step 1: Get segmentation mask
-        mask, resized_image = get_segmentation_mask(input_image)
-        # Step 2: Apply background blur
-        blurred_background = apply_background_blur(resized_image, mask, sigma=blur_sigma)
-        # Step 3: Get depth map
-        depth_map, depth_array = get_depth_map(resized_image)
-        # Step 4: Apply depth-based blur
-        depth_blur = apply_depth_based_blur(resized_image, depth_array, max_blur=max_depth_blur)
-        # Convert to numpy arrays for Gradio
-        input_np = np.array(resized_image)
-        mask_np = np.array(mask)
-        blurred_np = np.array(blurred_background)
-        depth_map_np = np.array(depth_map)
-        depth_blur_np = np.array(depth_blur)
-        return input_np, mask_np, blurred_np, depth_map_np, depth_blur_np
-    except Exception as e:
-        logger.error(f"Error in image processing: {e}")
-        logger.error(traceback.format_exc())
-        # Create blank outputs in case of error
-        empty = np.zeros((512, 512, 3), dtype=np.uint8)
-        empty_mask = np.zeros((512, 512), dtype=np.uint8)
-        if input_image is not None and isinstance(input_image, np.ndarray):
-            img_resized = cv2.resize(input_image, (512, 512))
-            return img_resized, empty_mask, empty, empty_mask, empty
-        else:
-            return empty, empty_mask, empty, empty_mask, empty
 # Create Gradio Interface
-demo = gr.Interface(
-    fn=process_image,
-    inputs=[
-        gr.Image(type="numpy", label="Upload Image"),
-        gr.Slider(minimum=1, maximum=30, value=15, step=1, label="Background Blur Strength (σ)"),
-        gr.Slider(minimum=1, maximum=50, value=30, step=1, label="Max Depth Blur Strength")
-    ],
-    outputs=[
-        gr.Image(type="numpy", label="Original Image"),
-        gr.Image(type="numpy", label="Segmentation Mask"),
-        gr.Image(type="numpy", label="Background Blur"),
-        gr.Image(type="numpy", label="Depth Map"),
-        gr.Image(type="numpy", label="Depth-Based Lens Blur")
-    ],
-    title="Image Blur Effects - EEE 515 Assignment 3",
-    description="Upload an image to apply segmentation-based blur and depth-based lens blur effects",
-    examples=[["beach.jpeg", 15, 30]],
-    allow_flagging="never"
-)
 # Launch the app
-if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
 import torch
 import cv2
+import numpy as np
+from PIL import Image
+import requests
+from io import BytesIO
+from transformers import AutoFeatureExtractor, AutoModelForSemanticSegmentation
+from transformers import AutoImageProcessor, AutoModelForDepthEstimation
+import torch.nn.functional as F
+# Define device
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Load the segmentation model and its preprocessor once, at import time, and
+# move the model to the selected device.
+# NOTE(review): the checkpoint name ends in "coco-instance" while the loader is
+# AutoModelForSemanticSegmentation — confirm this pairing loads, and that the
+# model output exposes `.logits`, which apply_segmentation reads.
+segmentation_model_name = "facebook/mask2former-swin-tiny-coco-instance"
+seg_feature_extractor = AutoFeatureExtractor.from_pretrained(segmentation_model_name)
+seg_model = AutoModelForSemanticSegmentation.from_pretrained(segmentation_model_name).to(device)
+# Load the depth estimation model and its preprocessor the same way.
+# NOTE(review): confirm "intel-isl/MiDaS-small" resolves to a checkpoint that
+# AutoModelForDepthEstimation can load.
+depth_model_name = "intel-isl/MiDaS-small"
+depth_processor = AutoImageProcessor.from_pretrained(depth_model_name)
+depth_model = AutoModelForDepthEstimation.from_pretrained(depth_model_name).to(device)
+def apply_segmentation(input_image):
+    # Convert to PIL Image if needed
+    if not isinstance(input_image, Image.Image):
+        input_image = Image.fromarray(input_image)
+    # Resize to 512x512 for consistent processing
+    input_image = input_image.resize((512, 512))
+    # Prepare image for the model
+    inputs = seg_feature_extractor(images=input_image, return_tensors="pt").to(device)
+    # Forward pass
+    with torch.no_grad():
+        outputs = seg_model(**inputs)
+    # Process output to get binary mask (foreground=1, background=0)
+    logits = outputs.logits
+    predicted_mask = torch.argmax(logits, dim=1)
+    # Convert to numpy for processing
+    mask = predicted_mask[0].cpu().numpy()
+    # Focus on person class (typically class 0 or 1 depending on the model)
+    mask = (mask > 0).astype(np.uint8) * 255
+    return np.array(input_image), mask
+def apply_depth_estimation(input_image):
+    # Convert to PIL Image if needed
+    if not isinstance(input_image, Image.Image):
+        input_image = Image.fromarray(input_image)
+    # Resize to 512x512 for consistent processing
+    input_image = input_image.resize((512, 512))
+    # Prepare image for the model
+    inputs = depth_processor(images=input_image, return_tensors="pt").to(device)
+    # Forward pass
+    with torch.no_grad():
+        outputs = depth_model(**inputs)
+    # Process depth map
+    depth_map = outputs.predicted_depth
+    depth_map = torch.nn.functional.interpolate(
+        depth_map.unsqueeze(1),
+        size=(512, 512),
+        mode="bicubic",
+        align_corners=False,
+    ).squeeze()
+    # Normalize depth map to 0-1 range
+    depth_min = torch.min(depth_map)
+    depth_max = torch.max(depth_map)
+    depth_map = (depth_map - depth_min) / (depth_max - depth_min)
+    # Convert to numpy
+    depth_map = depth_map.cpu().numpy()
+    # Convert depth to heatmap for visualization
+    depth_map_vis = (depth_map * 255).astype(np.uint8)
+    depth_map_vis = cv2.applyColorMap(depth_map_vis, cv2.COLORMAP_INFERNO)
+    return np.array(input_image), depth_map, depth_map_vis
+def apply_gaussian_blur(image, mask, sigma=15):
+    # Make a copy of the image
+    result = image.copy()
+    # Ensure mask is binary (0 or 1)
+    if mask.max() > 1:
+        mask = mask / 255.0
+    # Expand mask to 3 channels if needed
+    if len(mask.shape) == 2:
+        mask = np.expand_dims(mask, axis=2)
+        mask = np.repeat(mask, 3, axis=2)
+    # Blur the entire image
+    blurred = cv2.GaussianBlur(image, (0, 0), sigma)
+    # Combine original image (foreground) with blurred image (background) using the mask
+    result = image * mask + blurred * (1 - mask)
+    return result.astype(np.uint8)
+def apply_depth_blur(image, depth_map, max_sigma=30):
+    # Make a copy of the image
+    result = np.zeros_like(image)
+    # Ensure depth map values are between 0-1
+    depth_map = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min())
+    # Apply variable blur based on depth
+    for sigma in range(1, max_sigma + 1):
+        # Create a mask for this depth level
+        depth_mask = ((depth_map >= (sigma - 1) / max_sigma) &
+                      (depth_map < sigma / max_sigma)).astype(np.float32)
+        # Expand mask to 3 channels if needed
+        if len(depth_mask.shape) == 2:
+            depth_mask = np.expand_dims(depth_mask, axis=2)
+            depth_mask = np.repeat(depth_mask, 3, axis=2)
+        # Apply blur with current sigma
+        current_blur = cv2.GaussianBlur(image, (0, 0), sigma)
+        # Add to result
+        result += (current_blur * depth_mask).astype(np.uint8)
+    # Handle remaining pixels (if any)
+    remaining_mask = (depth_map >= 1.0).astype(np.float32)
+    if len(remaining_mask.shape) == 2:
+        remaining_mask = np.expand_dims(remaining_mask, axis=2)
+        remaining_mask = np.repeat(remaining_mask, 3, axis=2)
+    max_blur = cv2.GaussianBlur(image, (0, 0), max_sigma)
+    result += (max_blur * remaining_mask).astype(np.uint8)
+    return result
+def process_image(input_image, blur_type, blur_strength):
+    # Convert to numpy array if needed
+    if isinstance(input_image, str):
+        # Load from URL if it's a string
+        response = requests.get(input_image)
+        input_image = Image.open(BytesIO(response.content))
+    # Resize to 512x512 for consistent processing
+    input_image = Image.fromarray(input_image).resize((512, 512))
+    input_image_np = np.array(input_image)
+    # Process based on selected blur type
+    if blur_type == "Gaussian Background Blur":
+        # Apply segmentation
+        _, mask = apply_segmentation(input_image)
+        # Apply Gaussian blur with chosen strength
+        result = apply_gaussian_blur(input_image_np, mask, sigma=blur_strength)
+        return input_image_np, result
+    elif blur_type == "Depth-based Lens Blur":
+        # Apply depth estimation
+        _, depth_map, depth_vis = apply_depth_estimation(input_image)
+        # Apply depth-based blur with chosen max strength
+        result = apply_depth_blur(input_image_np, depth_map, max_sigma=blur_strength)
+        return input_image_np, result
 # Create Gradio Interface
+# Build the UI: an input/output image pair, a blur-type selector with a
+# strength slider, and a button that runs process_image.
+with gr.Blocks(title="Image Blur Effects Demo") as app:
+    gr.Markdown("# Image Blur Effects Demo")
+    gr.Markdown("Upload an image to apply different blur effects using deep learning models")
+    # Both image components exchange NumPy arrays with the callback.
+    with gr.Row():
+        input_image = gr.Image(label="Input Image", type="numpy")
+        output_image = gr.Image(label="Output Image", type="numpy")
+    with gr.Row():
+        # The choice strings must match the ones process_image compares against.
+        blur_type = gr.Radio(
+            choices=["Gaussian Background Blur", "Depth-based Lens Blur"],
+            label="Blur Effect Type",
+            value="Gaussian Background Blur"
+        )
+        blur_strength = gr.Slider(
+            minimum=1, maximum=50, value=15, step=1,
+            label="Blur Strength"
+        )
+    submit_button = gr.Button("Apply Effect")
+    # input_image is also listed as an output, so the first value returned by
+    # process_image replaces the uploaded image in the input component.
+    submit_button.click(
+        fn=process_image,
+        inputs=[input_image, blur_type, blur_strength],
+        outputs=[input_image, output_image]
+    )
+    gr.Markdown("""
+    ## How it works
+    1. **Gaussian Background Blur**: Uses a segmentation model to detect the foreground object and applies blur to the background
+    2. **Depth-based Lens Blur**: Uses a depth estimation model to create a variable blur effect where objects further away are more blurred
+    Both models are from Hugging Face Transformers library.
+    """)
 # Launch the app
+# NOTE: there is no __main__ guard here, so importing this module also starts
+# the server.
+app.launch()