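"""Gradio app: blur the background of a photo while keeping the person in focus.

Person segmentation uses OneFormer (COCO, Swin-Large); the "Lens" mode also uses
Depth-Anything-V2-Small so that blur strength scales with estimated depth.
"""
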
import gradio as gr
from transformers import OneFormerProcessor, OneFormerForUniversalSegmentation, AutoImageProcessor, AutoModelForDepthEstimation
from PIL import Image, ImageFilter, ImageOps
import numpy as np
import torch
import cv2

# Load the OneFormer processor and model globally
oneformer_processor = None
oneformer_model = None
try:
    oneformer_processor = OneFormerProcessor.from_pretrained("shi-labs/oneformer_coco_swin_large")
    oneformer_model = OneFormerForUniversalSegmentation.from_pretrained("shi-labs/oneformer_coco_swin_large")
except Exception as e:
    print(f"Error loading OneFormer model: {e}")

# Load the Depth Estimation processor and model globally
depth_processor = None
depth_model = None
try:
    depth_processor = AutoImageProcessor.from_pretrained("depth-anything/Depth-Anything-V2-Small-hf")
    depth_model = AutoModelForDepthEstimation.from_pretrained("depth-anything/Depth-Anything-V2-Small-hf")
except Exception as e:
    print(f"Error loading Depth Anything model: {e}")

def apply_gaussian_blur_background(image, mask, radius):
    """Applies Gaussian blur to the background of the image."""
    img_array = np.array(image)
    
    # Apply Gaussian blur to the entire image
    blurred_background = image.filter(ImageFilter.GaussianBlur(radius=radius))
    blurred_array = np.array(blurred_background)

    # Broadcast the binary mask (True where the subject is) to 3 channels so it
    # can select RGB pixels, then keep original pixels on the foreground and
    # blurred pixels elsewhere.
    foreground_mask = np.array(mask) > 0
    foreground_mask_3d = np.stack([foreground_mask] * 3, axis=-1)
    final_image_array = np.where(foreground_mask_3d, img_array, blurred_array)

    return Image.fromarray(final_image_array.astype(np.uint8))
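
# Design note: the mask used above is binary, which can leave a hard cutout edge;
# a feathered mask (e.g. the mask blurred slightly and passed as the alpha to
# Image.composite) would blend the subject boundary more smoothly.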

def apply_depth_based_blur_background(image, mask, strength):
    """Applies a depth-dependent lens blur to the background of the image."""
    if depth_processor is None or depth_model is None:
        raise gr.Error("Depth Anything model not loaded.")

    # Work at a fixed 512x512 resolution to keep depth inference cheap; note
    # this ignores the original aspect ratio.
    resized_image = image.resize((512, 512))
    image_np = np.array(resized_image)

    # Prepare image for the depth estimation model
    inputs = depth_processor(images=resized_image, return_tensors="pt")

    with torch.no_grad():
        outputs = depth_model(**inputs)
        predicted_depth = outputs.predicted_depth

    # Interpolate depth map to the resized image size
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=resized_image.size[::-1],
        mode="bicubic",
        align_corners=False,
    ).squeeze().cpu().numpy()

    # Normalize the depth map to [0, 1]; the epsilon guards against a constant
    # depth map (division by zero).
    depth_norm = (prediction - prediction.min()) / (prediction.max() - prediction.min() + 1e-8)

    num_blur_levels = 5
    blurred_layers = []
    for i in range(num_blur_levels):
        sigma = i * (strength / 5)  # scale per-level blur by the requested strength
        if sigma == 0:
            blurred = image_np
        else:
            # ksize=(0, 0) lets OpenCV derive the kernel size from sigma, so large
            # strengths are not clamped by a fixed 15x15 kernel.
            blurred = cv2.GaussianBlur(image_np, (0, 0), sigmaX=sigma, sigmaY=sigma, borderType=cv2.BORDER_REPLICATE)
        blurred_layers.append(blurred)
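
# Design note: compositing from a few pre-blurred layers is a cheap, common
# approximation of lens (bokeh) blur; a physically based approach would vary the
# kernel per pixel with the circle of confusion.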

    # Depth Anything predicts relative (inverse-depth-like) values where larger
    # roughly means closer, so (1 - depth_norm) gives far pixels the strongest blur.
    depth_indices = ((1 - depth_norm) * (num_blur_levels - 1)).astype(np.uint8)

    # Pick, per pixel, the layer that matches its blur level. This vectorized
    # gather is equivalent to, but far faster than, a per-pixel Python loop.
    stacked = np.stack(blurred_layers, axis=0)
    rows = np.arange(image_np.shape[0])[:, None]
    cols = np.arange(image_np.shape[1])[None, :]
    final_blurred_image_resized = stacked[depth_indices, rows, cols]

    final_blurred_pil_resized = Image.fromarray(final_blurred_image_resized.astype(np.uint8))
    final_blurred_pil = final_blurred_pil_resized.resize(image.size)
    final_blurred_array = np.array(final_blurred_pil)
    original_array = np.array(image)
    mask_resized = mask.resize(image.size)
    mask_array = np.array(mask_resized) > 0
    mask_array_3d = np.stack([mask_array] * 3, axis=-1)

    # Apply the mask to combine the original foreground with the blurred background
    final_output_array = np.where(mask_array_3d, original_array, final_blurred_array)
    return Image.fromarray(final_output_array.astype(np.uint8))
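
# With only five discrete levels, blur strength changes in visible steps across
# depth; linearly interpolating between adjacent layers by the fractional depth
# index would give a smoother falloff at extra cost.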


def segment_and_blur(input_image, blur_type, gaussian_radius=15, lens_strength=5):
    """Segments the person in the image and blurs the background with the chosen method."""
    try:
        if oneformer_processor is None or oneformer_model is None:
            raise gr.Error("OneFormer model not loaded.")

        image = input_image.convert("RGB")
        # Honor the photo's EXIF orientation instead of unconditionally rotating
        # every upload by -90 degrees, which only suited one capture orientation.
        image = ImageOps.exif_transpose(image)

        inputs = oneformer_processor(images=image, task_inputs=["semantic"], return_tensors="pt")
        with torch.no_grad():
            outputs = oneformer_model(**inputs)

        predicted_semantic_map = oneformer_processor.post_process_semantic_segmentation(outputs, target_sizes=[image.size[::-1]])[0]
        segmentation_mask = predicted_semantic_map.cpu().numpy()

        id2label = oneformer_model.config.id2label

        foreground_label = 'person'
        foreground_class_id = None
        for class_id, label in id2label.items():  # avoid shadowing the builtin `id`
            if label.lower() == foreground_label.lower():
                foreground_class_id = class_id
                break

        if foreground_class_id is None:
            raise gr.Error(f"Could not find the label '{foreground_label}' in the model's class mapping.")

        output_mask_array = np.zeros(segmentation_mask.shape, dtype=np.uint8)
        output_mask_array[segmentation_mask == foreground_class_id] = 255
        mask_pil = Image.fromarray(output_mask_array, mode='L')

        if blur_type == "Gaussian":
            blurred_image = apply_gaussian_blur_background(image, mask_pil, gaussian_radius)
        elif blur_type == "Lens":
            blurred_image = apply_depth_based_blur_background(image, mask_pil, lens_strength)
        else:
            raise gr.Error("Invalid blur type selected.")

        return blurred_image
    except gr.Error:
        raise
    except Exception as e:
        # Surface failures in the Gradio UI instead of returning a string to an
        # Image output, which would break rendering.
        raise gr.Error(f"Error during processing: {e}")



iface = gr.Interface(
    fn=segment_and_blur,
    inputs=[
        gr.Image(type="pil", label="Input Image"),  # <-- ADD type="pil" here
        gr.Radio(["Gaussian", "Lens"], label="Blur Type", value="Gaussian"),
        gr.Slider(0, 30, step=1, value=15, label="Gaussian Blur Radius"),
        gr.Slider(0, 10, step=1, value=5, label="Lens Blur Strength"),
    ],
    outputs=gr.Image(label="Output Image"),
    title="Image Background Blur App",
    description="Upload an image, select a blur type (Gaussian or Lens), and adjust the blur parameters to blur the background while keeping the person in focus."
)
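
# Tip: iface.launch(share=True) would additionally create a temporary public URL.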


if __name__ == "__main__":
    iface.launch()