"""Gradio demo: segmentation-based background blur and depth-based lens blur.

Loads a SegFormer semantic-segmentation model and a DPT depth-estimation
model once at import time, then exposes ``process_image`` through a Gradio
interface.
"""

import json
import os

import cv2
import gradio as gr
import numpy as np
import torch
from PIL import Image, ImageFilter
from transformers import (
    SegformerFeatureExtractor,
    SegformerForSemanticSegmentation,
    DPTFeatureExtractor,
    DPTForDepthEstimation,
)

# ----------------------------------------------------------------
# Model loading (happens once, at import time)
# ----------------------------------------------------------------

# load segmentation model
seg_model_name = "nvidia/segformer-b1-finetuned-ade-512-512"
seg_fe = SegformerFeatureExtractor.from_pretrained(seg_model_name)
seg_model = SegformerForSemanticSegmentation.from_pretrained(seg_model_name)

# load depth model
depth_model_name = "Intel/dpt-hybrid-midas"
depth_fe = DPTFeatureExtractor.from_pretrained(depth_model_name)
depth_model = DPTForDepthEstimation.from_pretrained(depth_model_name)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
seg_model.to(device)
depth_model.to(device)

# Working resolution (width, height) used for every intermediate image.
TARGET_SIZE = (512, 512)

# The ADE20K label set has no "background" class (index 0 is "wall"), so the
# foreground must be selected by its own class id. Look the id up in the model
# config and fall back to 12, the usual ADE20K-150 index for "person".
PERSON_CLASS_ID = int((seg_model.config.label2id or {}).get("person", 12))


def process_image(image: Image.Image):
    """Produce a background-blurred and a depth-blurred version of *image*.

    Args:
        image: The uploaded picture. It is converted to RGB and resized to
            ``TARGET_SIZE`` before any processing.

    Returns:
        A 3-tuple of PIL images, all at ``TARGET_SIZE``:
        the resized original, the original with everything outside the
        person mask Gaussian-blurred, and a "lens blur" in which a sharpened
        copy and a heavily blurred copy are mixed per pixel by the
        normalized depth prediction.

    Raises:
        gr.Error: If no image was supplied.
    """
    if image is None:
        # Gradio passes None when the user submits without uploading.
        raise gr.Error("Please upload an image first.")

    # 1) prep
    image = image.convert("RGB").resize(TARGET_SIZE)

    # 2) segmentation -> binary person mask
    seg_inputs = seg_fe(images=image, return_tensors="pt").to(device)
    with torch.no_grad():
        seg_logits = seg_model(**seg_inputs).logits
    seg_map = torch.argmax(seg_logits, dim=1)[0].cpu().numpy()
    mask = (seg_map == PERSON_CLASS_ID).astype(np.uint8) * 255
    # The class map is resized so the mask matches the working image size.
    mask = Image.fromarray(mask).resize(TARGET_SIZE)

    # 3) gaussian-blur background: keep the original where the mask is set,
    #    use the blurred copy everywhere else.
    bg_blur = image.filter(ImageFilter.GaussianBlur(15))
    output_blur = Image.composite(image, bg_blur, mask)

    # 4) depth estimation
    depth_inputs = depth_fe(images=image, return_tensors="pt").to(device)
    with torch.no_grad():
        depth_pred = (
            depth_model(**depth_inputs).predicted_depth.squeeze().cpu().numpy()
        )

    # normalize to [0, 1] (epsilon guards a constant depth map) & resize
    dmin, dmax = depth_pred.min(), depth_pred.max()
    depth_norm = (depth_pred - dmin) / (dmax - dmin + 1e-8)
    depth_norm = cv2.resize(depth_norm, TARGET_SIZE)

    # 5) vectorized depth-based blur
    img_np = np.array(image).astype(np.float32)

    # apply an Unsharp Mask to sharpen the whole image
    sharp = image.filter(
        ImageFilter.UnsharpMask(radius=2, percent=150, threshold=3)
    )
    sharp_np = np.array(sharp).astype(np.float32)

    far_blur = cv2.GaussianBlur(img_np, (81, 81), 20)

    # high=foreground, low=background: the normalized depth is the per-pixel
    # weight of the sharpened image; the remainder comes from the blurred one.
    alpha = depth_norm[..., None]
    combined = sharp_np * alpha + far_blur * (1.0 - alpha)
    lens_blur = Image.fromarray(np.clip(combined, 0, 255).astype(np.uint8))

    return image, output_blur, lens_blur


iface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=[
        gr.Image(type="pil", label="Original"),
        gr.Image(type="pil", label="Gaussian Blur"),
        gr.Image(type="pil", label="Depth-Based Lens Blur"),
    ],
    title="Image Blurring with CLAHE + Depth-Based Blur",
    description="Upload a selfie to see background blur and depth-based lens blur.",
)

if __name__ == "__main__":
    iface.launch(share=True)