import gradio as gr
import torch
from torch import nn
from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation, DepthProImageProcessorFast, DepthProForDepthEstimation
import numpy as np
from PIL import Image, ImageFilter
import os, hashlib
from huggingface_hub import snapshot_download
# --- For Lens Blur: load the DepthPro model & processor once at startup ---
MODEL_REPO = "apple/DepthPro-hf"
CACHE_DIR = "./cache"  # cache folder for model files
EXPECTED_SHA256 = "9c6811e3165485b9a94a204329860cb333a79877e757eb795a179a4ea34bbcf7"  # expected hash of model.safetensors
# Download the model repository (if not cached) and verify its SHA-256
snapshot_path = snapshot_download(repo_id=MODEL_REPO, cache_dir=CACHE_DIR)
model_file = os.path.join(snapshot_path, "model.safetensors")
# Compute the SHA-256 of the model file (hash in chunks; the weights file is large)
sha256 = hashlib.sha256()
with open(model_file, "rb") as f:
    for chunk in iter(lambda: f.read(1024 * 1024), b""):
        sha256.update(chunk)
file_hash = sha256.hexdigest()
if file_hash != EXPECTED_SHA256:
    raise RuntimeError("Model file hash mismatch! Download may be corrupted.")
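# Note: snapshot_download pulls the latest revision of the repo by default, so the
# pinned SHA-256 above will stop matching if apple/DepthPro-hf is ever updated.
# A minimal sketch of one way to make the check stable, using snapshot_download's
# revision parameter (the revision string below is a placeholder, not a verified
# commit of this repo):
# snapshot_path = snapshot_download(repo_id=MODEL_REPO, cache_dir=CACHE_DIR,
#                                   revision="<commit-sha-to-pin>")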
# Load model and processor (from local files, avoiding re-download)
model = DepthProForDepthEstimation.from_pretrained(snapshot_path)
processor = DepthProImageProcessorFast.from_pretrained(snapshot_path)
# Use GPU if available for speed
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device).eval()

# Define the simple greeting function
def greet_test(name):
    return "Hello " + name + "!!"
# Define the Gaussian blur + segmentation function
def gauss_blur(image, sigma):
    device = (
        "cuda"
        if torch.cuda.is_available()
        else "mps"
        if torch.backends.mps.is_available()
        else "cpu"
    )
    # Ensure image is a PIL Image
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)
    # Load the face-parsing model and processor
    # (loaded per call for simplicity; caching them globally would be faster)
    image_processor = SegformerImageProcessor.from_pretrained("jonathandinu/face-parsing")
    seg_model = SegformerForSemanticSegmentation.from_pretrained("jonathandinu/face-parsing")
    seg_model.to(device).eval()
    # Run inference on the image
    inputs = image_processor(images=image, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = seg_model(**inputs)
    logits = outputs.logits
    # Resize output to match input image dimensions
    upsampled_logits = nn.functional.interpolate(
        logits,
        size=image.size[::-1],  # PIL size is (W, H); interpolate expects (H, W)
        mode='bilinear',
        align_corners=False
    )
    # Get per-pixel label predictions
    labels = upsampled_logits.argmax(dim=1)[0]
    labels_viz = labels.cpu().numpy()
    # Create foreground mask (label 0 is the background class)
    foreground_mask = (labels_viz != 0).astype(np.uint8)
    # Apply Gaussian blur to the whole image
    image_np = np.array(image)
    blurred_image = image.filter(ImageFilter.GaussianBlur(radius=sigma))
    blurred_image_np = np.array(blurred_image)
    # Combine blurred background with original foreground
    result_image_np = (
        image_np * foreground_mask[:, :, None] +
        blurred_image_np * (1 - foreground_mask[:, :, None])
    )
    return Image.fromarray(result_image_np.astype(np.uint8))
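# Quick offline sanity check for gauss_blur (a sketch; "face.jpg" is a
# hypothetical test image and sigma=8 an arbitrary blur strength):
# result = gauss_blur(Image.open("face.jpg"), sigma=8)
# result.save("face_bg_blurred.jpg")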
def lens_blur(image: Image.Image) -> Image.Image:
    """Apply depth-dependent lens blur to the input PIL image using the DepthPro model."""
    # 1. Preprocess input: downscale (preserving aspect ratio) so the longest side is at most 1536 px
    orig_w, orig_h = image.size
    max_dim = max(orig_w, orig_h)
    if max_dim > 1536:  # limit size for the model
        ratio = 1536.0 / max_dim
        new_size = (int(orig_w * ratio), int(orig_h * ratio))
        image_resized = image.resize(new_size, Image.LANCZOS)
    else:
        image_resized = image
    # Prepare model input (the processor resizes to 1536x1536 internally)
    inputs = processor(images=image_resized, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}
    # 2. Inference: predict the depth map
    with torch.no_grad():
        outputs = model(**inputs)
    # Post-process to get the depth map at the original image resolution
    depth_map = processor.post_process_depth_estimation(
        outputs, target_sizes=[(orig_h, orig_w)]
    )[0]["predicted_depth"]
    depth_map = depth_map.squeeze().cpu().float().numpy()  # (H, W) depth values
    # Normalize depth to [0, 1]
    depth_min, depth_max = depth_map.min(), depth_map.max()
    if depth_max > depth_min:
        depth_norm = (depth_map - depth_min) / (depth_max - depth_min)
    else:
        depth_norm = depth_map * 0.0  # all pixels at the same depth
    # 3. Create a blurred version of the original image
    blurred_image = image.filter(ImageFilter.GaussianBlur(radius=15))
    blurred_np = np.array(blurred_image, dtype=np.float32)
    original_np = np.array(image, dtype=np.float32)
    # Ensure the depth mask has shape (H, W, 1) for broadcasting across color channels
    depth_mask = depth_norm.astype(np.float32)[..., None]
    # 4. Blend images: near pixels (mask ~ 0) stay sharp, far pixels (mask ~ 1) get the blurred image
    blended_np = original_np * (1 - depth_mask) + blurred_np * depth_mask
    blended_np = blended_np.clip(0, 255).astype(np.uint8)
    result_image = Image.fromarray(blended_np)
    return result_image
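# The blend above is a per-pixel linear interpolation: a pixel with
# depth_norm = 0.25 keeps 75% of the original value and takes 25% of the
# blurred value. Quick offline sanity check (a sketch; "scene.jpg" is a
# hypothetical test image):
# result = lens_blur(Image.open("scene.jpg"))
# result.save("scene_lens_blurred.jpg")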
# Build the Gradio app with Tabs
with gr.Blocks() as demo:
    gr.Markdown("# Gaussian Blur and Lens Blur Demo")
    with gr.Tab("Greeting (Basic Test)"):
        gr.Interface(fn=greet_test, inputs="text", outputs="text")
    with gr.Tab("Gaussian Blur on Foreground"):
        gr.Interface(
            fn=gauss_blur, inputs=["image", "number"], outputs="image",
            title="Gaussian Blur",
            description="Apply Gaussian blur to the background of the image while keeping the foreground sharp. Adjust the sigma value to control the blur intensity.",
        )
    with gr.Tab("Lens Blur"):
        gr.Interface(
            fn=lens_blur, inputs=gr.Image(type="pil"), outputs="image",
            title="Lens Blur",
            description="Apply depth-dependent lens blur to the image using the Apple DepthPro model. The blur intensity varies with each pixel's estimated depth.",
        )
demo.launch(share=True)  # share=True also creates a temporary public link