import gradio as gr
import torch
from torch import nn
from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation, DepthProImageProcessorFast, DepthProForDepthEstimation
import numpy as np
from PIL import Image, ImageFilter
import os, hashlib
from huggingface_hub import snapshot_download

# --- Lens Blur: DepthPro model & processor setup ---
# Global: load model & processor once
MODEL_REPO = "apple/DepthPro-hf"
CACHE_DIR = "./cache"  # cache folder for model files
EXPECTED_SHA256 = "9c6811e3165485b9a94a204329860cb333a79877e757eb795a179a4ea34bbcf7"  # expected SHA-256 of model.safetensors

# Download model repository (if not cached) and verify SHA-256
snapshot_path = snapshot_download(repo_id=MODEL_REPO, cache_dir=CACHE_DIR)
model_file = os.path.join(snapshot_path, "model.safetensors")
# Compute SHA-256 of the model file in chunks to avoid loading the whole file into memory
sha256 = hashlib.sha256()
with open(model_file, "rb") as f:
    for chunk in iter(lambda: f.read(8 * 1024 * 1024), b""):
        sha256.update(chunk)
file_hash = sha256.hexdigest()
if file_hash != EXPECTED_SHA256:
    raise RuntimeError("Model file hash mismatch! Download may be corrupted.")
# Load model and processor (from local files, avoiding re-download)
model = DepthProForDepthEstimation.from_pretrained(snapshot_path)
processor = DepthProImageProcessorFast.from_pretrained(snapshot_path)
# Use GPU if available for speed
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device).eval()

# Define the simple greeting function
def greet_test(name):
    return "Hello " + name + "!!"

# Define the Gaussian blur + segmentation function
def gauss_blur(image, sigma):
    device = (
        "cuda"
        if torch.cuda.is_available()
        else "mps"
        if torch.backends.mps.is_available()
        else "cpu"
    )
    
    # Ensure image is a PIL Image
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)
    
    # Load the SegFormer face-parsing model and processor (re-loaded from the local HF cache on every call)
    image_processor = SegformerImageProcessor.from_pretrained("jonathandinu/face-parsing")
    model = SegformerForSemanticSegmentation.from_pretrained("jonathandinu/face-parsing")
    model.to(device)
    
    # Run inference on image
    inputs = image_processor(images=image, return_tensors="pt").to(device)
    outputs = model(**inputs)
    logits = outputs.logits

    # Resize output to match input image dimensions
    upsampled_logits = nn.functional.interpolate(
        logits,
        size=image.size[::-1],  # H x W
        mode='bilinear',
        align_corners=False
    )

    # Get label masks
    labels = upsampled_logits.argmax(dim=1)[0]
    labels_viz = labels.cpu().numpy()

    # Create foreground mask
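    # (in this model's label map, label 0 is background, so any non-zero label is treated as foreground)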
    foreground_mask = (labels_viz != 0).astype(np.uint8)

    # Apply Gaussian blur
    image_np = np.array(image)
    blurred_image = Image.fromarray(image_np).filter(ImageFilter.GaussianBlur(radius=sigma))
    blurred_image_np = np.array(blurred_image)

    # Combine blurred background with original foreground
    result_image_np = (
        image_np * foreground_mask[:, :, None] + 
        blurred_image_np * (1 - foreground_mask[:, :, None])
    )

    return Image.fromarray(result_image_np.astype(np.uint8))

def lens_blur(image: Image.Image) -> Image.Image:
    """Apply depth-dependent lens blur to the input PIL image using DepthPro model."""
    # 1. Preprocess input: resize (preserve aspect ratio up to 1536px) and prepare tensor
    orig_w, orig_h = image.size
    max_dim = max(orig_w, orig_h)
    if max_dim > 1536:  # limit size for model
        ratio = 1536.0 / max_dim
        new_size = (int(orig_w * ratio), int(orig_h * ratio))
        image_resized = image.resize(new_size, Image.LANCZOS)
    else:
        image_resized = image
    # Prepare model input
    inputs = processor(images=image_resized, return_tensors="pt")  # includes resizing to 1536x1536 internally
    inputs = {k: v.to(device) for k, v in inputs.items()}
    # 2. Inference: predict depth map
    with torch.no_grad():
        outputs = model(**inputs)
    # Post-process to get depth map at original image resolution
    depth_map = processor.post_process_depth_estimation(
        outputs, target_sizes=[(orig_h, orig_w)]
    )[0]["predicted_depth"]
    depth_map = depth_map.squeeze().cpu().float().numpy()  # H x W depth values
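    # DepthPro predicts metric depth, where larger values mean farther away; after normalization,
    # values near 1 mark the background that should receive the strongest blur.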
    # Normalize depth to [0,1]
    depth_min, depth_max = depth_map.min(), depth_map.max()
    if depth_max > depth_min:
        depth_norm = (depth_map - depth_min) / (depth_max - depth_min)
    else:
        depth_norm = depth_map * 0.0  # all pixels same depth
    # 3. Create blurred version of the original image
    blurred_image = image.filter(ImageFilter.GaussianBlur(radius=15))
    blurred_np = np.array(blurred_image, dtype=np.float32)
    original_np = np.array(image, dtype=np.float32)
    # Ensure depth mask has shape (H, W, 1) for broadcasting across color channels
    depth_mask = depth_norm.astype(np.float32)[..., None]
    # 4. Blend images: near (mask~0) -> original, far (mask~1) -> blurred
    blended_np = original_np * (1 - depth_mask) + blurred_np * depth_mask
    blended_np = blended_np.clip(0, 255).astype(np.uint8)
    result_image = Image.fromarray(blended_np)
    
    return result_image
    
# Build the Gradio app with Tabs
with gr.Blocks() as demo:
    gr.Markdown("# Gaussian Blur and Lens Blur Demo")

    with gr.Tab("Greeting (Basic Test)"):
        gr.Interface(fn=greet_test, inputs="text", outputs="text")

    with gr.Tab("Gaussian Blur on Foreground"):
        gr.Interface(fn=gauss_blur, inputs=["image", "number"], outputs="image",
                     title="Gaussian Blur",
                     description="Apply Gaussian blur to the background of the image while keeping the foreground sharp. Adjust the sigma value to control the blur intensity.",
                     )
        
    with gr.Tab("Lens Blur"):
        gr.Interface(fn=lens_blur, inputs=gr.Image(type="pil"), outputs="image",
                     title="Lens Blur",
                     description="Apply depth-dependent lens blur to the image using the Apple DepthPro model. The blur intensity varies based on the depth of each pixel.",
                     )

demo.launch(share=True)  # share=True creates a temporary public link; remove it to serve locally only