# EEE515-HW3-2-6 / app.py
# Trenton Ward
# Fix gradio image call
# 4bdbf01
import gradio as gr
import torch
from torch import nn
from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation, DepthProImageProcessorFast, DepthProForDepthEstimation
import numpy as np
from PIL import Image, ImageFilter
import os, hashlib
from huggingface_hub import snapshot_download
# --- Lens blur: download, verify and load the DepthPro model once at import ---
MODEL_REPO = "apple/DepthPro-hf"
CACHE_DIR = "./cache"  # cache folder for model files
# Expected SHA-256 digest of the repository's model.safetensors file.
EXPECTED_SHA256 = "9c6811e3165485b9a94a204329860cb333a79877e757eb795a179a4ea34bbcf7"


def _sha256_of_file(path, chunk_size=1024 * 1024):
    """Return the hex SHA-256 digest of the file at *path*.

    The file is read in ``chunk_size``-byte pieces so that a large weights
    file is never held in memory in one piece just to be hashed.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        # iter(callable, sentinel) keeps reading until f.read() returns b"".
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


# Download the model repository (if not cached) and verify its SHA-256.
snapshot_path = snapshot_download(repo_id=MODEL_REPO, cache_dir=CACHE_DIR)
model_file = os.path.join(snapshot_path, "model.safetensors")
file_hash = _sha256_of_file(model_file)
if file_hash != EXPECTED_SHA256:
    raise RuntimeError("Model file hash mismatch! Download may be corrupted.")

# Load model and processor from the verified local snapshot directory.
model = DepthProForDepthEstimation.from_pretrained(snapshot_path)
processor = DepthProImageProcessorFast.from_pretrained(snapshot_path)

# Use the GPU if available for speed; lens_blur() reads this module-level
# `device` when moving its inputs.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device).eval()
# Minimal text-in/text-out function used to sanity-check the UI wiring
def greet_test(name):
    """Return the greeting ``"Hello <name>!!"`` for the given string."""
    pieces = ("Hello ", name, "!!")
    return "".join(pieces)
# Face-parsing checkpoint used to separate foreground from background.
_FACE_PARSER_REPO = "jonathandinu/face-parsing"
# (image_processor, model) pairs keyed by device name, so the weights are
# loaded once per device instead of on every request.
_FACE_PARSER_CACHE = {}


def _get_face_parser(device):
    """Return the cached ``(image_processor, model)`` pair for *device*."""
    if device not in _FACE_PARSER_CACHE:
        image_processor = SegformerImageProcessor.from_pretrained(_FACE_PARSER_REPO)
        seg_model = SegformerForSemanticSegmentation.from_pretrained(_FACE_PARSER_REPO)
        seg_model.to(device)
        seg_model.eval()
        _FACE_PARSER_CACHE[device] = (image_processor, seg_model)
    return _FACE_PARSER_CACHE[device]


# Define the Gaussian blur + segmentation function
def gauss_blur(image, sigma):
    """Blur the background of *image* while keeping the foreground sharp.

    A SegFormer face-parsing model labels every pixel; pixels whose label is
    0 are treated as background and replaced by a Gaussian-blurred copy of
    the image, all other pixels are kept from the original.

    Args:
        image: A PIL image, or an array accepted by ``Image.fromarray``.
        sigma: Gaussian blur radius passed to ``ImageFilter.GaussianBlur``.
            ``None`` and negative values are treated as 0 (no blur).

    Returns:
        A new RGB PIL image with the same size as the input.
    """
    device = (
        "cuda"
        if torch.cuda.is_available()
        else "mps"
        if torch.backends.mps.is_available()
        else "cpu"
    )

    # Ensure image is a PIL Image
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)
    # The compositing below broadcasts an (H, W, 1) mask over colour
    # channels, so grayscale/alpha inputs are normalised to 3-channel RGB.
    if image.mode != "RGB":
        image = image.convert("RGB")

    # NOTE(review): a cleared number field presumably arrives as None --
    # treat it as "no blur" instead of failing inside PIL. Negative radii
    # have no meaning for a blur, so clamp them to zero as well.
    radius = 0.0 if sigma is None else max(float(sigma), 0.0)

    image_processor, seg_model = _get_face_parser(device)

    # Run inference; no gradients are needed for prediction.
    inputs = image_processor(images=image, return_tensors="pt").to(device)
    with torch.no_grad():
        logits = seg_model(**inputs).logits

    # Resize the logits to the input image dimensions.
    upsampled_logits = nn.functional.interpolate(
        logits,
        size=image.size[::-1],  # PIL size is (W, H); interpolate wants (H, W)
        mode="bilinear",
        align_corners=False,
    )

    # Per-pixel class labels for the single image in the batch.
    labels = upsampled_logits.argmax(dim=1)[0].cpu().numpy()
    foreground_mask = labels != 0  # boolean (H, W); label 0 = background

    image_np = np.array(image)
    blurred_image_np = np.array(image.filter(ImageFilter.GaussianBlur(radius=radius)))

    # Keep original pixels on the foreground, blurred pixels elsewhere.
    result_image_np = np.where(foreground_mask[:, :, None], image_np, blurred_image_np)
    return Image.fromarray(result_image_np.astype(np.uint8))
def lens_blur(image: Image.Image) -> Image.Image:
    """Approximate a depth-dependent lens blur using the DepthPro model.

    A depth map is predicted for the image and min-max normalised to [0, 1].
    The result is a per-pixel linear blend between the original image and a
    single Gaussian-blurred copy (radius 15), weighted by that normalised
    depth: pixels with the smallest depth stay sharp, pixels with the
    largest depth are fully blurred.

    Args:
        image: Input PIL image. Non-RGB modes are converted to RGB.

    Returns:
        A new RGB PIL image with the same size as the input.
    """
    max_side = 1536  # longest side fed to the processor
    blur_radius = 15  # Gaussian radius of the fully blurred copy

    # The blend below broadcasts an (H, W, 1) mask over colour channels, so
    # grayscale/alpha inputs are normalised to 3-channel RGB first.
    if image.mode != "RGB":
        image = image.convert("RGB")

    # 1. Preprocess input: downscale (preserving aspect ratio) if too large.
    orig_w, orig_h = image.size
    max_dim = max(orig_w, orig_h)
    if max_dim > max_side:
        ratio = float(max_side) / max_dim
        # Clamp to 1 so an extreme aspect ratio cannot truncate a side to 0,
        # which PIL's resize rejects.
        new_size = (max(1, int(orig_w * ratio)), max(1, int(orig_h * ratio)))
        image_resized = image.resize(new_size, Image.LANCZOS)
    else:
        image_resized = image

    # Prepare model input on the same device as the module-level model.
    inputs = processor(images=image_resized, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # 2. Inference: predict the depth map.
    with torch.no_grad():
        outputs = model(**inputs)

    # Post-process to get the depth map at the original image resolution.
    depth_map = processor.post_process_depth_estimation(
        outputs, target_sizes=[(orig_h, orig_w)]
    )[0]["predicted_depth"]
    # reshape (rather than squeeze) keeps both spatial axes even when the
    # image is a single pixel high or wide.
    depth_map = depth_map.cpu().float().numpy().reshape(orig_h, orig_w)

    # Normalise depth to [0, 1]; a constant depth map becomes all zeros.
    depth_min, depth_max = depth_map.min(), depth_map.max()
    if depth_max > depth_min:
        depth_norm = (depth_map - depth_min) / (depth_max - depth_min)
    else:
        depth_norm = np.zeros_like(depth_map)

    # 3. Create a blurred version of the original image.
    blurred_np = np.array(
        image.filter(ImageFilter.GaussianBlur(radius=blur_radius)), dtype=np.float32
    )
    original_np = np.array(image, dtype=np.float32)

    # Shape (H, W, 1) so the mask broadcasts across the colour channels.
    depth_mask = depth_norm.astype(np.float32)[..., None]

    # 4. Blend: near (mask ~ 0) -> original, far (mask ~ 1) -> blurred.
    blended_np = original_np * (1 - depth_mask) + blurred_np * depth_mask
    blended_np = blended_np.clip(0, 255).astype(np.uint8)
    return Image.fromarray(blended_np)
# Build the Gradio app: one tab per demo function.
with gr.Blocks() as demo:
    gr.Markdown("# Gaussian Blur and Lens Blur Demo")

    with gr.Tab("Greeting (Basic Test)"):
        gr.Interface(fn=greet_test, inputs="text", outputs="text")

    # gauss_blur blurs the background and keeps the foreground sharp, so the
    # tab label names the background to agree with the description below.
    with gr.Tab("Gaussian Blur on Background"):
        gr.Interface(
            fn=gauss_blur,
            inputs=["image", "number"],
            outputs="image",
            title="Gaussian Blur",
            description="Apply Gaussian blur to the background of the image while keeping the foreground sharp. Adjust the sigma value to control the blur intensity.",
        )

    with gr.Tab("Lens Blur"):
        gr.Interface(
            fn=lens_blur,
            inputs=gr.Image(type="pil"),  # lens_blur expects a PIL image
            outputs="image",
            title="Lens Blur",
            description="Apply depth-dependent lens blur to the image using the Apple DepthPro model. The blur intensity varies based on the depth of each pixel.",
        )

# share=True requests a public share link; remove the argument to serve
# locally only.
demo.launch(share=True)