Update app.py
app.py
CHANGED
@@ -7,36 +7,78 @@ from torchvision.transforms import ToTensor, Resize
 import spaces
 import tempfile
 from scipy.ndimage import gaussian_filter
-from aura_sr import AuraSR
+from aura_sr import AuraSR
+import cv2
+import torch.nn.functional as F
 
 # Load AuraSR-v2 model once at startup
 aura_sr = AuraSR.from_pretrained("fal/AuraSR-v2")
 
+# Post-processing functions
+def apply_lens_distortion(image, k1=0.2):
+    """Apply lens distortion using OpenCV."""
+    h, w = image.shape[:2]
+    camera_matrix = np.array([[w, 0, w / 2], [0, w, h / 2], [0, 0, 1]], dtype=np.float32)
+    dist_coeffs = np.array([k1, 0, 0, 0], dtype=np.float32)
+    distorted = cv2.undistort(image, camera_matrix, dist_coeffs)
+    return distorted
+
+def apply_depth_of_field(image_tensor, depth_tensor, focus_depth=0.5, blur_size=5):
+    """Apply depth of field blur using PyTorch."""
+    depth_diff = torch.abs(depth_tensor - focus_depth)
+    padded_image = F.pad(image_tensor, (blur_size // 2, blur_size // 2, blur_size // 2, blur_size // 2), mode='reflect')
+    # Uniform box blur per channel; blended with the sharp image by the focus mask below
+    weight = torch.ones(3, 1, blur_size, blur_size, device='cuda') / (blur_size ** 2)
+    blurred = F.conv2d(padded_image.unsqueeze(0), weight, groups=3).squeeze(0)
+    mask = (depth_diff < 0.1).float()
+    return image_tensor * mask + blurred * (1 - mask)
+
+def apply_vignette(image):
+    """Apply vignette effect."""
+    h, w = image.shape[:2]
+    x, y = np.meshgrid(np.arange(w), np.arange(h))
+    center_x, center_y = w / 2, h / 2
+    radius = np.sqrt((x - center_x) ** 2 + (y - center_y) ** 2)
+    max_radius = np.sqrt(center_x ** 2 + center_y ** 2)
+    vignette = 1 - (radius / max_radius) ** 2
+    vignette = np.clip(vignette, 0, 1)
+    return (image * vignette[..., np.newaxis]).astype(np.uint8)
+
+def parse_keyframes(keyframe_text):
+    """Parse keyframe text into time-position pairs."""
+    keyframes = []
+    try:
+        for entry in keyframe_text.split():
+            time, pos = entry.split(':')
+            x, y = map(float, pos.split(','))
+            keyframes.append((float(time), x, y))
+        keyframes.sort()  # Sort by time
+        return keyframes
+    except (ValueError, IndexError):
+        return [(0, 0, 0), (1, 0, 0)]  # Default fallback
+
+def interpolate_keyframes(t, keyframes):
+    """Interpolate camera position between keyframes."""
+    if t <= keyframes[0][0]:
+        return keyframes[0][1], keyframes[0][2]
+    if t >= keyframes[-1][0]:
+        return keyframes[-1][1], keyframes[-1][2]
+    for i in range(len(keyframes) - 1):
+        t1, x1, y1 = keyframes[i]
+        t2, x2, y2 = keyframes[i + 1]
+        if t1 <= t <= t2:
+            alpha = (t - t1) / (t2 - t1)
+            return x1 + alpha * (x2 - x1), y1 + alpha * (y2 - y1)
+    return 0, 0  # Fallback
+
 @spaces.GPU
-def generate_parallax_video(image, depth_map, animation_style, amplitude, k, fps, duration, ssaa_factor, use_taa, use_upscale):
-    """
-    Generate a 3D parallax video with enhanced quality features and optional upscaling.
-
-    Args:
-        image (PIL.Image): Input RGB image.
-        depth_map (PIL.Image): Grayscale depth map.
-        animation_style (str): Animation type.
-        amplitude (float): Camera movement intensity.
-        k (float): Depth displacement scale.
-        fps (int): Frames per second.
-        duration (float): Video duration in seconds.
-        ssaa_factor (int): Super sampling factor (1, 2, 4).
-        use_taa (bool): Enable temporal anti-aliasing.
-        use_upscale (bool): Enable AuraSR-v2 upscaling for each frame.
-
-    Returns:
-        str: Path to the generated video file.
-    """
+def generate_parallax_video(image, depth_map, animation_style, amplitude, k, fps, duration, ssaa_factor, use_taa, use_upscale, apply_lens, apply_dof, apply_vig, keyframe_text):
+    """Generate a 3D parallax video with advanced features."""
     # Validate input dimensions
     if image.size != depth_map.size:
         raise ValueError("Image and depth map must have the same dimensions")
 
-    # Convert to tensors
+    # Convert to tensors
     image_tensor = ToTensor()(image).to('cuda', dtype=torch.float32)
     depth_tensor = ToTensor()(depth_map.convert('L')).to('cuda', dtype=torch.float32)
     depth_tensor = (depth_tensor - depth_tensor.min()) / (depth_tensor.max() - depth_tensor.min() + 1e-6)
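A note on the post-processing helpers added above (a minimal usage sketch under assumed shapes, not part of the commit): apply_lens_distortion and apply_vignette operate on uint8 H x W x 3 NumPy frames, while apply_depth_of_field expects a float CHW CUDA tensor. Also worth knowing: cv2.undistort with a positive k1 applies the inverse of a barrel-distortion model, so the result reads as a lens-like warp rather than literal barrel distortion.

    import numpy as np

    # Hypothetical frame: the NumPy-side helpers expect uint8 H x W x 3 arrays.
    frame = np.full((480, 640, 3), 128, dtype=np.uint8)
    frame = apply_lens_distortion(frame, k1=0.2)  # lens-like warp via cv2.undistort
    frame = apply_vignette(frame)                 # darkens toward the corners
    print(frame.shape, frame.dtype)               # (480, 640, 3) uint8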
@@ -53,13 +95,14 @@ def generate_parallax_video(image, depth_map, animation_style, amplitude, k, fps
     depth_tensor = upscale(depth_tensor)
 
     H, W = image_tensor.shape[1], image_tensor.shape[2]
-
-    # Create coordinate grid
     x = torch.arange(0, W).float().to('cuda')
     y = torch.arange(0, H).float().to('cuda')
     xx, yy = torch.meshgrid(x, y, indexing='xy')
     pixel_grid = torch.stack((xx, yy), dim=-1)
 
+    # Parse keyframes for custom path
+    keyframes = parse_keyframes(keyframe_text) if animation_style == "custom" else None
+
     # Generate frames
     num_frames = int(fps * duration)
     frames = []
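To see what the keyframe hookup above produces (a worked sketch using the UI's default string): parse_keyframes turns "0:0,0 0.5:5,0 1:0,0" into sorted (time, x, y) tuples, and interpolate_keyframes linearly blends between the two keyframes surrounding t.

    keyframes = parse_keyframes("0:0,0 0.5:5,0 1:0,0")
    # -> [(0.0, 0.0, 0.0), (0.5, 5.0, 0.0), (1.0, 0.0, 0.0)]

    # Halfway toward the middle keyframe, the camera x is halfway to 5.
    camera_x, camera_y = interpolate_keyframes(0.25, keyframes)
    assert (camera_x, camera_y) == (2.5, 0.0)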
@@ -67,27 +110,36 @@ def generate_parallax_video(image, depth_map, animation_style, amplitude, k, fps
 
     for frame in range(num_frames):
         t = frame / num_frames
-        if animation_style == "horizontal":
+        if animation_style == "zoom":
+            zoom_factor = 1 + amplitude * np.sin(2 * np.pi * t)
+            displacement_x = (pixel_grid[:, :, 0] - W / 2) * (1 - zoom_factor) * depth_tensor.squeeze()
+            displacement_y = (pixel_grid[:, :, 1] - H / 2) * (1 - zoom_factor) * depth_tensor.squeeze()
+        elif animation_style == "horizontal":
             camera_x = amplitude * np.sin(2 * np.pi * t)
-            camera_y = 0
+            displacement_x = k * camera_x * depth_tensor.squeeze()
+            displacement_y = 0
         elif animation_style == "vertical":
-            camera_x = 0
             camera_y = amplitude * np.sin(2 * np.pi * t)
+            displacement_x = 0
+            displacement_y = k * camera_y * depth_tensor.squeeze()
         elif animation_style == "circle":
             camera_x = amplitude * np.sin(2 * np.pi * t)
             camera_y = amplitude * np.cos(2 * np.pi * t)
+            displacement_x = k * camera_x * depth_tensor.squeeze()
+            displacement_y = k * camera_y * depth_tensor.squeeze()
         elif animation_style == "spiral":
             radius = amplitude * (1 - t)
             camera_x = radius * np.sin(4 * np.pi * t)
             camera_y = radius * np.cos(4 * np.pi * t)
+            displacement_x = k * camera_x * depth_tensor.squeeze()
+            displacement_y = k * camera_y * depth_tensor.squeeze()
+        elif animation_style == "custom":
+            camera_x, camera_y = interpolate_keyframes(t, keyframes)
+            displacement_x = k * camera_x * depth_tensor.squeeze()
+            displacement_y = k * camera_y * depth_tensor.squeeze()
         else:
             raise ValueError(f"Unsupported animation style: {animation_style}")
 
-        # Compute displacements
-        displacement_x = k * camera_x * depth_tensor.squeeze()
-        displacement_y = k * camera_y * depth_tensor.squeeze()
-
-        # Calculate source coordinates
         source_pixel_x = pixel_grid[:, :, 0] + displacement_x
         source_pixel_y = pixel_grid[:, :, 1] + displacement_y
 
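For intuition about the displacement math above (a worked example with assumed slider values, not from the commit): with the horizontal style, amplitude=2, and k=5, at t=0.25 the camera offset peaks at 2*sin(pi/2) = 2, so a pixel at normalized depth 1.0 shifts by k * camera_x * depth = 10 px while a pixel at depth 0.0 stays put; that depth-dependent shift is what creates the parallax.

    import numpy as np

    t, amplitude, k = 0.25, 2.0, 5.0              # assumed slider values
    camera_x = amplitude * np.sin(2 * np.pi * t)  # = 2.0 at the quarter cycle
    print(k * camera_x * 1.0)  # pixel at normalized depth 1.0 shifts 10.0 px
    print(k * camera_x * 0.0)  # pixel at depth 0.0 does not move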
@@ -96,22 +148,34 @@ def generate_parallax_video(image, depth_map, animation_style, amplitude, k, fps
         grid_y = 2 * source_pixel_y / (H - 1) - 1
         grid = torch.stack((grid_x, grid_y), dim=-1).unsqueeze(0)
 
-        # Warp
+        # Warp image
         warped = torch.nn.functional.grid_sample(image_tensor.unsqueeze(0), grid, mode='bicubic', align_corners=True)
 
-        # Downsample if SSAA
+        # Downsample if SSAA
         if ssaa_factor > 1:
             downscale = Resize((image.height, image.width), antialias=True)
             warped = downscale(warped.squeeze(0)).unsqueeze(0)
 
+        # Apply depth of field if enabled
+        if apply_dof:
+            warped = apply_depth_of_field(warped.squeeze(0), depth_tensor.squeeze(0))
+
-        # Convert to numpy
+        # Convert to numpy
         frame_img = warped.squeeze(0).permute(1, 2, 0).cpu().numpy()
         frame_img = (frame_img * 255).astype(np.uint8)
-        frame_pil = Image.fromarray(frame_img)
 
-        # Apply upscaling if enabled
+        # Apply lens distortion if enabled
+        if apply_lens:
+            frame_img = apply_lens_distortion(frame_img)
+
+        # Apply vignette if enabled
+        if apply_vig:
+            frame_img = apply_vignette(frame_img)
+
+        # Apply upscaling if enabled
         if use_upscale:
-            frame_pil = aura_sr.upscale_4x_overlapped(frame_pil)
+            frame_pil = Image.fromarray(frame_img)
+            frame_pil = aura_sr.upscale_4x_overlapped(frame_pil)
             frame_img = np.array(frame_pil)
 
         # Apply TAA if enabled
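The grid normalization in this hunk follows torch.nn.functional.grid_sample's convention: with align_corners=True, pixel 0 maps to -1 and W-1 (or H-1) maps to +1, so a zero-displacement grid reproduces the input exactly. A minimal CPU sketch (bilinear for simplicity; the app uses bicubic on CUDA):

    import torch
    import torch.nn.functional as F

    H, W = 4, 5
    img = torch.rand(1, 3, H, W)
    ys, xs = torch.meshgrid(torch.arange(H).float(), torch.arange(W).float(), indexing='ij')
    grid = torch.stack((2 * xs / (W - 1) - 1, 2 * ys / (H - 1) - 1), dim=-1).unsqueeze(0)
    out = F.grid_sample(img, grid, mode='bilinear', align_corners=True)
    assert torch.allclose(out, img, atol=1e-6)  # identity warp reproduces the image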
@@ -132,30 +196,51 @@ def generate_parallax_video(image, depth_map, animation_style, amplitude, k, fps
     return output_path
 
 # Gradio interface
-with gr.Blocks(title="
-    gr.Markdown("#
-    gr.Markdown("
+with gr.Blocks(title="Ultimate 3D Parallax Video Generator") as demo:
+    gr.Markdown("# Ultimate 3D Parallax Video Generator")
+    gr.Markdown("Generate high-quality 3D parallax videos with advanced features, post-processing, and custom paths.")
 
     with gr.Row():
         image_input = gr.Image(type="pil", label="Upload Image")
         depth_input = gr.Image(type="pil", label="Upload Depth Map")
 
     with gr.Row():
-        animation_style = gr.Dropdown(["horizontal", "vertical", "circle", "spiral"], label="Animation Style", value="horizontal")
+        animation_style = gr.Dropdown(
+            ["zoom", "horizontal", "vertical", "circle", "spiral", "custom"],
+            label="Animation Style",
+            value="horizontal"
+        )
         amplitude_slider = gr.Slider(0, 10, value=2, label="Amplitude", step=0.1)
         k_slider = gr.Slider(1, 20, value=5, label="Depth Scale (k)", step=0.1)
         fps_slider = gr.Slider(10, 60, value=30, label="FPS", step=1)
         duration_slider = gr.Slider(1, 10, value=5, label="Duration (s)", step=0.1)
+
+    with gr.Row():
         ssaa_factor = gr.Dropdown([1, 2, 4], label="SSAA Factor", value=1)
         use_taa = gr.Checkbox(label="Enable TAA", value=False)
         use_upscale = gr.Checkbox(label="Enable AuraSR-v2 Upscaling", value=False)
+        apply_lens = gr.Checkbox(label="Apply Lens Distortion", value=False)
+        apply_dof = gr.Checkbox(label="Apply Depth of Field", value=False)
+        apply_vig = gr.Checkbox(label="Apply Vignette", value=False)
+
+    with gr.Row():
+        keyframe_text = gr.Textbox(
+            label="Custom Keyframes (time:x,y)",
+            value="0:0,0 0.5:5,0 1:0,0",
+            placeholder="e.g., 0:0,0 0.5:5,0 1:0,0",
+            visible=True
+        )
 
     generate_btn = gr.Button("Generate Video")
     video_output = gr.Video(label="Parallax Video")
 
     generate_btn.click(
         fn=generate_parallax_video,
-        inputs=[image_input, depth_input, animation_style, amplitude_slider, k_slider, fps_slider, duration_slider, ssaa_factor, use_taa, use_upscale],
+        inputs=[
+            image_input, depth_input, animation_style, amplitude_slider, k_slider,
+            fps_slider, duration_slider, ssaa_factor, use_taa, use_upscale,
+            apply_lens, apply_dof, apply_vig, keyframe_text
+        ],
         outputs=video_output
     )
 
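The diff ends at the click wiring, so the Space's entry point is presumably unchanged further down the file. For context, a typical Gradio Space launch looks like this (assumed; not shown in this diff):

    if __name__ == "__main__":
        demo.launch()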