Spaces:

yuyutsu07
/

Pseudo3D

Running on Zero

App Files Files Community

yuyutsu07 committed on Mar 11

Commit

2552c4b

verified ·

1 Parent(s): a984096

Create app.py

Browse files

Files changed (1) hide show

app.py +100 -0

app.py ADDED Viewed

	@@ -0,0 +1,100 @@

+import torch
+import gradio as gr
+import imageio
+import numpy as np
+from PIL import Image
+from torchvision.transforms import ToTensor
+import spaces
+import tempfile
+@spaces.GPU
+def generate_parallax_video(image, depth_map, T_max=10, k=50, fps=30, duration=5):
+    """
+    Generate a 5-second 3D parallax video from an image and depth map.
+    Parameters:
+    - image (PIL.Image): Input image.
+    - depth_map (PIL.Image): Depth map (grayscale).
+    - T_max (float): Maximum camera translation amplitude.
+    - k (float): Depth displacement scale factor.
+    - fps (int): Frames per second.
+    - duration (int): Video duration in seconds.
+    Returns:
+    - str: Path to the generated video file.
+    """
+    # Validate input sizes
+    if image.size != depth_map.size:
+        raise ValueError("Image and depth map must be the same size")
+    # Convert to PyTorch tensors and move to GPU
+    image_tensor = ToTensor()(image).unsqueeze(0).to('cuda')  # Shape: (1, 3, H, W)
+    depth_tensor = ToTensor()(depth_map.convert('L')).to('cuda')  # Shape: (1, 1, H, W)
+    depth_tensor = (depth_tensor - depth_tensor.min()) / (depth_tensor.max() - depth_tensor.min() + 1e-6)
+    depth_tensor = depth_tensor.squeeze(0).squeeze(0)  # Shape: (H, W)
+    H, W = image_tensor.shape[2], image_tensor.shape[3]
+    # Create base pixel grid
+    x = torch.arange(0, W).float().to('cuda')
+    y = torch.arange(0, H).float().to('cuda')
+    xx, yy = torch.meshgrid(x, y, indexing='ij')
+    pixel_grid = torch.stack((xx, yy), dim=-1)  # Shape: (H, W, 2)
+    # Generate frames
+    num_frames = int(fps * duration)
+    frames = []
+    for frame in range(num_frames):
+        # Simulate horizontal camera movement
+        T = T_max * np.sin(2 * np.pi * frame / num_frames)
+        displacement = k * T * depth_tensor  # Shape: (H, W)
+        # Compute source coordinates
+        source_pixel_x = pixel_grid[:, :, 0] - displacement
+        source_pixel_y = pixel_grid[:, :, 1]
+        # Normalize to [-1, 1] for grid_sample
+        grid_x = 2 * source_pixel_x / (W - 1) - 1
+        grid_y = 2 * source_pixel_y / (H - 1) - 1
+        grid = torch.stack((grid_x, grid_y), dim=-1).unsqueeze(0)  # Shape: (1, H, W, 2)
+        # Warp the image
+        warped = torch.nn.functional.grid_sample(image_tensor, grid, align_corners=True)
+        # Convert to numpy for video
+        warped_np = warped.squeeze(0).permute(1, 2, 0).cpu().numpy()  # Shape: (H, W, 3)
+        frame_img = (warped_np * 255).astype(np.uint8)
+        frames.append(frame_img)
+    # Save video to a temporary file
+    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
+        output_path = tmpfile.name
+        imageio.mimwrite(output_path, frames, fps=fps, quality=8)
+    return output_path
+# Gradio interface
+with gr.Blocks(title="3D Parallax Video Generator") as demo:
+    gr.Markdown("# 3D Parallax Video Generator")
+    gr.Markdown("Upload an image and its depth map to create a 5-second 3D parallax video.")
+    with gr.Row():
+        image_input = gr.Image(type="pil", label="Upload Image")
+        depth_input = gr.Image(type="pil", label="Upload Depth Map")
+    with gr.Row():
+        T_max_slider = gr.Slider(minimum=1, maximum=50, value=10, label="Camera Amplitude (T_max)")
+        k_slider = gr.Slider(minimum=1, maximum=100, value=50, label="Depth Scale (k)")
+        fps_slider = gr.Slider(minimum=10, maximum=60, value=30, label="Frames Per Second")
+    generate_btn = gr.Button("Generate Video")
+    video_output = gr.Video(label="Parallax Video")
+    generate_btn.click(
+        fn=generate_parallax_video,
+        inputs=[image_input, depth_input, T_max_slider, k_slider, fps_slider],
+        outputs=video_output
+    )
+demo.launch()