Spaces:

NihalGazi
/

EBSynth

Running

App Files Files Community

NihalGazi committed on Feb 16

Commit

4bc60f1

verified ·

1 Parent(s): cdbcfda

Create app.py

Browse files

Files changed (1) hide show

app.py +268 -0

app.py ADDED Viewed

	@@ -0,0 +1,268 @@

+import cv2
+import numpy as np
+import os
+import tempfile
+from tqdm import tqdm
+import gradio as gr
+import ffmpeg
+def extract_frames(video_path):
+    """
+    Extracts all frames from the input video.
+    """
+    cap = cv2.VideoCapture(video_path)
+    frames = []
+    while True:
+        ret, frame = cap.read()
+        if not ret:
+            break
+        frames.append(frame)
+    cap.release()
+    print(f"Extracted {len(frames)} frames from {video_path}")
+    return frames
+def apply_style_propagation(frames, style_image_path,
+                            enable_temporal_reset=True,
+                            enable_median_filtering=True,
+                            enable_patch_based=True,
+                            enable_sharpening=True):
+    """
+    Applies the style from the provided keyframe image to every frame using optical flow,
+    with additional corrections controlled by boolean flags:
+      - Temporal Reset/Re‑anchoring (if enabled)
+      - Median filtering of the flow (if enabled)
+      - Patch‑based correction for extreme flow (if enabled)
+      - Sharpening after warping (if enabled)
+    """
+    # Load and resize the style image to match video dimensions.
+    style_image = cv2.imread(style_image_path)
+    if style_image is None:
+        raise ValueError(f"Failed to load style image from {style_image_path}")
+    h, w = frames[0].shape[:2]
+    style_image = cv2.resize(style_image, (w, h))
+    # Keep a copy for temporal re-anchoring.
+    original_styled = style_image.copy()
+    styled_frames = [style_image]
+    prev_gray = cv2.cvtColor(frames[0], cv2.COLOR_BGR2GRAY)
+    # Parameters for corrections:
+    reset_interval = 30         # Every 30 frames, blend with original style.
+    block_size = 16             # Size of block for patch matching.
+    patch_threshold = 10        # Threshold for mean flow magnitude in a block.
+    search_margin = 10          # Margin around block for patch matching.
+    for i in tqdm(range(1, len(frames)), desc="Propagating style"):
+        curr_gray = cv2.cvtColor(frames[i], cv2.COLOR_BGR2GRAY)
+        flow = cv2.calcOpticalFlowFarneback(
+            prev_gray, curr_gray, None,
+            pyr_scale=0.5, levels=3, winsize=15,
+            iterations=3, poly_n=5, poly_sigma=1.2, flags=0
+        )
+        # --- Method 3: Median Filtering of the Flow ---
+        if enable_median_filtering:
+            flow_x = flow[..., 0]
+            flow_y = flow[..., 1]
+            flow_x_filtered = cv2.medianBlur(flow_x, 3)
+            flow_y_filtered = cv2.medianBlur(flow_y, 3)
+            flow_filtered = np.dstack((flow_x_filtered, flow_y_filtered))
+        else:
+            flow_filtered = flow
+        # --- Method 4: Patch-based Correction for Extreme Flow ---
+        if enable_patch_based:
+            flow_corrected = flow_filtered.copy()
+            for by in range(0, h, block_size):
+                for bx in range(0, w, block_size):
+                    # Define block region (handle edges)
+                    y1, y2 = by, min(by + block_size, h)
+                    x1, x2 = bx, min(bx + block_size, w)
+                    block_flow = flow_filtered[y1:y2, x1:x2]
+                    mag = np.sqrt(block_flow[..., 0]**2 + block_flow[..., 1]**2)
+                    mean_mag = np.mean(mag)
+                    if mean_mag > patch_threshold:
+                        # Use patch matching to recalc flow for this block.
+                        patch = prev_gray[y1:y2, x1:x2]
+                        sx1 = max(x1 - search_margin, 0)
+                        sy1 = max(by - search_margin, 0)
+                        sx2 = min(x2 + search_margin, w)
+                        sy2 = min(y2 + search_margin, h)
+                        search_region = curr_gray[sy1:sy2, sx1:sx2]
+                        if search_region.shape[0] < patch.shape[0] or search_region.shape[1] < patch.shape[1]:
+                            continue
+                        res = cv2.matchTemplate(search_region, patch, cv2.TM_SQDIFF_NORMED)
+                        _, _, min_loc, _ = cv2.minMaxLoc(res)
+                        best_x = sx1 + min_loc[0]
+                        best_y = sy1 + min_loc[1]
+                        offset_x = best_x - x1
+                        offset_y = best_y - by
+                        flow_corrected[y1:y2, x1:x2, 0] = offset_x
+                        flow_corrected[y1:y2, x1:x2, 1] = offset_y
+        else:
+            flow_corrected = flow_filtered
+        # Compute mapping coordinates.
+        grid_x, grid_y = np.meshgrid(np.arange(w), np.arange(h))
+        map_x = grid_x + flow_corrected[..., 0]
+        map_y = grid_y + flow_corrected[..., 1]
+        map_x = np.clip(map_x, 0, w - 1).astype(np.float32)
+        map_y = np.clip(map_y, 0, h - 1).astype(np.float32)
+        # Warp the previous styled frame.
+        warped_styled = cv2.remap(styled_frames[-1], map_x, map_y, interpolation=cv2.INTER_LINEAR)
+        # --- Method 2: Temporal Reset/Re-anchoring ---
+        if enable_temporal_reset and (i % reset_interval == 0):
+            warped_styled = cv2.addWeighted(warped_styled, 0.7, original_styled, 0.3, 0)
+        # --- Method 5: Sharpening Post-Warping ---
+        if enable_sharpening:
+            kernel = np.array([[0, -1, 0],
+                               [-1, 5, -1],
+                               [0, -1, 0]], dtype=np.float32)
+            warped_styled = cv2.filter2D(warped_styled, -1, kernel)
+        styled_frames.append(warped_styled)
+        prev_gray = curr_gray
+    print(f"Propagated style to {len(styled_frames)} frames.")
+    sample_frame = styled_frames[len(styled_frames) // 2]
+    print(f"Sample styled frame mean intensity: {np.mean(sample_frame):.2f}")
+    return styled_frames
+def save_video_cv2(frames, output_path, fps=30):
+    """
+    Saves a list of frames as a video using OpenCV.
+    """
+    h, w, _ = frames[0].shape
+    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+    writer = cv2.VideoWriter(output_path, fourcc, fps, (w, h))
+    for frame in frames:
+        writer.write(frame)
+    writer.release()
+    size = os.path.getsize(output_path)
+    print(f"Intermediate video saved to {output_path} (size: {size} bytes)")
+def process_video(video_file, style_image_file, fps=30, target_width=0, target_height=0,
+                  enable_temporal_reset=True,
+                  enable_median_filtering=True,
+                  enable_patch_based=True,
+                  enable_sharpening=True):
+    """
+    Processes the input video by applying the style image via optical flow propagation,
+    with optional corrections (temporal reset, median filtering, patch-based correction, sharpening).
+    Optionally downscale the video and style image to the specified resolution.
+    Then re-encodes the video with FFmpeg for web compatibility.
+    Parameters:
+      - video_file: The input video file.
+      - style_image_file: The stylized keyframe image.
+      - fps: Output frames per second.
+      - target_width: Target width for downscaling (0 for original).
+      - target_height: Target height for downscaling (0 for original).
+      - enable_temporal_reset: Boolean flag for temporal reset.
+      - enable_median_filtering: Boolean flag for median filtering of flow.
+      - enable_patch_based: Boolean flag for patch-based correction.
+      - enable_sharpening: Boolean flag for sharpening post-warp.
+    Returns:
+      - Path to the final output video.
+    """
+    # Get the video file path.
+    video_path = video_file if isinstance(video_file, str) else video_file["name"]
+    # Process the style image input.
+    if isinstance(style_image_file, str):
+        style_image_path = style_image_file
+    elif isinstance(style_image_file, dict) and "name" in style_image_file:
+        style_image_path = style_image_file["name"]
+    elif isinstance(style_image_file, np.ndarray):
+        tmp_style = os.path.join(tempfile.gettempdir(), "temp_style_image.jpeg")
+        cv2.imwrite(tmp_style, cv2.cvtColor(style_image_file, cv2.COLOR_RGB2BGR))
+        style_image_path = tmp_style
+    else:
+        return "Error: Unsupported style image format."
+    # Extract frames from the video.
+    frames = extract_frames(video_path)
+    if not frames:
+        return "Error: No frames extracted from the video."
+    original_h, original_w = frames[0].shape[:2]
+    print(f"Original video resolution: {original_w}x{original_h}")
+    # Downscale if target dimensions are provided.
+    if target_width > 0 and target_height > 0:
+        print(f"Downscaling frames to resolution: {target_width}x{target_height}")
+        frames = [cv2.resize(frame, (target_width, target_height)) for frame in frames]
+    else:
+        print("No downscaling applied. Using original resolution.")
+    # Propagate style with the selected corrections.
+    styled_frames = apply_style_propagation(frames, style_image_path,
+                                            enable_temporal_reset=enable_temporal_reset,
+                                            enable_median_filtering=enable_median_filtering,
+                                            enable_patch_based=enable_patch_based,
+                                            enable_sharpening=enable_sharpening)
+    # Save intermediate video using OpenCV to a named temporary file.
+    temp_video_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
+    temp_video_file.close()
+    temp_video_path = temp_video_file.name
+    save_video_cv2(styled_frames, temp_video_path, fps=fps)
+    # Re-encode the video using FFmpeg for browser compatibility.
+    output_video_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
+    output_video_file.close()
+    output_video_path = output_video_file.name
+    try:
+        (
+            ffmpeg
+            .input(temp_video_path)
+            .output(output_video_path, vcodec='libx264', pix_fmt='yuv420p', r=fps)
+            .run(overwrite_output=True, quiet=True)
+        )
+    except ffmpeg.Error as e:
+        print("FFmpeg error:", e)
+        return "Error during video re-encoding."
+    final_size = os.path.getsize(output_video_path)
+    print(f"Output video saved to {output_video_path} (size: {final_size} bytes)")
+    if final_size == 0:
+        return "Error: Output video file is empty."
+    # Clean up the intermediate file.
+    os.remove(temp_video_path)
+    return output_video_path
+# Gradio UI definition. The entries of `inputs` are listed in the same order as
+# the parameters of process_video (video, style image, fps, target width,
+# target height, then the four correction flags), so keep both in sync when
+# adding or reordering controls.
+iface = gr.Interface(
+    fn=process_video,
+    inputs=[
+        gr.Video(label="Input Video (v.mp4)"),
+        gr.Image(label="Stylized Keyframe (a.jpeg)"),
+        gr.Slider(minimum=1, maximum=60, step=1, value=30, label="Output FPS"),
+        # A value of 0 on either size slider means "keep the original resolution".
+        gr.Slider(minimum=0, maximum=1920, step=1, value=0, label="Target Width (0 for original)"),
+        gr.Slider(minimum=0, maximum=1080, step=1, value=0, label="Target Height (0 for original)"),
+        gr.Checkbox(label="Enable Temporal Reset", value=True),
+        gr.Checkbox(label="Enable Median Filtering", value=True),
+        gr.Checkbox(label="Enable Patch-Based Correction", value=True),
+        gr.Checkbox(label="Enable Sharpening", value=True)
+    ],
+    outputs=gr.Video(label="Styled Video"),
+    title="Optical Flow Style Propagation with Corrections",
+    description=(
+        "Upload a video and a stylized keyframe image. Optionally downscale to a target resolution.\n"
+        "You can enable/disable the following corrections:\n"
+        "• Temporal Reset/Re-anchoring\n"
+        "• Median Filtering of Flow\n"
+        "• Patch-Based Correction for Extreme Flow\n"
+        "• Sharpening Post-Warping\n"
+        "The output video is re-encoded for web compatibility."
+    )
+)
+# Start the web UI only when this file is run as a script, not when imported.
+if __name__ == "__main__":
+    # share=True asks Gradio for a public share link in addition to the local server.
+    iface.launch(share=True)