NihalGazi committed
Commit 4bc60f1 · verified · 1 Parent(s): cdbcfda

Create app.py

Files changed (1)
  1. app.py +268 -0
app.py ADDED
@@ -0,0 +1,268 @@
+ import cv2
+ import numpy as np
+ import os
+ import tempfile
+ from tqdm import tqdm
+ import gradio as gr
+ import ffmpeg
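+ # Note: "ffmpeg" here is the ffmpeg-python wrapper (pip install ffmpeg-python);
+ # it drives a system FFmpeg binary, which must be installed separately.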
+
+ def extract_frames(video_path):
+     """
+     Extracts all frames from the input video.
+     """
+     cap = cv2.VideoCapture(video_path)
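+     # read() decodes frames sequentially; each frame is a BGR numpy array.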
+     frames = []
+     while True:
+         ret, frame = cap.read()
+         if not ret:
+             break
+         frames.append(frame)
+     cap.release()
+     print(f"Extracted {len(frames)} frames from {video_path}")
+     return frames
+
+ def apply_style_propagation(frames, style_image_path,
+                             enable_temporal_reset=True,
+                             enable_median_filtering=True,
+                             enable_patch_based=True,
+                             enable_sharpening=True):
+     """
+     Applies the style from the provided keyframe image to every frame using
+     optical flow, with additional corrections controlled by boolean flags:
+       - Temporal reset/re-anchoring (if enabled)
+       - Median filtering of the flow (if enabled)
+       - Patch-based correction for extreme flow (if enabled)
+       - Sharpening after warping (if enabled)
+     """
+     # Load and resize the style image to match the video dimensions.
+     style_image = cv2.imread(style_image_path)
+     if style_image is None:
+         raise ValueError(f"Failed to load style image from {style_image_path}")
+     h, w = frames[0].shape[:2]
+     style_image = cv2.resize(style_image, (w, h))
+     # Keep a copy for temporal re-anchoring.
+     original_styled = style_image.copy()
+
+     styled_frames = [style_image]
+     prev_gray = cv2.cvtColor(frames[0], cv2.COLOR_BGR2GRAY)
+
+     # Parameters for the corrections:
+     reset_interval = 30   # Every 30 frames, blend with the original style.
+     block_size = 16       # Block size for patch matching.
+     patch_threshold = 10  # Threshold for the mean flow magnitude in a block.
+     search_margin = 10    # Margin around a block for patch matching.
+
+     for i in tqdm(range(1, len(frames)), desc="Propagating style"):
+         curr_gray = cv2.cvtColor(frames[i], cv2.COLOR_BGR2GRAY)
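+         # Dense Farneback flow from the previous to the current frame:
+         # pyr_scale=0.5 halves each pyramid level, levels=3 sets the pyramid
+         # depth, winsize=15 is the averaging window (larger = smoother flow).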
+         flow = cv2.calcOpticalFlowFarneback(
+             prev_gray, curr_gray, None,
+             pyr_scale=0.5, levels=3, winsize=15,
+             iterations=3, poly_n=5, poly_sigma=1.2, flags=0
+         )
+
+         # --- Method 3: Median filtering of the flow ---
+         if enable_median_filtering:
+             flow_x = flow[..., 0]
+             flow_y = flow[..., 1]
+             flow_x_filtered = cv2.medianBlur(flow_x, 3)
+             flow_y_filtered = cv2.medianBlur(flow_y, 3)
+             flow_filtered = np.dstack((flow_x_filtered, flow_y_filtered))
+         else:
+             flow_filtered = flow
+
+         # --- Method 4: Patch-based correction for extreme flow ---
+         if enable_patch_based:
+             flow_corrected = flow_filtered.copy()
+             for by in range(0, h, block_size):
+                 for bx in range(0, w, block_size):
+                     # Define the block region (handle edges).
+                     y1, y2 = by, min(by + block_size, h)
+                     x1, x2 = bx, min(bx + block_size, w)
+                     block_flow = flow_filtered[y1:y2, x1:x2]
+                     mag = np.sqrt(block_flow[..., 0]**2 + block_flow[..., 1]**2)
+                     mean_mag = np.mean(mag)
+                     if mean_mag > patch_threshold:
+                         # Use patch matching to recompute the flow for this block.
+                         patch = prev_gray[y1:y2, x1:x2]
+                         sx1 = max(x1 - search_margin, 0)
+                         sy1 = max(y1 - search_margin, 0)
+                         sx2 = min(x2 + search_margin, w)
+                         sy2 = min(y2 + search_margin, h)
+                         search_region = curr_gray[sy1:sy2, sx1:sx2]
+                         if search_region.shape[0] < patch.shape[0] or search_region.shape[1] < patch.shape[1]:
+                             continue
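+                         # TM_SQDIFF_NORMED scores by normalized squared
+                         # difference, so the best match is at the minimum.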
+                         res = cv2.matchTemplate(search_region, patch, cv2.TM_SQDIFF_NORMED)
+                         _, _, min_loc, _ = cv2.minMaxLoc(res)
+                         best_x = sx1 + min_loc[0]
+                         best_y = sy1 + min_loc[1]
+                         offset_x = best_x - x1
+                         offset_y = best_y - y1
+                         flow_corrected[y1:y2, x1:x2, 0] = offset_x
+                         flow_corrected[y1:y2, x1:x2, 1] = offset_y
+         else:
+             flow_corrected = flow_filtered
+
+         # Compute mapping coordinates.
+         grid_x, grid_y = np.meshgrid(np.arange(w), np.arange(h))
+         map_x = grid_x + flow_corrected[..., 0]
+         map_y = grid_y + flow_corrected[..., 1]
+         map_x = np.clip(map_x, 0, w - 1).astype(np.float32)
+         map_y = np.clip(map_y, 0, h - 1).astype(np.float32)
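+         # cv2.remap does a backward lookup: output pixel (x, y) samples the
+         # source at (map_x[y, x], map_y[y, x]); clipping keeps samples in-frame.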
+
+         # Warp the previous styled frame.
+         warped_styled = cv2.remap(styled_frames[-1], map_x, map_y, interpolation=cv2.INTER_LINEAR)
+
+         # --- Method 2: Temporal reset/re-anchoring ---
+         if enable_temporal_reset and (i % reset_interval == 0):
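+             # Re-anchor: keep 70% of the propagated frame and blend back 30%
+             # of the original keyframe to limit drift from repeated warping.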
+             warped_styled = cv2.addWeighted(warped_styled, 0.7, original_styled, 0.3, 0)
+
+         # --- Method 5: Sharpening post-warping ---
+         if enable_sharpening:
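+             # 3x3 sharpening kernel (identity plus a negative Laplacian) to
+             # counteract the blur accumulated by repeated bilinear warping.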
+             kernel = np.array([[0, -1, 0],
+                                [-1, 5, -1],
+                                [0, -1, 0]], dtype=np.float32)
+             warped_styled = cv2.filter2D(warped_styled, -1, kernel)
+
+         styled_frames.append(warped_styled)
+         prev_gray = curr_gray
+
+     print(f"Propagated style to {len(styled_frames)} frames.")
+     sample_frame = styled_frames[len(styled_frames) // 2]
+     print(f"Sample styled frame mean intensity: {np.mean(sample_frame):.2f}")
+     return styled_frames
+
+ def save_video_cv2(frames, output_path, fps=30):
+     """
+     Saves a list of frames as a video using OpenCV.
+     """
+     h, w, _ = frames[0].shape
+     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
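+     # 'mp4v' is only an intermediate codec that OpenCV can typically write
+     # without extra setup; the file is re-encoded to H.264 later for browsers.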
+     writer = cv2.VideoWriter(output_path, fourcc, fps, (w, h))
+     for frame in frames:
+         writer.write(frame)
+     writer.release()
+     size = os.path.getsize(output_path)
+     print(f"Intermediate video saved to {output_path} (size: {size} bytes)")
+
+ def process_video(video_file, style_image_file, fps=30, target_width=0, target_height=0,
+                   enable_temporal_reset=True,
+                   enable_median_filtering=True,
+                   enable_patch_based=True,
+                   enable_sharpening=True):
+     """
+     Processes the input video by applying the style image via optical flow
+     propagation, with optional corrections (temporal reset, median filtering,
+     patch-based correction, sharpening). Optionally downscales the frames to a
+     target resolution (the style image is always resized to match the frames),
+     then re-encodes the result with FFmpeg for web compatibility.
+
+     Parameters:
+       - video_file: The input video file.
+       - style_image_file: The stylized keyframe image.
+       - fps: Output frames per second.
+       - target_width: Target width for downscaling (0 keeps the original).
+       - target_height: Target height for downscaling (0 keeps the original).
+       - enable_temporal_reset: Boolean flag for temporal reset.
+       - enable_median_filtering: Boolean flag for median filtering of the flow.
+       - enable_patch_based: Boolean flag for patch-based correction.
+       - enable_sharpening: Boolean flag for sharpening after warping.
+
+     Returns:
+       - Path to the final output video.
+     """
+     # Get the video file path.
+     video_path = video_file if isinstance(video_file, str) else video_file["name"]
+
+     # Process the style image input.
+     if isinstance(style_image_file, str):
+         style_image_path = style_image_file
+     elif isinstance(style_image_file, dict) and "name" in style_image_file:
+         style_image_path = style_image_file["name"]
+     elif isinstance(style_image_file, np.ndarray):
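+         # gr.Image hands over an RGB numpy array, while OpenCV writes BGR,
+         # hence the color conversion before saving the temp file.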
+         tmp_style = os.path.join(tempfile.gettempdir(), "temp_style_image.jpeg")
+         cv2.imwrite(tmp_style, cv2.cvtColor(style_image_file, cv2.COLOR_RGB2BGR))
+         style_image_path = tmp_style
+     else:
+         return "Error: Unsupported style image format."
+
+     # Extract frames from the video.
+     frames = extract_frames(video_path)
+     if not frames:
+         return "Error: No frames extracted from the video."
+
+     original_h, original_w = frames[0].shape[:2]
+     print(f"Original video resolution: {original_w}x{original_h}")
+
+     # Downscale if target dimensions are provided.
+     if target_width > 0 and target_height > 0:
+         print(f"Downscaling frames to resolution: {target_width}x{target_height}")
+         frames = [cv2.resize(frame, (target_width, target_height)) for frame in frames]
+     else:
+         print("No downscaling applied. Using original resolution.")
+
+     # Propagate the style with the selected corrections.
+     styled_frames = apply_style_propagation(frames, style_image_path,
+                                             enable_temporal_reset=enable_temporal_reset,
+                                             enable_median_filtering=enable_median_filtering,
+                                             enable_patch_based=enable_patch_based,
+                                             enable_sharpening=enable_sharpening)
+
+     # Save the intermediate video with OpenCV to a named temporary file.
+     temp_video_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
+     temp_video_file.close()
+     temp_video_path = temp_video_file.name
+     save_video_cv2(styled_frames, temp_video_path, fps=fps)
+
+     # Re-encode the video with FFmpeg for browser compatibility.
+     output_video_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
+     output_video_file.close()
+     output_video_path = output_video_file.name
+
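+     # H.264 with the yuv420p pixel format is the widely compatible
+     # combination for HTML5 <video> playback.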
+     try:
+         (
+             ffmpeg
+             .input(temp_video_path)
+             .output(output_video_path, vcodec='libx264', pix_fmt='yuv420p', r=fps)
+             .run(overwrite_output=True, quiet=True)
+         )
+     except ffmpeg.Error as e:
+         # quiet=True captures stderr, so surface it here for debugging.
+         print("FFmpeg error:", e.stderr.decode() if e.stderr else e)
+         return "Error during video re-encoding."
+
+     final_size = os.path.getsize(output_video_path)
+     print(f"Output video saved to {output_video_path} (size: {final_size} bytes)")
+     if final_size == 0:
+         return "Error: Output video file is empty."
+
+     # Clean up the intermediate file.
+     os.remove(temp_video_path)
+
+     return output_video_path
+
+ iface = gr.Interface(
+     fn=process_video,
+     inputs=[
+         gr.Video(label="Input Video (v.mp4)"),
+         gr.Image(label="Stylized Keyframe (a.jpeg)"),
+         gr.Slider(minimum=1, maximum=60, step=1, value=30, label="Output FPS"),
+         gr.Slider(minimum=0, maximum=1920, step=1, value=0, label="Target Width (0 for original)"),
+         gr.Slider(minimum=0, maximum=1080, step=1, value=0, label="Target Height (0 for original)"),
+         gr.Checkbox(label="Enable Temporal Reset", value=True),
+         gr.Checkbox(label="Enable Median Filtering", value=True),
+         gr.Checkbox(label="Enable Patch-Based Correction", value=True),
+         gr.Checkbox(label="Enable Sharpening", value=True)
+     ],
+     outputs=gr.Video(label="Styled Video"),
+     title="Optical Flow Style Propagation with Corrections",
+     description=(
+         "Upload a video and a stylized keyframe image. Optionally downscale to a target resolution.\n"
+         "You can enable/disable the following corrections:\n"
+         "• Temporal Reset/Re-anchoring\n"
+         "• Median Filtering of Flow\n"
+         "• Patch-Based Correction for Extreme Flow\n"
+         "• Sharpening Post-Warping\n"
+         "The output video is re-encoded for web compatibility."
+     )
+ )
+
+ if __name__ == "__main__":
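+     # share=True asks Gradio for a temporary public link; hosted environments
+     # such as Hugging Face Spaces typically ignore it, as the app is already exposed.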
+     iface.launch(share=True)