Spaces: Running on Zero
Miquel Farre committed · Commit 524fae7 · Parent(s): d068f9e
two highlights path

app.py CHANGED
@@ -83,12 +83,17 @@ class VideoHighlightDetector:
         outputs = self.model.generate(**inputs, max_new_tokens=512, do_sample=True, temperature=0.7)
         return self.processor.decode(outputs[0], skip_special_tokens=True).lower().split("assistant: ")[1]

-    def determine_highlights(self, video_description: str) -> str:
-        """Determine what constitutes highlights based on video description."""
+    def determine_highlights(self, video_description: str, prompt_num: int = 1) -> str:
+        """Determine what constitutes highlights based on video description with different prompts."""
+        system_prompts = {
+            1: "You are a highlight editor. List archetypal dramatic moments that would make compelling highlights if they appear in the video. Each moment should be specific enough to be recognizable but generic enough to potentially exist in any video of this type.",
+            2: "You are a highlight editor focusing on subtle and nuanced moments. List quieter, character-driven, or atmospheric moments that would make interesting highlights. Focus on moments that might be overlooked by traditional highlight detection but add depth to the story."
+        }
+
         messages = [
             {
                 "role": "system",
-                "content": [{"type": "text", "text":
+                "content": [{"type": "text", "text": system_prompts[prompt_num]}]
             },
             {
                 "role": "user",
@@ -96,6 +101,7 @@ class VideoHighlightDetector:
             }
         ]

+        print(f"Using prompt {prompt_num} for highlight detection")
         print(messages)

         inputs = self.processor.apply_chat_template(
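With the `prompt_num` switch in place, a caller can pull two differently biased highlight lists from the same description. A minimal usage sketch; the constructor arguments and the description string are illustrative, only `determine_highlights(..., prompt_num=...)` comes from this diff:

# Illustrative only: how the updated method is meant to be called twice.
# VideoHighlightDetector construction lives elsewhere in app.py and is assumed here.
detector = VideoHighlightDetector(model_path)

video_desc = "A soccer match with several goal attempts and one red card."
dramatic = detector.determine_highlights(video_desc, prompt_num=1)  # archetypal dramatic moments
subtle = detector.determine_highlights(video_desc, prompt_num=2)    # quieter, character-driven moments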
@@ -299,15 +305,17 @@ def create_ui(examples_path: str, model_path: str):
         formatted_desc = f"### Summary:\n {video_desc[:500] + '...' if len(video_desc) > 500 else video_desc}"

         yield [
-            "Determining highlight types...",
+            "Determining highlight types (2 variations)...",
             formatted_desc,
             "",
             gr.update(visible=False),
             gr.update(visible=True)
         ]

-        highlights = detector.determine_highlights(video_desc)
-        formatted_highlights = f"### Highlights to search for:\n{highlights[:500] + '...' if len(highlights) > 500 else highlights}"
+        # Get two different sets of highlights
+        highlights1 = detector.determine_highlights(video_desc, prompt_num=1)
+        highlights2 = detector.determine_highlights(video_desc, prompt_num=2)
+        formatted_highlights = f"### Highlights to search for:\nSet 1:\n{highlights1[:500] + '...' if len(highlights1) > 500 else highlights1}\n\nSet 2:\n{highlights2[:500] + '...' if len(highlights2) > 500 else highlights2}"

         # Split video into segments
         temp_dir = "temp_segments"
@@ -315,7 +323,8 @@ def create_ui(examples_path: str, model_path: str):

         segment_length = 10.0
         duration = get_video_duration_seconds(video)
-        kept_segments = []
+        kept_segments1 = []
+        kept_segments2 = []
         segments_processed = 0
         total_segments = int(duration / segment_length)

@@ -348,10 +357,14 @@ def create_ui(examples_path: str, model_path: str):
             ]
             subprocess.run(cmd, check=True)

-            # Process segment
-            if detector.process_segment(segment_path, highlights):
-                print("KEEPING SEGMENT")
-                kept_segments.append((start_time, end_time))
+            # Process segment with both highlight sets
+            if detector.process_segment(segment_path, highlights1):
+                print("KEEPING SEGMENT FOR SET 1")
+                kept_segments1.append((start_time, end_time))
+
+            if detector.process_segment(segment_path, highlights2):
+                print("KEEPING SEGMENT FOR SET 2")
+                kept_segments2.append((start_time, end_time))

             # Clean up segment file
             os.remove(segment_path)
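The `cmd` that `subprocess.run` executes is assembled above this hunk and is not part of the diff. For orientation, a stream-copy extraction of one 10-second segment typically looks roughly like the sketch below; the exact flags in app.py may differ:

import subprocess

def extract_segment(video: str, start_time: float, segment_length: float, segment_path: str) -> None:
    """Sketch of the kind of ffmpeg command the loop runs per segment.
    Stream copy avoids re-encoding, so cutting is fast."""
    cmd = [
        "ffmpeg", "-y",             # overwrite the output file if it exists
        "-ss", str(start_time),     # seek to the segment start
        "-t", str(segment_length),  # segment duration (10.0 s in app.py)
        "-i", video,
        "-c", "copy",               # copy streams instead of re-encoding
        segment_path,
    ]
    subprocess.run(cmd, check=True)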
@@ -359,14 +372,33 @@ def create_ui(examples_path: str, model_path: str):
         # Remove temp directory
         os.rmdir(temp_dir)

+        # Calculate percentages of video kept for each highlight set
+        total_duration = duration
+        duration1 = sum(end - start for start, end in kept_segments1)
+        duration2 = sum(end - start for start, end in kept_segments2)
+
+        percent1 = (duration1 / total_duration) * 100
+        percent2 = (duration2 / total_duration) * 100
+
+        print(f"Highlight set 1: {percent1:.1f}% of video")
+        print(f"Highlight set 2: {percent2:.1f}% of video")
+
+        # Choose the set with lower percentage unless it's zero
+        final_segments = kept_segments2 if (0 < percent2 <= percent1 or percent1 == 0) else kept_segments1
+
         # Create final video
-        if kept_segments:
+        if final_segments:
             with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp_file:
                 temp_output = tmp_file.name
-            detector._concatenate_scenes(video, kept_segments, temp_output)
+            detector._concatenate_scenes(video, final_segments, temp_output)
+
+            selected_set = "2" if final_segments == kept_segments2 else "1"
+            percent_used = percent2 if final_segments == kept_segments2 else percent1
+
+            completion_message = f"Processing complete! Used highlight set {selected_set} ({percent_used:.1f}% of video)"

             yield [
-                "Processing complete!",
+                completion_message,
                 formatted_desc,
                 formatted_highlights,
                 gr.update(value=temp_output, visible=True),
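The `final_segments` line is the crux of the commit: prefer the smaller non-empty cut, on the theory that a tighter reel means the prompt discriminated better. Spelled out as a standalone helper with its edge cases, purely a sketch for clarity, not code from the commit:

def choose_final_segments(kept_segments1, kept_segments2, percent1, percent2):
    """Mirror of the diff's one-liner: prefer the set that keeps less of
    the video, unless that set kept nothing at all.

    - both non-empty -> the smaller percentage wins (ties go to set 2)
    - set 1 empty    -> set 2 wins (even if set 2 is also empty)
    - set 2 empty    -> set 1 wins
    """
    if 0 < percent2 <= percent1 or percent1 == 0:
        return kept_segments2
    return kept_segments1

# percent1=42.0, percent2=18.5 -> set 2 (tighter reel)
# percent1=0.0,  percent2=30.0 -> set 2 (set 1 found nothing)
# percent1=25.0, percent2=0.0  -> set 1 (set 2 found nothing)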
@@ -374,7 +406,7 @@ def create_ui(examples_path: str, model_path: str):
             ]
         else:
             yield [
-                "No highlights detected in the video.",
+                "No highlights detected in the video with either set of criteria.",
                 formatted_desc,
                 formatted_highlights,
                 gr.update(visible=False),
@@ -394,6 +426,7 @@ def create_ui(examples_path: str, model_path: str):
         # Clean up
         torch.cuda.empty_cache()

+
     process_btn.click(
         on_process,
         inputs=[input_video],
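`detector._concatenate_scenes(video, final_segments, temp_output)` is untouched by this commit and its body is not shown. One common way to implement that signature is to cut each kept (start, end) range and join the pieces with ffmpeg's concat demuxer; the sketch below is an illustrative stand-in, not the method from app.py:

import os
import subprocess
import tempfile

def concatenate_scenes(video: str, scene_times: list[tuple[float, float]], output_path: str) -> None:
    """Illustrative stand-in for detector._concatenate_scenes: extract each
    kept (start, end) range, then join the parts with ffmpeg's concat demuxer."""
    with tempfile.TemporaryDirectory() as tmp:
        list_path = os.path.join(tmp, "segments.txt")
        with open(list_path, "w") as listing:
            for i, (start, end) in enumerate(scene_times):
                part = os.path.join(tmp, f"part{i}.mp4")
                subprocess.run([
                    "ffmpeg", "-y", "-ss", str(start), "-t", str(end - start),
                    "-i", video, "-c", "copy", part,
                ], check=True)
                listing.write(f"file '{part}'\n")  # concat demuxer list format
        subprocess.run([
            "ffmpeg", "-y", "-f", "concat", "-safe", "0",  # -safe 0 allows absolute paths
            "-i", list_path, "-c", "copy", output_path,
        ], check=True)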