Miquel Farre committed on
Commit
524fae7
·
1 Parent(s): d068f9e

two highlights path

Browse files
Files changed (1) hide show
  1. app.py +48 -15
app.py CHANGED
@@ -83,12 +83,17 @@ class VideoHighlightDetector:
83
  outputs = self.model.generate(**inputs, max_new_tokens=512, do_sample=True, temperature=0.7)
84
  return self.processor.decode(outputs[0], skip_special_tokens=True).lower().split("assistant: ")[1]
85
 
86
- def determine_highlights(self, video_description: str) -> str:
87
- """Determine what constitutes highlights based on video description."""
 
 
 
 
 
88
  messages = [
89
  {
90
  "role": "system",
91
- "content": [{"type": "text", "text": "You are a highlight editor. List archetypal dramatic moments that would make compelling highlights if they appear in the video. Each moment should be specific enough to be recognizable but generic enough to potentially exist in any video of this type."}]
92
  },
93
  {
94
  "role": "user",
@@ -96,6 +101,7 @@ class VideoHighlightDetector:
96
  }
97
  ]
98
 
 
99
  print(messages)
100
 
101
  inputs = self.processor.apply_chat_template(
@@ -299,15 +305,17 @@ def create_ui(examples_path: str, model_path: str):
299
  formatted_desc = f"### Summary:\n {video_desc[:500] + '...' if len(video_desc) > 500 else video_desc}"
300
 
301
  yield [
302
- "Determining highlight types...",
303
  formatted_desc,
304
  "",
305
  gr.update(visible=False),
306
  gr.update(visible=True)
307
  ]
308
 
309
- highlights = detector.determine_highlights(video_desc)
310
- formatted_highlights = f"### Highlights to search for:\n {highlights[:500] + '...' if len(highlights) > 500 else highlights}"
 
 
311
 
312
  # Split video into segments
313
  temp_dir = "temp_segments"
@@ -315,7 +323,8 @@ def create_ui(examples_path: str, model_path: str):
315
 
316
  segment_length = 10.0
317
  duration = get_video_duration_seconds(video)
318
- kept_segments = []
 
319
  segments_processed = 0
320
  total_segments = int(duration / segment_length)
321
 
@@ -348,10 +357,14 @@ def create_ui(examples_path: str, model_path: str):
348
  ]
349
  subprocess.run(cmd, check=True)
350
 
351
- # Process segment
352
- if detector.process_segment(segment_path, highlights):
353
- print("KEEPING SEGMENT")
354
- kept_segments.append((start_time, end_time))
 
 
 
 
355
 
356
  # Clean up segment file
357
  os.remove(segment_path)
@@ -359,14 +372,33 @@ def create_ui(examples_path: str, model_path: str):
359
  # Remove temp directory
360
  os.rmdir(temp_dir)
361
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
362
  # Create final video
363
- if kept_segments:
364
  with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp_file:
365
  temp_output = tmp_file.name
366
- detector._concatenate_scenes(video, kept_segments, temp_output)
 
 
 
 
 
367
 
368
  yield [
369
- "Processing complete!",
370
  formatted_desc,
371
  formatted_highlights,
372
  gr.update(value=temp_output, visible=True),
@@ -374,7 +406,7 @@ def create_ui(examples_path: str, model_path: str):
374
  ]
375
  else:
376
  yield [
377
- "No highlights detected in the video.",
378
  formatted_desc,
379
  formatted_highlights,
380
  gr.update(visible=False),
@@ -394,6 +426,7 @@ def create_ui(examples_path: str, model_path: str):
394
  # Clean up
395
  torch.cuda.empty_cache()
396
 
 
397
  process_btn.click(
398
  on_process,
399
  inputs=[input_video],
 
83
  outputs = self.model.generate(**inputs, max_new_tokens=512, do_sample=True, temperature=0.7)
84
  return self.processor.decode(outputs[0], skip_special_tokens=True).lower().split("assistant: ")[1]
85
 
86
+ def determine_highlights(self, video_description: str, prompt_num: int = 1) -> str:
87
+ """Determine what constitutes highlights based on video description with different prompts."""
88
+ system_prompts = {
89
+ 1: "You are a highlight editor. List archetypal dramatic moments that would make compelling highlights if they appear in the video. Each moment should be specific enough to be recognizable but generic enough to potentially exist in any video of this type.",
90
+ 2: "You are a highlight editor focusing on subtle and nuanced moments. List quieter, character-driven, or atmospheric moments that would make interesting highlights. Focus on moments that might be overlooked by traditional highlight detection but add depth to the story."
91
+ }
92
+
93
  messages = [
94
  {
95
  "role": "system",
96
+ "content": [{"type": "text", "text": system_prompts[prompt_num]}]
97
  },
98
  {
99
  "role": "user",
 
101
  }
102
  ]
103
 
104
+ print(f"Using prompt {prompt_num} for highlight detection")
105
  print(messages)
106
 
107
  inputs = self.processor.apply_chat_template(
 
305
  formatted_desc = f"### Summary:\n {video_desc[:500] + '...' if len(video_desc) > 500 else video_desc}"
306
 
307
  yield [
308
+ "Determining highlight types (2 variations)...",
309
  formatted_desc,
310
  "",
311
  gr.update(visible=False),
312
  gr.update(visible=True)
313
  ]
314
 
315
+ # Get two different sets of highlights
316
+ highlights1 = detector.determine_highlights(video_desc, prompt_num=1)
317
+ highlights2 = detector.determine_highlights(video_desc, prompt_num=2)
318
+ formatted_highlights = f"### Highlights to search for:\nSet 1:\n{highlights1[:500] + '...' if len(highlights1) > 500 else highlights1}\n\nSet 2:\n{highlights2[:500] + '...' if len(highlights2) > 500 else highlights2}"
319
 
320
  # Split video into segments
321
  temp_dir = "temp_segments"
 
323
 
324
  segment_length = 10.0
325
  duration = get_video_duration_seconds(video)
326
+ kept_segments1 = []
327
+ kept_segments2 = []
328
  segments_processed = 0
329
  total_segments = int(duration / segment_length)
330
 
 
357
  ]
358
  subprocess.run(cmd, check=True)
359
 
360
+ # Process segment with both highlight sets
361
+ if detector.process_segment(segment_path, highlights1):
362
+ print("KEEPING SEGMENT FOR SET 1")
363
+ kept_segments1.append((start_time, end_time))
364
+
365
+ if detector.process_segment(segment_path, highlights2):
366
+ print("KEEPING SEGMENT FOR SET 2")
367
+ kept_segments2.append((start_time, end_time))
368
 
369
  # Clean up segment file
370
  os.remove(segment_path)
 
372
  # Remove temp directory
373
  os.rmdir(temp_dir)
374
 
375
+ # Calculate percentages of video kept for each highlight set
376
+ total_duration = duration
377
+ duration1 = sum(end - start for start, end in kept_segments1)
378
+ duration2 = sum(end - start for start, end in kept_segments2)
379
+
380
+ percent1 = (duration1 / total_duration) * 100
381
+ percent2 = (duration2 / total_duration) * 100
382
+
383
+ print(f"Highlight set 1: {percent1:.1f}% of video")
384
+ print(f"Highlight set 2: {percent2:.1f}% of video")
385
+
386
+ # Choose the set with lower percentage unless it's zero
387
+ final_segments = kept_segments2 if (0 < percent2 <= percent1 or percent1 == 0) else kept_segments1
388
+
389
  # Create final video
390
+ if final_segments:
391
  with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp_file:
392
  temp_output = tmp_file.name
393
+ detector._concatenate_scenes(video, final_segments, temp_output)
394
+
395
+ selected_set = "2" if final_segments == kept_segments2 else "1"
396
+ percent_used = percent2 if final_segments == kept_segments2 else percent1
397
+
398
+ completion_message = f"Processing complete! Used highlight set {selected_set} ({percent_used:.1f}% of video)"
399
 
400
  yield [
401
+ completion_message,
402
  formatted_desc,
403
  formatted_highlights,
404
  gr.update(value=temp_output, visible=True),
 
406
  ]
407
  else:
408
  yield [
409
+ "No highlights detected in the video with either set of criteria.",
410
  formatted_desc,
411
  formatted_highlights,
412
  gr.update(visible=False),
 
426
  # Clean up
427
  torch.cuda.empty_cache()
428
 
429
+
430
  process_btn.click(
431
  on_process,
432
  inputs=[input_video],