testdeep123 committed on
Commit 9577f14 · verified · 1 Parent(s): 942c7b5

Update app.py

Files changed (1): app.py (+450 -278)

app.py CHANGED
@@ -1,5 +1,4 @@
 
-
 from kokoro import KPipeline
 
 import soundfile as sf
@@ -15,7 +14,7 @@ import cv2
 import math
 import os, requests, io, time, re, random
 from moviepy.editor import (
-    VideoFileClip, concatenate_videoclips, AudioFileClip, ImageClip,
     CompositeVideoClip, TextClip, CompositeAudioClip
 )
 import moviepy.video.fx.all as vfx
@@ -33,12 +32,17 @@ from urllib.parse import quote
 from gtts import gTTS
 import gradio as gr  # Import Gradio
 import shutil  # Needed for temp folder cleanup
 
 # Initialize Kokoro TTS pipeline (using American English)
 # Ensure you have the required voice models downloaded for Kokoro if needed,
 # or it will fall back to gTTS. 'a' for American English uses voice 'af_heart'.
 try:
     pipeline = KPipeline(lang_code='a')
     print("Kokoro TTS pipeline initialized.")
 except Exception as e:
     print(f"Warning: Could not initialize Kokoro TTS pipeline: {e}. Will rely on gTTS.")
@@ -51,11 +55,30 @@ except Exception as e:
 # Common paths: "/usr/bin/convert", "/usr/local/bin/convert", "C:\\Program Files\\ImageMagick-X.Y.Z-Q16\\convert.exe"
 # You might need to adjust this based on your OS and installation
 IMAGEMAGICK_BINARY_PATH = "/usr/bin/convert"  # Default path, check your system
-if not os.path.exists(IMAGEMAGICK_BINARY_PATH):
-    print(f"Warning: ImageMagick binary not found at {IMAGEMAGICK_BINARY_PATH}. TextClip may not work.")
-    print("Please install ImageMagick or update the IMAGEMAGICK_BINARY_PATH.")
-
-mpy_config.change_settings({"IMAGEMAGICK_BINARY": IMAGEMAGICK_BINARY_PATH})
 
 # ---------------- Global Configuration ---------------- #
@@ -67,7 +90,7 @@ OUTPUT_VIDEO_FILENAME = "final_video.mp4"
 USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
 
 # Maximum number of script segments to display for editing
-MAX_SEGMENTS_FOR_EDITING = 15
 
 # Global placeholder for the temporary folder, will be created per run
 TEMP_FOLDER = None
@@ -259,7 +282,7 @@ def search_pexels_videos(query):
     for page in range(1, num_pages + 1):
         for attempt in range(max_retries):
             try:
-                params = {"query": search_query, "per_page": videos_per_page, "page": page}
                 response = requests.get(base_url, headers=headers, params=params, timeout=10)
 
                 if response.status_code == 200:
@@ -271,16 +294,22 @@ def search_pexels_videos(query):
                 other_videos_on_page = []
                 for video in videos:
                     video_files = video.get("video_files", [])
-                    for file in video_files:
-                        if file.get("quality") == "hd":
-                            hd_videos_on_page.append(file.get("link"))
-                            break  # Found HD, move to next video file for this video entry
-                        # Collect other qualities just in case no HD is found on this page or in total
-                        other_videos_on_page.append(file.get("link"))
-
                 all_videos.extend(hd_videos_on_page)  # Add HD videos found
-                if not hd_videos_on_page:  # If no HD found on this page, add other videos
                     all_videos.extend(other_videos_on_page)
 
                 if not videos:
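For context on the loop above: the `for attempt in range(max_retries)` wrapper is a standard retry-with-backoff pattern around `requests.get`. A minimal, self-contained sketch of the same idea (the function name and backoff schedule are illustrative, not taken from the commit):

```python
import time
import requests

def get_with_retries(url, headers, params, max_retries=3, timeout=10):
    """Fetch a URL, backing off briefly between failed attempts."""
    for attempt in range(1, max_retries + 1):
        try:
            response = requests.get(url, headers=headers, params=params, timeout=timeout)
            if response.status_code == 200:
                return response
            print(f"Attempt {attempt}: HTTP {response.status_code}")
        except requests.RequestException as e:
            print(f"Attempt {attempt} failed: {e}")
        time.sleep(2 ** attempt)  # exponential backoff: 2s, 4s, 8s...
    return None
```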
@@ -424,7 +453,7 @@ def download_image(image_url, filename):
 
     try:
         headers = {"User-Agent": USER_AGENT}
-        print(f"Attempting to download image from: {image_url}")
        response = requests.get(image_url, headers=headers, stream=True, timeout=20)  # Increased timeout
        response.raise_for_status()
 
@@ -619,6 +648,12 @@ def generate_silent_audio(duration, sample_rate=24000):
     num_samples = int(duration * sample_rate)
     silence = np.zeros(num_samples, dtype=np.float32)
     # Use unique filename to avoid conflicts
     silent_path = os.path.join(TEMP_FOLDER, f"silent_{abs(hash(duration)) % (10**8)}_{int(time.time())}.wav")
     try:
         sf.write(silent_path, silence, sample_rate)
@@ -638,6 +673,10 @@ def generate_tts(text, voice='en'):
         print("TTS text is empty. Generating silent audio.")
         return generate_silent_audio(duration=2.0)  # Default silence for empty text
 
     os.makedirs(TEMP_FOLDER, exist_ok=True)  # Ensure temp folder exists
     safe_text_hash = str(abs(hash(text)) % (10**10))  # Use a hash for potentially long text
     file_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text_hash}.wav")
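The surrounding comments describe a gTTS fallback for when Kokoro is unavailable. A minimal sketch of that path, assuming gTTS (which only writes mp3) plus moviepy for the wav conversion; the helper name is illustrative, and 24 kHz matches the sample rate the file uses elsewhere:

```python
from gtts import gTTS
from moviepy.editor import AudioFileClip

def gtts_fallback(text, wav_path):
    """Generate speech with gTTS (mp3 only) and convert it to 24 kHz wav."""
    mp3_path = wav_path.replace(".wav", ".mp3")
    gTTS(text=text, lang="en").save(mp3_path)   # gTTS writes mp3 exclusively
    audio = AudioFileClip(mp3_path)
    audio.write_audiofile(wav_path, fps=24000)  # resample to the app's 24 kHz standard
    audio.close()
    return wav_path
```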
@@ -649,7 +688,8 @@ def generate_tts(text, voice='en'):
     # Estimate duration based on word count (adjust factor as needed), used if TTS fails
     target_duration_fallback = max(2.0, len(text.split()) * 0.4)
 
-    if pipeline:
         try:
             print(f"Attempting Kokoro TTS for text: '{text[:50]}...'")
             kokoro_voice = 'af_heart' if voice == 'en' else voice  # Kokoro default American English voice
@@ -657,9 +697,16 @@ def generate_tts(text, voice='en'):
             generator = pipeline(text, voice=kokoro_voice, speed=1.0, split_pattern=r'\n+')  # Use speed 1.0
             audio_segments = []
             total_duration = 0
             for i, (gs, ps, audio) in enumerate(generator):
                 audio_segments.append(audio)
                 total_duration += len(audio) / 24000.0  # Assuming 24000 Hz sample rate
             if audio_segments:
                 full_audio = np.concatenate(audio_segments) if len(audio_segments) > 1 else audio_segments[0]
                 sf.write(file_path, full_audio, 24000)  # Use 24000Hz standard
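Kokoro's generator yields one numpy array per split text segment; stitching them into a single wav is plain numpy plus soundfile. A standalone sketch (helper name illustrative, 24 kHz assumed per the comments above):

```python
import numpy as np
import soundfile as sf

SAMPLE_RATE = 24000  # Kokoro's output rate, per the comment above

def write_segments(segments, path):
    """Concatenate per-segment float32 audio arrays and write one wav file."""
    full_audio = np.concatenate(segments) if len(segments) > 1 else segments[0]
    sf.write(path, full_audio, SAMPLE_RATE)
    return len(full_audio) / SAMPLE_RATE  # total duration in seconds
```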
@@ -721,84 +768,31 @@ def apply_kenburns_effect(clip, target_resolution, effect_type=None):
     # Define start and end positions of the top-left corner of the target_resolution window
     start_x, start_y = 0, 0
     end_x, end_y = 0, 0
-    start_zoom_factor = 1.0  # Relative to the scaled image size
-    end_zoom_factor = 1.0
 
-    # Set start/end positions based on effect type. Positions are top-left corner of the target frame within the scaled image.
     if effect_type == "zoom-in":
-        start_zoom_factor = 1.0  # Starts covering the entire scaled image
-        end_zoom_factor = scale_factor  # Zooms to cover the original image size within the scaled frame
-        # Stay centered
-        start_x = max_offset_x / 2  # Top-left of the original image center
         start_y = max_offset_y / 2
         end_x = max_offset_x / 2
         end_y = max_offset_y / 2
-        # Note: The zoom factor here is relative to the FINAL frame size during the effect,
-        # which is `target_resolution`. A zoom factor of 1 means crop size is `target_resolution`.
-        # A zoom factor of `scale_factor` means crop size is `target_resolution / scale_factor`.
-        # Let's redefine zoom factors to be relative to target_resolution for clarity
-        start_zoom_relative = 1.0  # Start at target size
-        end_zoom_relative = scale_factor  # End zoomed in by scale factor
-
-        def get_crop_size(zoom_relative):
-            return int(target_w / zoom_relative), int(target_h / zoom_relative)
-
-        # Adjust start/end positions to match the changing crop size to keep the center aligned
-        def get_current_center(t):
-            progress = t / clip.duration if clip.duration > 0 else 0
-            eased_progress = 0.5 - 0.5 * math.cos(math.pi * progress)
-            current_zoom_relative = start_zoom_relative + (end_zoom_relative - start_zoom_relative) * eased_progress
-            current_crop_w, current_crop_h = get_crop_size(current_zoom_relative)
-            # Center position in the scaled image coordinates
-            center_x = new_width / 2
-            center_y = new_height / 2
-            return center_x, center_y, current_crop_w, current_crop_h
-
-        def transform_frame_zoom(get_frame, t):
-            frame = get_frame(t)
-            center_x, center_y, crop_w, crop_h = get_current_center(t)
-            # Ensure center stays within bounds
-            center_x = max(crop_w / 2, min(center_x, new_width - crop_w / 2))
-            center_y = max(crop_h / 2, min(center_y, new_height - crop_h / 2))
-            cropped_frame = cv2.getRectSubPix(frame, (crop_w, crop_h), (center_x, center_y))
-            resized_frame = cv2.resize(cropped_frame, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)
-            return resized_frame
-
-        return clip.fl(transform_frame_zoom)
 
     elif effect_type == "zoom-out":
         start_zoom_relative = scale_factor  # Start zoomed in
-        end_zoom_relative = 1.0  # End at target size
-
-        def get_crop_size(zoom_relative):
-            return int(target_w / zoom_relative), int(target_h / zoom_relative)
-
-        def get_current_center(t):
-            progress = t / clip.duration if clip.duration > 0 else 0
-            eased_progress = 0.5 - 0.5 * math.cos(math.pi * progress)
-            current_zoom_relative = start_zoom_relative + (end_zoom_relative - start_zoom_relative) * eased_progress
-            current_crop_w, current_crop_h = get_crop_size(current_zoom_relative)
-            center_x = new_width / 2
-            center_y = new_height / 2
-            return center_x, center_y, current_crop_w, current_crop_h
-
-        def transform_frame_zoom(get_frame, t):
-            frame = get_frame(t)
-            center_x, center_y, crop_w, crop_h = get_current_center(t)
-            center_x = max(crop_w / 2, min(center_x, new_width - crop_w / 2))
-            center_y = max(crop_h / 2, min(center_y, new_height - crop_h / 2))
-            cropped_frame = cv2.getRectSubPix(frame, (crop_w, crop_h), (center_x, center_y))
-            resized_frame = cv2.resize(cropped_frame, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)
-            return resized_frame
-
-        return clip.fl(transform_frame_zoom)
-
-    # For pan effects, the crop size is constant (target_resolution)
-    # We just interpolate the top-left corner position
-    crop_w, crop_h = target_w, target_h
 
-    if effect_type == "pan-left":
         start_x = max_offset_x
         start_y = max_offset_y / 2
         end_x = 0
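The per-effect closures deleted above all reduce to the same math: a cosine-eased interpolation between a start and an end zoom, with the crop window derived from the current zoom (the updated file below folds this into one `transform_frame`). A standalone sketch of just that interpolation (the example figures are rounded):

```python
import math

def eased(t, duration):
    """Cosine ease-in/out: 0 at t=0, 0.5 at the midpoint, 1 at t=duration."""
    progress = t / duration if duration > 0 else 0
    return 0.5 - 0.5 * math.cos(math.pi * progress)

def crop_size(target_w, target_h, start_zoom, end_zoom, t, duration):
    """Crop window size at time t; zoom=1.0 means the full target size."""
    zoom = start_zoom + (end_zoom - start_zoom) * eased(t, duration)
    return max(1, int(target_w / zoom)), max(1, int(target_h / zoom))

# e.g. a 2 s zoom-in from 1.0x to 1.15x on a 1920x1080 frame:
# crop_size(1920, 1080, 1.0, 1.15, 1.0, 2.0) -> (1786, 1004) at the midpoint
```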
@@ -838,13 +832,21 @@ def apply_kenburns_effect(clip, target_resolution, effect_type=None):
         print(f"Warning: Unexpected effect type '{effect_type}'. Defaulting to 'pan-right'.")
 
-    def transform_frame_pan(get_frame, t):
         frame = get_frame(t)
         # Use a smooth ease-in/ease-out function
         progress = t / clip.duration if clip.duration > 0 else 0
         eased_progress = 0.5 - 0.5 * math.cos(math.pi * progress)  # Cosine easing
 
-        # Interpolate position (top-left corner of the target frame)
         current_x = start_x + (end_x - start_x) * eased_progress
         current_y = start_y + (end_y - start_y) * eased_progress
@@ -852,27 +854,24 @@ def apply_kenburns_effect(clip, target_resolution, effect_type=None):
         center_x = current_x + crop_w / 2
         center_y = current_y + crop_h / 2
 
-        # Ensure center stays within the bounds of the scaled image
         center_x = max(crop_w / 2, min(center_x, new_width - crop_w / 2))
         center_y = max(crop_h / 2, min(center_y, new_height - crop_h / 2))
 
-
         try:
             # Perform the crop using cv2.getRectSubPix (expects floating point center)
             # Ensure frame is a numpy array (moviepy returns numpy arrays)
-            # Clamp coordinates to avoid errors on edges
-            # Note: cv2.getRectSubPix handles bounds clipping internally, but explicit checks can prevent NaNs
             center_x = np.clip(center_x, 0, new_width)
             center_y = np.clip(center_y, 0, new_height)
 
             cropped_frame = cv2.getRectSubPix(frame, (crop_w, crop_h), (center_x, center_y))
-            # Resize the cropped frame back to the target resolution (should already be target_resolution size)
-            # This resize is actually redundant if crop_w, crop_h == target_w, target_h
-            # but might be needed if bounds clipping changed effective size slightly?
-            # Let's remove the resize if crop size == target size for efficiency
-            # if (crop_w, crop_h) == (target_w, target_h):
-            #     resized_frame = cropped_frame  # No need to resize
-            # else:
             resized_frame = cv2.resize(cropped_frame, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)
 
             return resized_frame
@@ -882,8 +881,8 @@ def apply_kenburns_effect(clip, target_resolution, effect_type=None):
             return np.zeros((target_h, target_w, 3), dtype=np.uint8)
 
-    # Apply the panning transform
-    return clip.fl(transform_frame_pan)
 
 def resize_to_fill(clip, target_resolution):
@@ -901,6 +900,8 @@ def resize_to_fill(clip, target_resolution):
         # Ensure crop coordinates are integers
         x1 = int(crop_amount_x)
         x2 = int(clip.w - crop_amount_x)
         clip = clip.crop(x1=x1, x2=x2, y1=0, y2=clip.h)
     else:  # Clip is taller than target or same aspect
         clip = clip.resize(width=target_w)
@@ -909,6 +910,8 @@ def resize_to_fill(clip, target_resolution):
         # Ensure crop coordinates are integers
         y1 = int(crop_amount_y)
         y2 = int(clip.h - crop_amount_y)
         clip = clip.crop(x1=0, x2=clip.w, y1=y1, y2=y2)
 
     # Final check and resize if dimensions are slightly off due to rounding
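For a concrete check of the `resize_to_fill` arithmetic: a square 1080x1080 source filling a 1920x1080 frame is first resized to 1920x1920, then trimmed by 420 px top and bottom. A minimal sketch of the same computation outside moviepy (function name illustrative):

```python
def fill_crop_box(src_w, src_h, target_w, target_h):
    """Compute the resize + centered crop that fills the target frame."""
    src_aspect, target_aspect = src_w / src_h, target_w / target_h
    if src_aspect > target_aspect:           # wider: match height, trim sides
        new_w, new_h = int(src_w * target_h / src_h), target_h
        crop_x = (new_w - target_w) / 2
        return (new_w, new_h), (int(crop_x), 0, int(new_w - crop_x), new_h)
    else:                                    # taller or equal: match width, trim top/bottom
        new_w, new_h = target_w, int(src_h * target_w / src_w)
        crop_y = (new_h - target_h) / 2
        return (new_w, new_h), (0, int(crop_y), new_w, int(new_h - crop_y))

# fill_crop_box(1080, 1080, 1920, 1080) -> ((1920, 1920), (0, 420, 1920, 1500))
```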
@@ -928,13 +931,15 @@ def find_mp3_files():
         for file in files:
             if file.lower().endswith('.mp3'):
                 mp3_path = os.path.join(root, file)
-                mp3_files.append(mp3_path)
-                print(f"Found MP3 file: {mp3_path}")
 
     if mp3_files:
-        return mp3_files[0]  # Return the first one found
     else:
-        # print("No MP3 files found in the current directory or subdirectories.")  # Keep less noisy
         return None
 
@@ -945,7 +950,7 @@ def add_background_music(final_video, bg_music_path, bg_music_volume=0.08):
         return final_video
 
     try:
-        print(f"Adding background music from: {bg_music_path}")
         bg_music = AudioFileClip(bg_music_path)
 
         # Loop background music if shorter than video
@@ -968,8 +973,12 @@ def add_background_music(final_video, bg_music_path, bg_music_volume=0.08):
         if video_audio:
             # Ensure video audio matches video duration before compositing
             if abs(video_audio.duration - final_video.duration) > 0.1:
-                print(f"Adjusting video audio duration ({video_audio.duration:.2f}s) to match video duration ({final_video.duration:.2f}s)")
-                video_audio = video_audio.fx(vfx.speedx, factor=video_audio.duration / final_video.duration)
 
             mixed_audio = CompositeAudioClip([video_audio, bg_music])
             # print("Composited video audio and background music")  # Keep less noisy
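Looping and attenuating the music track can be done with moviepy's stock audio effects. A minimal sketch assuming the moviepy 1.x `audio_loop`/`volumex` helpers; the function name is illustrative, and the 8% default mirrors the `bg_music_volume=0.08` signature above:

```python
from moviepy.editor import AudioFileClip, CompositeAudioClip
from moviepy.audio.fx.all import audio_loop, volumex

def mix_background(video, music_path, volume=0.08):
    """Loop the music to the video's length, quiet it down, and mix it in."""
    music = AudioFileClip(music_path)
    music = audio_loop(music, duration=video.duration)  # repeat if shorter than the video
    music = volumex(music, volume)                      # e.g. 8% of original loudness
    tracks = [t for t in (video.audio, music) if t is not None]
    return video.set_audio(CompositeAudioClip(tracks))
```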
@@ -1015,9 +1024,9 @@ def create_clip(media_asset, tts_path, estimated_duration, target_resolution,
         target_clip_duration = estimated_duration  # Fallback to estimated duration
 
-        # Handle missing media first
         if not media_path or not os.path.exists(media_path):
-            print(f"Skipping clip {segment_index}: Missing media file {media_path}")
             # Create a black clip with silent audio for the target duration
             clip = ColorClip(size=target_resolution, color=(0,0,0), duration=target_clip_duration)
             print(f"Created placeholder black clip for segment {segment_index}")
@@ -1047,8 +1056,8 @@ def create_clip(media_asset, tts_path, estimated_duration, target_resolution,
                 silent_audio_clip = silent_audio_clip.fx(vfx.speedx, factor=silent_audio_clip.duration / clip.duration)
                 clip = clip.set_audio(silent_audio_clip)
             except Exception as e:
-                print(f"Error adding silent audio to placeholder clip {segment_index}: {e}")
-                clip = clip.set_audio(None)  # Set audio to None if silent audio fails
         else:
             clip = clip.set_audio(None)  # Set audio to None if silent audio generation fails
 
@@ -1170,62 +1179,79 @@ def create_clip(media_asset, tts_path, estimated_duration, target_resolution,
         if caption_enabled and narration_text and caption_color.lower() != "transparent" and narration_text.strip():
             try:
                 # Determine total audio duration (using actual if available, else estimated)
-                actual_audio_duration_for_subtitles = audio_duration if audio_clip else target_clip_duration
-
-                # Simple word-based chunking for subtitles
-                words = narration_text.split()
-                # Calculate average word duration based on total audio duration and word count
-                # This is a simple approach; for better sync, use a forced aligner (more complex)
-                total_words = len(words)
-                average_word_duration = actual_audio_duration_for_subtitles / total_words if total_words > 0 else 0.5  # Default if no words
-
-                subtitle_clips = []
-                current_time = 0
-                chunk_size = 6  # Words per caption chunk (adjust as needed for readability)
-
-                for i in range(0, total_words, chunk_size):
-                    chunk_words = words[i:i+chunk_size]
-                    chunk_text = ' '.join(chunk_words)
-                    # Estimate chunk duration based on word count * average word duration
-                    estimated_chunk_duration = len(chunk_words) * average_word_duration
-
-                    start_time = current_time
-                    # Ensure end time doesn't exceed the *clip* duration
-                    end_time = min(current_time + estimated_chunk_duration, clip.duration)
-                    if start_time >= end_time: break  # Avoid 0 or negative duration clips
-
-                    # Determine vertical position
-                    if caption_position == "Top":
-                        subtitle_y_position = int(target_resolution[1] * 0.05)  # Slightly lower than top edge
-                    elif caption_position == "Middle":
-                        subtitle_y_position = int(target_resolution[1] * 0.5) - int(caption_size * 1.2 / 2)  # Center adjusted for text height
-                    else:  # Default to Bottom
-                        subtitle_y_position = int(target_resolution[1] * 0.9) - int(caption_size * 1.2)  # Slightly higher than bottom edge, accounting for multiple lines
-
-                    txt_clip = TextClip(
-                        chunk_text,
-                        fontsize=caption_size,
-                        font='Arial-Bold',  # Ensure this font is available or use a common system font
-                        color=caption_color,
-                        bg_color=caption_bg_color,  # Use background color
-                        method='caption',  # Enables text wrapping
-                        align='center',
-                        stroke_width=caption_stroke_width,  # Use stroke
-                        stroke_color=caption_stroke_color,  # Use stroke color
-                        size=(target_resolution[0] * 0.9, None)  # Caption width max 90% of video width
-                    ).set_start(start_time).set_end(end_time)
-
-                    # Position is tuple ('center', y_position)
-                    txt_clip = txt_clip.set_position(('center', subtitle_y_position))
-                    subtitle_clips.append(txt_clip)
-                    current_time = end_time  # Move to the end of the current chunk
-
-                if subtitle_clips:
-                    clip = CompositeVideoClip([clip] + subtitle_clips)
-                    # print(f"Added {len(subtitle_clips)} subtitle chunks to clip {segment_index}.")  # Keep less noisy
-                # else:
-                #     print(f"No subtitle clips generated for segment {segment_index} (might be due to text/duration issues).")  # Keep less noisy
 
             except Exception as sub_error:
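The subtitle logic deleted here (reworked in the updated file further below) boils down to fixed-size word chunks, each held on screen for a duration proportional to its word count. As a standalone function (name illustrative):

```python
def chunk_subtitles(text, clip_duration, chunk_size=6):
    """Return (start, end, caption) triples that together cover clip_duration."""
    words = text.split()
    if not words or clip_duration <= 0:
        return []
    per_word = clip_duration / len(words)
    chunks, t = [], 0.0
    for i in range(0, len(words), chunk_size):
        chunk = words[i:i + chunk_size]
        end = min(t + len(chunk) * per_word, clip_duration)
        if t >= end:
            break
        chunks.append((t, end, " ".join(chunk)))
        t = end
    return chunks

# chunk_subtitles("one two three four five six seven", 7.0)
# -> [(0.0, 6.0, 'one two three four five six'), (6.0, 7.0, 'seven')]
```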
@@ -1243,22 +1269,23 @@ def create_clip(media_asset, tts_path, estimated_duration, target_resolution,
                     stroke_width=caption_stroke_width,
                     stroke_color=caption_stroke_color,
                     size=(target_resolution[0] * 0.8, None)
-                ).set_position(('center', int(target_resolution[1] * 0.75))).set_duration(clip.duration)
                 clip = CompositeVideoClip([clip, txt_clip])
                 print(f"Added simple fallback subtitle for segment {segment_index}.")
             except Exception as fallback_sub_error:
                 print(f"Simple fallback subtitle failed for segment {segment_index}: {fallback_sub_error}")
 
-        # Ensure final clip duration is explicitly set
-        clip = clip.set_duration(clip.duration)
 
         # print(f"Clip {segment_index} created successfully: {clip.duration:.2f}s")  # Keep less noisy
         return clip
     except Exception as e:
         print(f"Critical error in create_clip for segment {segment_index}: {str(e)}")
         # Create a black clip with error message if anything goes wrong during the main process
-        error_duration = target_clip_duration if 'target_clip_duration' in locals() else (estimated_duration if estimated_duration else 3.0)
         print(f"Creating error placeholder black clip for segment {segment_index} with duration {error_duration:.2f}s.")
         black_clip = ColorClip(size=target_resolution, color=(0,0,0), duration=error_duration)
         error_text = f"Error in segment {segment_index}"
@@ -1286,23 +1313,52 @@ def create_clip(media_asset, tts_path, estimated_duration, target_resolution,
 def fix_imagemagick_policy():
     """Attempt to fix ImageMagick security policies required by TextClip."""
     print("Attempting to fix ImageMagick security policies...")
-    policy_paths = [
-        "/etc/ImageMagick-6/policy.xml",
-        "/etc/ImageMagick-7/policy.xml",
-        "/etc/ImageMagick/policy.xml",  # Common symlink path
-        "/usr/local/etc/ImageMagick-7/policy.xml",  # macports/homebrew path
-        "/usr/share/ImageMagick/policy.xml",  # Another common path
-        "/usr/share/ImageMagick-6/policy.xml",
-        "/usr/share/ImageMagick-7/policy.xml",
-        os.path.join(os.environ.get('MAGICK_HOME', ''), 'policy.xml') if os.environ.get('MAGICK_HOME') else '',  # Check MAGICK_HOME
-        # Add more paths if needed based on typical installations
-    ]
-    # Filter out empty paths
-    policy_paths = [path for path in policy_paths if path and os.path.exists(path)]
 
     found_policy = None
-    if policy_paths:
-        found_policy = policy_paths[0]  # Use the first one found
 
     if not found_policy:
         print("No policy.xml found in common locations. TextClip may fail.")
@@ -1321,6 +1377,7 @@ def fix_imagemagick_policy():
 
     # Read the original policy file (handle potential permission issues)
     try:
         with open(found_policy, 'r') as f:
             policy_content = f.read()
@@ -1337,11 +1394,17 @@ def fix_imagemagick_policy():
                 print(f"Failed to read policy file using sudo cat. Error: {stderr.decode('utf-8')}")
                 print("Manual intervention may be required.")
                 return False
         except Exception as e_sudo_read:
             print(f"Error executing sudo cat: {e_sudo_read}")
             print("Manual intervention may be required.")
             return False
 
     # Use regex to find and replace the specific policy lines
     # Allow read and write rights for PDF, EPS, PS, etc. potentially restricted formats
@@ -1359,9 +1422,10 @@ def fix_imagemagick_policy():
         modified_content
     )
     # Catch any other "rights=none" for coder or path domains, but be cautious
     modified_content = re.sub(
-        r'<policy domain="(coder|path)" rights="none"(.*?)/>',
-        r'<policy domain="\1" rights="read|write"\2/>',
         modified_content
     )
 
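The intent of the `re.sub` rewrite can be verified in isolation on sample policy.xml content (the snippet below is illustrative, not taken from any particular system):

```python
import re

sample = ('<policy domain="coder" rights="none" pattern="PDF" />\n'
          '<policy domain="path" rights="none" pattern="@*" />')

patched = re.sub(
    r'<policy domain="(coder|path)" rights="none"(.*?)/>',
    r'<policy domain="\1" rights="read|write"\2/>',
    sample,
)
print(patched)
# <policy domain="coder" rights="read|write" pattern="PDF" />
# <policy domain="path" rights="read|write" pattern="@*" />
```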
@@ -1377,21 +1441,28 @@ def fix_imagemagick_policy():
     # Fallback to using os.system with sudo tee if direct write fails
     # This requires the user to be able to run sudo commands without a password prompt for the script's execution
     # and tee needs to be available.
-    # Using tee is safer than sudo cp for writing potentially large content.
     try:
         # Write modified content to a temporary file first
         temp_policy_file = os.path.join(TEMP_FOLDER, "temp_policy_modified.xml")
         with open(temp_policy_file, 'w') as f:
             f.write(modified_content)
 
         # Use sudo tee to overwrite the original file
-        # echo <content> | sudo tee <file> > /dev/null
-        cmd = f'sudo tee {found_policy} > /dev/null'
-        print(f"Executing: echo ... | {cmd}")
 
-        # Using subprocess is safer than os.system for piping
-        process = subprocess.Popen(['sudo', 'tee', found_policy], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        stdout, stderr = process.communicate(input=modified_content.encode('utf-8'))
 
         if process.returncode == 0:
             print("ImageMagick policies updated successfully using sudo tee.")
@@ -1401,6 +1472,9 @@ def fix_imagemagick_policy():
             print("Please manually edit your policy.xml to grant read/write rights for coder and path domains.")
             print("Example: Change <policy domain='coder' rights='none' pattern='PDF'> to <policy domain='coder' rights='read|write' pattern='PDF'>")
             return False
     except Exception as e_sudo_write:
         print(f"Error executing sudo tee process: {e_sudo_write}")
         print("Manual intervention may be required.")
@@ -1417,10 +1491,6 @@ def fix_imagemagick_policy():
     return False
 
-# Import subprocess for sudo commands in fix_imagemagick_policy
-import subprocess
-
-
 # ---------------- Gradio Interface Functions ---------------- #
 
 def generate_script_and_show_editor(user_input, resolution_choice,
@@ -1445,6 +1515,7 @@ def generate_script_and_show_editor(user_input, resolution_choice,
             print(f"Error starting cleanup of temp folder {TEMP_FOLDER}: {e}")
 
     # Create a new unique temporary folder for this run
     TEMP_FOLDER = tempfile.mkdtemp(prefix="aivgen_")
     print(f"Created new temp folder: {TEMP_FOLDER}")
 
@@ -1463,10 +1534,12 @@ def generate_script_and_show_editor(user_input, resolution_choice,
     }
 
     # Initial status update and hide editing/video areas
     yield (run_config,
            gr.update(value="Generating script...", visible=True),
            gr.update(visible=False),  # Hide editing area
            gr.update(value=None, visible=False),  # Hide video output and clear value
            # Updates for dynamic components (initially hide/clear all)
            [gr.update(visible=False, value="") for _ in range(MAX_SEGMENTS_FOR_EDITING)],  # Hide textboxes
            [gr.update(visible=False, value=None) for _ in range(MAX_SEGMENTS_FOR_EDITING)],  # Hide file uploads
1476
 
1477
  script_text = generate_script(user_input, OPENROUTER_API_KEY, OPENROUTER_MODEL)
1478
 
1479
- # Update raw script preview
1480
- raw_script_preview = f"### Generated Script Preview\n\n```\n{script_text}\n```" if script_text else "### Generated Script Preview\n\nFailed to generate script."
1481
-
1482
 
1483
  if not script_text or script_text.startswith("[Error]"):
1484
  # Update status and keep editing/video areas hidden
@@ -1486,12 +1558,13 @@ def generate_script_and_show_editor(user_input, resolution_choice,
         yield (run_config,
                gr.update(value=f"Script generation failed: {script_text}", visible=True),
                gr.update(visible=False),
                gr.update(value=None, visible=False),
                # Updates for dynamic components (all hidden)
                [gr.update(visible=False, value="") for _ in range(MAX_SEGMENTS_FOR_EDITING)],
                [gr.update(visible=False, value=None) for _ in range(MAX_SEGMENTS_FOR_EDITING)],
                [gr.update(visible=False) for _ in range(MAX_SEGMENTS_FOR_EDITING)],
                [],  # segments_state remains empty
-               raw_script_preview)  # Update raw script preview
         return  # Stop execution
 
@@ -1499,11 +1572,12 @@ def generate_script_and_show_editor(user_input, resolution_choice,
     yield (run_config,
            gr.update(value="Parsing script...", visible=True),
            gr.update(visible=False),
            gr.update(value=None, visible=False),
            [gr.update(visible=False, value="") for _ in range(MAX_SEGMENTS_FOR_EDITING)],
            [gr.update(visible=False, value=None) for _ in range(MAX_SEGMENTS_FOR_EDITING)],
            [gr.update(visible=False) for _ in range(MAX_SEGMENTS_FOR_EDITING)],
            [],  # segments_state will be updated next
-           raw_script_preview)
 
     segments = parse_script(script_text)
@@ -1513,12 +1587,13 @@ def generate_script_and_show_editor(user_input, resolution_choice,
         yield (run_config,
                gr.update(value="Failed to parse script or script is empty after parsing.", visible=True),
                gr.update(visible=False),
                gr.update(value=None, visible=False),
                # Updates for dynamic components (all hidden)
                [gr.update(visible=False, value="") for _ in range(MAX_SEGMENTS_FOR_EDITING)],
                [gr.update(visible=False, value=None) for _ in range(MAX_SEGMENTS_FOR_EDITING)],
                [gr.update(visible=False) for _ in range(MAX_SEGMENTS_FOR_EDITING)],
                [],  # segments_state remains empty
-               raw_script_preview)  # Update raw script preview
         return  # Stop execution
 
@@ -1545,11 +1620,12 @@ def generate_script_and_show_editor(user_input, resolution_choice,
     yield (run_config,
            gr.update(value=f"Script generated with {len(segments)} segments. Edit segments below.", visible=True),
            gr.update(visible=True),  # Show editing area
            gr.update(value=None, visible=False),  # Ensure video output is hidden and cleared
            textbox_updates,  # Update textboxes (visibility and value)
            file_updates,  # Update file uploads (visibility and value)
            group_visibility_updates,  # Update visibility of groups
            segments,  # Update the state with parsed segments
-           raw_script_preview)  # Update raw script preview
 
 def generate_video_from_edited(run_config, segments_data, segment_texts, segment_uploads, bg_music_volume):
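Note that these handlers are Python generators: each `yield` pushes one full tuple of component updates to the UI, which is how the status label streams progress. In skeleton form (component set reduced to two outputs; names illustrative; assumes a queue-enabled Gradio app):

```python
import gradio as gr

def generate_with_status(prompt):
    """Generator event handler: each yield is one (status, video) update."""
    yield gr.update(value="Generating script..."), gr.update(visible=False)
    # ... long-running script generation ...
    yield gr.update(value="Rendering video..."), gr.update(visible=False)
    # ... rendering ...
    yield gr.update(value="Done."), gr.update(value="final_video.mp4", visible=True)

# wired as: button.click(generate_with_status, inputs=[prompt_box],
#                        outputs=[status_label, video_output])
```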
@@ -1590,34 +1666,37 @@ def generate_video_from_edited(run_config, segments_data, segment_texts, segment
     # Update segments_data with potentially edited text and uploaded file paths
     # segment_texts and segment_uploads are lists of values from the Gradio components
     processed_segments = []
-    for i, segment in enumerate(segments_data):
-        if i < len(segment_texts) and i < len(segment_uploads):  # Ensure we have corresponding input values
-            processed_segment = segment.copy()  # Make a copy
-            # Use edited text, strip whitespace
-            processed_segment['text'] = segment_texts[i].strip() if segment_texts[i] is not None else segment.get('text', '').strip()
-            # Use uploaded media path (will be None if nothing uploaded)
-            processed_segment['uploaded_media'] = segment_uploads[i]
-            processed_segments.append(processed_segment)
-        else:
-            # This shouldn't happen if state and UI updates are in sync, but as a safeguard
-            print(f"Warning: Missing input value(s) for segment index {i}. Using original segment data.")
-            processed_segments.append(segment)  # Append original if inputs are missing
 
-    if not processed_segments:
-        yield "No valid segments to process after editing.", None
-        # Clean up
-        if TEMP_FOLDER and os.path.exists(TEMP_FOLDER):
             try:
                 shutil.rmtree(TEMP_FOLDER)
                 print(f"Cleaned up temp folder: {TEMP_FOLDER}")
             except Exception as e:
                 print(f"Error cleaning up temp folder {TEMP_FOLDER}: {e}")
-        TEMP_FOLDER = None  # Reset global
-        return
 
     yield "Fixing ImageMagick policy...", None
-    fix_imagemagick_policy()  # Attempt policy fix before creating clips
 
     clips = []
     yield "Generating media and audio for clips...", None
1633
  segment.get('uploaded_media') # Pass uploaded media path
1634
  )
1635
 
1636
-
1637
  # Generate TTS audio
1638
  tts_path = generate_tts(segment.get('text', '')) # Use edited text, default to empty string if None/missing
1639
 
@@ -1724,6 +1802,9 @@ def generate_video_from_edited(run_config, segments_data, segment_texts, segment
         shutil.move(temp_output_filename, final_output_path)
         print(f"Final video saved as {final_output_path}")
         output_path = final_output_path
     except Exception as e:
         print(f"Error moving temporary file {temp_output_filename} to final destination {final_output_path}: {e}")
         # If move fails, return the temp file path or None
1756
 
1757
  # Need lists to hold the dynamic UI components for segments
1758
  segment_editing_groups = []
 
1759
  segment_text_inputs = []
1760
  segment_file_inputs = []
1761
 
@@ -1802,12 +1884,14 @@ with gr.Blocks() as demo:
     gr.Markdown("### Edit Script Segments")
     gr.Markdown("Review the AI-generated text and media suggestions below. Edit the text and/or upload your own image/video for any segment. If no file is uploaded, AI will fetch media based on the original prompt.")
     for i in range(MAX_SEGMENTS_FOR_EDITING):
-        # Use gr.Box for better visual grouping
-        with gr.Box(visible=False) as segment_group:  # Each group represents one segment
             segment_editing_groups.append(segment_group)
             # Use a Label to display the original prompt - it's non-interactive text
-            segment_prompt_label = gr.Label(f"Segment {i+1} Prompt:", show_label=False)  # Label will be set by JS
-            # We'll update the value of this label using JS/state change
 
             segment_text = gr.Textbox(label="Narration Text", lines=2, interactive=True)
             segment_text_inputs.append(segment_text)
1842
  status_output, # Update status label
1843
  editing_area, # Show/hide editing area column
1844
  final_video_output, # Hide and clear video output
 
1845
  # Outputs for dynamic components (visibility and value updates)
1846
  *segment_text_inputs,
1847
  *segment_file_inputs,
1848
  *segment_editing_groups,
1849
  segments_state, # Update segments state
1850
- script_preview_markdown # Update raw script preview
1851
  ]
1852
  )
1853
 
@@ -1864,8 +1948,7 @@ with gr.Blocks() as demo:
         outputs=[status_output, final_video_output]  # Yield status updates and final video
     )
 
-    # Add JS to update segment prompt Labels after script generation
-    # This JS function reads the segments_state and updates the Labels
     demo.load(
         None,
         None,
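Because `.click()` needs a flat outputs list, the per-segment components are collected in Python lists and splatted into the binding. A reduced sketch of that wiring (this sketch uses `gr.Group`; the commit itself uses `gr.Box`, which newer Gradio versions have dropped):

```python
import gradio as gr

MAX_SEGMENTS = 15  # mirrors MAX_SEGMENTS_FOR_EDITING

with gr.Blocks() as demo:
    texts, files, groups = [], [], []
    for i in range(MAX_SEGMENTS):
        # one hideable block per segment; shown only for parsed segments
        with gr.Group(visible=False) as grp:
            texts.append(gr.Textbox(label=f"Segment {i+1} narration"))
            files.append(gr.File(label=f"Segment {i+1} media"))
        groups.append(grp)
    status = gr.Label()
    btn = gr.Button("Generate")
    # the handler must return one update per output, in exactly this order:
    # btn.click(fn=..., inputs=[...], outputs=[status, *texts, *files, *groups])
```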
@@ -1875,69 +1958,158 @@ with gr.Blocks() as demo:
         function updateSegmentPromptLabels(segments_data) {{
             console.log("updateSegmentPromptLabels called", segments_data);
             // Gradio stores dynamic component outputs in a flat list.
-            // The prompt labels are the first Label component in each segment group.
-            // Assuming the order is consistent: [Label_0, Textbox_0, File_0, Label_1, Textbox_1, File_1, ...]
-            // We need to find the correct Label element for each segment index.
 
-            // Find all elements that are potentially segment prompt labels
-            const all_segment_labels = document.querySelectorAll('.segment_group_box > label.svelte-q5b6g8');  // Find Label elements within segment boxes
 
-            if (!segments_data || segments_data.length === 0) {{
                 // Clear any existing labels if script generation failed or empty
-                all_segment_labels.forEach(label => label.textContent = '');
-                return;
             }}
 
             for (let i = 0; i < {MAX_SEGMENTS_FOR_EDITING}; i++) {{
-                // Assuming the labels correspond directly to the group index
-                const promptLabel = all_segment_labels[i];  // Get the i-th potential label
 
-                if (promptLabel) {{
-                    if (i < segments_data.length) {{
                         // Update label text with the original prompt
-                        promptLabel.textContent = `Segment ${i+1} (Prompt: ${segments_data[i].original_prompt})`;
-                        promptLabel.parentElement.style.display = 'block';  // Ensure parent box is visible (redundant if group visibility is set, but safe)
                     }} else {{
-                        // Hide label for unused segments
-                        promptLabel.textContent = '';
-                        promptLabel.parentElement.style.display = 'none';  // Hide parent box
                     }}
                 }} else {{
-                    console.warn(`Prompt label element not found for segment index ${i}`);
                 }}
             }}
         }}
         """
     )
 
     # Trigger the JS function whenever segments_state changes
     segments_state.change(
-        None,  # No Python function to call
-        segments_state,  # The state variable that changed
-        None,  # No output components to update via Python
-        _js="""
         (segments_data) => {
-            // Call the JS function defined in demo.load
-            updateSegmentPromptLabels(segments_data);
-            // Return the segments_data itself if needed for chaining, but here it's not.
-            // This function just updates the UI client-side.
-            return arguments[0];  // Return original arguments to avoid state getting cleared
-        }
-        """
-    )
 
-# Launch the interface
-if __name__ == "__main__":
-    # Attempt ImageMagick policy fix on script startup
-    # This helps but might still require manual sudo depending on system config
-    fix_imagemagick_policy()
 
-    print("Launching Gradio interface...")
 
-    # Check if API keys are still placeholders (unlikely with hardcoded keys, but good practice)
-    if PEXELS_API_KEY.startswith('YOUR_PEXELS_API_KEY'):
-        print("Warning: PEXELS_API_KEY is not configured. Media search may fail.")
-    if OPENROUTER_API_KEY.startswith('YOUR_OPENROUTER_API_KEY'):
-        print("Warning: OPENROUTER_API_KEY is not configured. Script generation will fail.")
 
-    demo.launch(share=True)  # Set share=True to get a public link
 
1
 
 
2
  from kokoro import KPipeline
3
 
4
  import soundfile as sf
 
14
  import math
15
  import os, requests, io, time, re, random
16
  from moviepy.editor import (
17
+ VideoFileFileClip, concatenate_videoclips, AudioFileClip, ImageClip,
18
  CompositeVideoClip, TextClip, CompositeAudioClip
19
  )
20
  import moviepy.video.fx.all as vfx
 
32
  from gtts import gTTS
33
  import gradio as gr # Import Gradio
34
  import shutil # Needed for temp folder cleanup
35
+ import subprocess # Needed for sudo commands in fix_imagemagick_policy
36
+
37
 
38
  # Initialize Kokoro TTS pipeline (using American English)
39
  # Ensure you have the required voice models downloaded for Kokoro if needed,
40
  # or it will fall back to gTTS. 'a' for American English uses voice 'af_heart'.
41
+ # Add a flag to check if Kokoro initialized successfully
42
+ kokoro_initialized = False
43
  try:
44
  pipeline = KPipeline(lang_code='a')
45
+ kokoro_initialized = True
46
  print("Kokoro TTS pipeline initialized.")
47
  except Exception as e:
48
  print(f"Warning: Could not initialize Kokoro TTS pipeline: {e}. Will rely on gTTS.")
 
55
  # Common paths: "/usr/bin/convert", "/usr/local/bin/convert", "C:\\Program Files\\ImageMagick-X.Y.Z-Q16\\convert.exe"
56
  # You might need to adjust this based on your OS and installation
57
  IMAGEMAGICK_BINARY_PATH = "/usr/bin/convert" # Default path, check your system
58
+ # Add more common paths to check
59
+ common_imagemagick_paths = [
60
+ "/usr/bin/convert",
61
+ "/usr/local/bin/convert",
62
+ "/opt/homebrew/bin/convert", # Homebrew on macOS ARM
63
+ "/usr/local/opt/imagemagick/bin/convert", # Older Homebrew
64
+ "C:\\Program Files\\ImageMagick-X.Y.Z-Q16\\convert.exe", # Windows example, adjust version
65
+ # Add other paths as needed for your environment
66
+ ]
67
+
68
+ found_imagemagick_binary = None
69
+ for path in common_imagemagick_paths:
70
+ if os.path.exists(path):
71
+ found_imagemagick_binary = path
72
+ break
73
+
74
+ if found_imagemagick_binary:
75
+ print(f"Found ImageMagick binary at: {found_imagemagick_binary}")
76
+ mpy_config.change_settings({"IMAGEMAGICK_BINARY": found_imagemagick_binary})
77
+ else:
78
+ print("Warning: ImageMagick binary 'convert' not found in common locations.")
79
+ print("TextClip may fail. Please install ImageMagick or update the IMAGICK_BINARY setting if it's installed elsewhere.")
80
+ # Still try to set a default path, though it might be wrong
81
+ mpy_config.change_settings({"IMAGEMAGICK_BINARY": IMAGEMAGICK_BINARY_PATH})
82
 
83
 
84
  # ---------------- Global Configuration ---------------- #
 
90
  USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
91
 
92
  # Maximum number of script segments to display for editing
93
+ MAX_SEGMENTS_FOR_EDITING = 15 # Limit for UI manageability
94
 
95
  # Global placeholder for the temporary folder, will be created per run
96
  TEMP_FOLDER = None
 
282
  for page in range(1, num_pages + 1):
283
  for attempt in range(max_retries):
284
  try:
285
+ params = {"query": search_query, "per_page": videos_per_page, "page": page, "orientation": "landscape"} # Added orientation
286
  response = requests.get(base_url, headers=headers, params=params, timeout=10)
287
 
288
  if response.status_code == 200:
 
294
  other_videos_on_page = []
295
  for video in videos:
296
  video_files = video.get("video_files", [])
297
+ # Sort video files by quality preference if possible
298
+ video_files_sorted = sorted(video_files, key=lambda x: {'hd': 0, 'sd': 1}.get(x.get('quality'), 2))
299
+
300
+ for file in video_files_sorted:
301
+ link = file.get("link")
302
+ quality = file.get("quality")
303
+ if link:
304
+ if quality == "hd":
305
+ hd_videos_on_page.append(link)
306
+ break # Found the best quality for this video entry
307
+ else:
308
+ other_videos_on_page.append(link)
309
+ # Don't break, keep looking for HD for this video entry
310
 
311
  all_videos.extend(hd_videos_on_page) # Add HD videos found
312
+ if not hd_videos_on_page: # If no HD found on this page, add other videos found on this page
313
  all_videos.extend(other_videos_on_page)
314
 
315
  if not videos:
 
453
 
454
  try:
455
  headers = {"User-Agent": USER_AGENT}
456
+ # print(f"Attempting to download image from: {image_url}") # Keep less noisy
457
  response = requests.get(image_url, headers=headers, stream=True, timeout=20) # Increased timeout
458
  response.raise_for_status()
459
 
 
648
  num_samples = int(duration * sample_rate)
649
  silence = np.zeros(num_samples, dtype=np.float32)
650
  # Use unique filename to avoid conflicts
651
+ # Ensure TEMP_FOLDER exists before generating path
652
+ if not TEMP_FOLDER:
653
+ print("Error: TEMP_FOLDER not set for generate_silent_audio.")
654
+ return None
655
+ os.makedirs(TEMP_FOLDER, exist_ok=True)
656
+
657
  silent_path = os.path.join(TEMP_FOLDER, f"silent_{abs(hash(duration)) % (10**8)}_{int(time.time())}.wav")
658
  try:
659
  sf.write(silent_path, silence, sample_rate)
 
673
  print("TTS text is empty. Generating silent audio.")
674
  return generate_silent_audio(duration=2.0) # Default silence for empty text
675
 
676
+ if not TEMP_FOLDER:
677
+ print("Error: TEMP_FOLDER not set for generate_tts.")
678
+ return generate_silent_audio(duration=max(2.0, len(text.split()) * 0.4))
679
+
680
  os.makedirs(TEMP_FOLDER, exist_ok=True) # Ensure temp folder exists
681
  safe_text_hash = str(abs(hash(text)) % (10**10)) # Use a hash for potentially long text
682
  file_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text_hash}.wav")
 
688
  # Estimate duration based on word count (adjust factor as needed), used if TTS fails
689
  target_duration_fallback = max(2.0, len(text.split()) * 0.4)
690
 
691
+ # Use the global kokoro_initialized flag
692
+ if kokoro_initialized and pipeline:
693
  try:
694
  print(f"Attempting Kokoro TTS for text: '{text[:50]}...'")
695
  kokoro_voice = 'af_heart' if voice == 'en' else voice # Kokoro default American English voice
 
697
  generator = pipeline(text, voice=kokoro_voice, speed=1.0, split_pattern=r'\n+') # Use speed 1.0
698
  audio_segments = []
699
  total_duration = 0
700
+ # Iterate through the generator, but add a timeout to prevent infinite loops
701
+ start_time = time.time()
702
+ timeout = 30 # seconds per segment generation attempt
703
  for i, (gs, ps, audio) in enumerate(generator):
704
+ if time.time() - start_time > timeout:
705
+ print(f"Kokoro TTS timed out after {timeout}s for text segment {i}.")
706
+ break # Exit loop on timeout
707
  audio_segments.append(audio)
708
  total_duration += len(audio) / 24000.0 # Assuming 24000 Hz sample rate
709
+
710
  if audio_segments:
711
  full_audio = np.concatenate(audio_segments) if len(audio_segments) > 1 else audio_segments[0]
712
  sf.write(file_path, full_audio, 24000) # Use 24000Hz standard
 
768
  # Define start and end positions of the top-left corner of the target_resolution window
769
  start_x, start_y = 0, 0
770
  end_x, end_y = 0, 0
771
+ start_zoom_relative = 1.0 # Relative to target_resolution size
772
+ end_zoom_relative = 1.0
773
 
774
+ # Set start/end positions and zoom based on effect type.
775
+ # Positions are top-left corner of the target frame within the scaled image coordinates (new_width, new_height).
776
  if effect_type == "zoom-in":
777
+ start_zoom_relative = 1.0 # Start covering target_resolution size
778
+ end_zoom_relative = scale_factor # End covering target_resolution / scale_factor size (zoomed in)
779
+ # Stay centered in the *scaled* image
780
+ start_x = max_offset_x / 2
781
  start_y = max_offset_y / 2
782
  end_x = max_offset_x / 2
783
  end_y = max_offset_y / 2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
784
 
785
  elif effect_type == "zoom-out":
786
  start_zoom_relative = scale_factor # Start zoomed in
787
+ end_zoom_relative = 1.0 # End at target_resolution size
788
+ # Stay centered in the *scaled* image
789
+ start_x = max_offset_x / 2
790
+ start_y = max_offset_y / 2
791
+ end_x = max_offset_x / 2
792
+ end_y = max_offset_y / 2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
793
 
794
+ # For pan effects, the crop size is constant (target_resolution, which corresponds to zoom_relative=1.0)
795
+ elif effect_type == "pan-left":
796
  start_x = max_offset_x
797
  start_y = max_offset_y / 2
798
  end_x = 0
 
832
  print(f"Warning: Unexpected effect type '{effect_type}'. Defaulting to 'pan-right'.")
833
 
834
 
835
+ def transform_frame(get_frame, t):
836
  frame = get_frame(t)
837
  # Use a smooth ease-in/ease-out function
838
  progress = t / clip.duration if clip.duration > 0 else 0
839
  eased_progress = 0.5 - 0.5 * math.cos(math.pi * progress) # Cosine easing
840
 
841
+ # Interpolate zoom relative to target_resolution
842
+ current_zoom_relative = start_zoom_relative + (end_zoom_relative - start_zoom_relative) * eased_progress
843
+
844
+ # Calculate crop size based on current zoom relative to target resolution
845
+ # If zoom_relative is 1, crop size is target_resolution. If zoom_relative is scale_factor, crop size is target_resolution/scale_factor
846
+ crop_w = int(target_w / current_zoom_relative)
847
+ crop_h = int(target_h / current_zoom_relative)
848
+
849
+ # Interpolate position (top-left corner of the target frame within the scaled image)
850
  current_x = start_x + (end_x - start_x) * eased_progress
851
  current_y = start_y + (end_y - start_y) * eased_progress
852
 
 
854
  center_x = current_x + crop_w / 2
855
  center_y = current_y + crop_h / 2
856
 
857
+ # Ensure center stays within the bounds of the scaled image (new_width, new_height)
858
  center_x = max(crop_w / 2, min(center_x, new_width - crop_w / 2))
859
  center_y = max(crop_h / 2, min(center_y, new_height - crop_h / 2))
860
 
 
861
  try:
862
  # Perform the crop using cv2.getRectSubPix (expects floating point center)
863
  # Ensure frame is a numpy array (moviepy returns numpy arrays)
864
+ # Clamp center coordinates just in case, although max/min should handle it
 
865
  center_x = np.clip(center_x, 0, new_width)
866
  center_y = np.clip(center_y, 0, new_height)
867
 
868
+ # Ensure crop dimensions are positive integers
869
+ crop_w = max(1, crop_w)
870
+ crop_h = max(1, crop_h)
871
+
872
+
873
  cropped_frame = cv2.getRectSubPix(frame, (crop_w, crop_h), (center_x, center_y))
874
+ # Resize the cropped frame back to the target resolution
 
 
 
 
 
 
875
  resized_frame = cv2.resize(cropped_frame, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)
876
 
877
  return resized_frame
 
881
  return np.zeros((target_h, target_w, 3), dtype=np.uint8)
882
 
883
 
884
+ # Apply the transformation function
885
+ return clip.fl(transform_frame)
886
 
887
 
888
  def resize_to_fill(clip, target_resolution):
 
900
  # Ensure crop coordinates are integers
901
  x1 = int(crop_amount_x)
902
  x2 = int(clip.w - crop_amount_x)
903
+ # Handle potential edge cases with integer rounding
904
+ x2 = max(x1 + 1, x2) # Ensure at least 1 pixel width if needed
905
  clip = clip.crop(x1=x1, x2=x2, y1=0, y2=clip.h)
906
  else: # Clip is taller than target or same aspect
907
  clip = clip.resize(width=target_w)
 
910
  # Ensure crop coordinates are integers
911
  y1 = int(crop_amount_y)
912
  y2 = int(clip.h - crop_amount_y)
913
+ # Handle potential edge cases with integer rounding
914
+ y2 = max(y1 + 1, y2) # Ensure at least 1 pixel height if needed
915
  clip = clip.crop(x1=0, x2=clip.w, y1=y1, y2=y2)
916
 
917
  # Final check and resize if dimensions are slightly off due to rounding
 
931
  for file in files:
932
  if file.lower().endswith('.mp3'):
933
  mp3_path = os.path.join(root, file)
934
+ # Exclude files that are likely temporary or part of internal libraries
935
+ if not any(keyword in mp3_path for keyword in ['temp', '.gradio', 'site-packages', 'dist-packages', 'venv']):
936
+ mp3_files.append(mp3_path)
937
+ print(f"Found MP3 file: {mp3_path}")
938
 
939
  if mp3_files:
940
+ return mp3_files[0] # Return the first one found that isn't excluded
941
  else:
942
+ # print("No user-provided MP3 files found in the current directory or subdirectories.") # Keep less noisy
943
  return None
944
 
945
 
 
950
  return final_video
951
 
952
  try:
953
+ print(f"Adding background music from: {bg_music_path} with volume {bg_music_volume}")
954
  bg_music = AudioFileClip(bg_music_path)
955
 
956
  # Loop background music if shorter than video
 
973
  if video_audio:
974
  # Ensure video audio matches video duration before compositing
975
  if abs(video_audio.duration - final_video.duration) > 0.1:
976
+ print(f"Adjusting video audio duration ({video_audio.duration:.2f}s) to match video duration ({final_video.duration:.2f}s) for final mix")
977
+ try:
978
+ video_audio = video_audio.fx(vfx.speedx, factor=video_audio.duration / final_video.duration)
979
+ except Exception as e:
980
+ print(f"Error adjusting final video audio speed: {e}. Using original audio.")
981
+ pass # Proceed with original audio if speedx fails
982
 
983
  mixed_audio = CompositeAudioClip([video_audio, bg_music])
984
  # print("Composited video audio and background music") # Keep less noisy
 
1024
  target_clip_duration = estimated_duration # Fallback to estimated duration
1025
 
1026
 
1027
+ # Handle missing or invalid media first
1028
  if not media_path or not os.path.exists(media_path):
1029
+ print(f"Skipping clip {segment_index}: Missing or invalid media file {media_path}")
1030
  # Create a black clip with silent audio for the target duration
1031
  clip = ColorClip(size=target_resolution, color=(0,0,0), duration=target_clip_duration)
1032
  print(f"Created placeholder black clip for segment {segment_index}")
 
1056
  silent_audio_clip = silent_audio_clip.fx(vfx.speedx, factor=silent_audio_clip.duration / clip.duration)
1057
  clip = clip.set_audio(silent_audio_clip)
1058
  except Exception as e:
1059
+ print(f"Error setting silent audio to placeholder clip {segment_index}: {e}")
1060
+ clip = clip.set_audio(None) # Set audio to None if silent audio fails loading
1061
  else:
1062
  clip = clip.set_audio(None) # Set audio to None if silent audio generation fails
1063
 
 
1179
  if caption_enabled and narration_text and caption_color.lower() != "transparent" and narration_text.strip():
1180
  try:
1181
  # Determine total audio duration (using actual if available, else estimated)
1182
+ # Use clip.duration for subtitle timing as the clip's duration is final
1183
+ actual_clip_duration_for_subtitles = clip.duration
1184
+ if actual_clip_duration_for_subtitles <= 0:
1185
+ print(f"Clip duration is zero or negative for segment {segment_index}, cannot add subtitles.")
1186
+ else:
1187
+ # Simple word-based chunking for subtitles
1188
+ words = narration_text.split()
1189
+ # Calculate average word duration based on clip duration and word count
1190
+ total_words = len(words)
1191
+ average_word_duration = actual_clip_duration_for_subtitles / total_words if total_words > 0 else 0.5 # Default if no words
1192
+
1193
+ subtitle_clips = []
1194
+ current_time = 0
1195
+ chunk_size = 6 # Words per caption chunk (adjust as needed for readability)
1196
+
1197
+ for i in range(0, total_words, chunk_size):
1198
+ chunk_words = words[i:i+chunk_size]
1199
+ chunk_text = ' '.join(chunk_words)
1200
+ # Estimate chunk duration based on word count * average word duration
1201
+ estimated_chunk_duration = len(chunk_words) * average_word_duration
1202
+
1203
+ start_time = current_time
1204
+ # Ensure end time doesn't exceed the *clip* duration
1205
+ end_time = min(current_time + estimated_chunk_duration, clip.duration)
1206
+ if start_time >= end_time: break # Avoid 0 or negative duration clips
1207
+
1208
+ # Determine vertical position
1209
+ if caption_position == "Top":
1210
+ subtitle_y_position = int(target_resolution[1] * 0.05) # Slightly lower than top edge
1211
+ elif caption_position == "Middle":
1212
+ # Calculate vertical center, then subtract half the estimated text height
1213
+ # Estimate text height based on font size and number of lines (adjust factor as needed)
1214
+ estimated_text_lines = math.ceil(len(chunk_words) / chunk_size) # Crude estimate
1215
+ estimated_total_text_height = estimated_text_lines * caption_size * 1.2 # 1.2 is line spacing approx
1216
+ subtitle_y_position = int(target_resolution[1] * 0.5) - int(estimated_total_text_height / 2)
1217
+ # Ensure position is not off-screen
1218
+ subtitle_y_position = max(0, subtitle_y_position)
1219
+ else: # Default to Bottom
1220
+ # Position from the bottom edge
1221
+ subtitle_y_position = int(target_resolution[1] * 0.9) # Start near bottom
1222
+ # Note: moviepy positions text relative to the top-left of the *text box*.
1223
+ # To place the bottom of the text at a certain point, this requires more complex calculation
1224
+ # based on actual text height, which is hard to get before rendering.
1225
+ # The current 'bottom' positioning puts the *top* of the text box at ~90% down.
1226
+ # A simpler approach for "Bottom" is positioning the text box bottom at a fixed Y.
1227
+ # Let's stick to positioning the top-left of the text box for simplicity with moviepy's TextClip position.
1228
+ # positioning the top-left of the text box at 85% of height often looks good for bottom captions.
1229
+ subtitle_y_position = int(target_resolution[1] * 0.85) # Top-left of text box is at 85% height
1230
+
1231
+
1232
+ txt_clip = TextClip(
1233
+ chunk_text,
1234
+ fontsize=caption_size,
1235
+ font='Arial-Bold', # Ensure this font is available or use a common system font
1236
+ color=caption_color,
1237
+ bg_color=caption_bg_color, # Use background color
1238
+ method='caption', # Enables text wrapping
1239
+ align='center',
1240
+ stroke_width=caption_stroke_width, # Use stroke
1241
+ stroke_color=caption_stroke_color, # Use stroke color
1242
+ size=(target_resolution[0] * 0.9, None) # Caption width max 90% of video width
1243
+ ).set_start(start_time).set_end(end_time)
1244
+
1245
+ # Position is tuple ('center', y_position)
1246
+ txt_clip = txt_clip.set_position(('center', subtitle_y_position))
1247
+ subtitle_clips.append(txt_clip)
1248
+ current_time = end_time # Move to the end of the current chunk
1249
+
1250
+ if subtitle_clips:
1251
+ clip = CompositeVideoClip([clip] + subtitle_clips)
1252
+ # print(f"Added {len(subtitle_clips)} subtitle chunks to clip {segment_index}.") # Keep less noisy
1253
+ # else:
1254
+ # print(f"No subtitle clips generated for segment {segment_index} (might be due to text/duration issues).") # Keep less noisy
1255
 
1256
 
1257
  except Exception as sub_error:
 
1269
  stroke_width=caption_stroke_width,
1270
  stroke_color=caption_stroke_color,
1271
  size=(target_resolution[0] * 0.8, None)
1272
+ ).set_position(('center', int(target_resolution[1] * 0.75))).set_duration(clip.duration) # Position slightly above bottom
1273
  clip = CompositeVideoClip([clip, txt_clip])
1274
  print(f"Added simple fallback subtitle for segment {segment_index}.")
1275
  except Exception as fallback_sub_error:
1276
  print(f"Simple fallback subtitle failed for segment {segment_index}: {fallback_sub_error}")


+        # The final clip duration was already set earlier based on the audio track.

        # print(f"Clip {segment_index} created successfully: {clip.duration:.2f}s") # Keep less noisy
        return clip
    except Exception as e:
        print(f"Critical error in create_clip for segment {segment_index}: {str(e)}")
        # Create a black clip with error message if anything goes wrong during the main process
+        # Fall back to a safe duration if the earlier duration calculation also failed
+        error_duration = target_clip_duration if 'target_clip_duration' in locals() and target_clip_duration > 0 else (estimated_duration if estimated_duration > 0 else 3.0)
        print(f"Creating error placeholder black clip for segment {segment_index} with duration {error_duration:.2f}s.")
        black_clip = ColorClip(size=target_resolution, color=(0,0,0), duration=error_duration)
        error_text = f"Error in segment {segment_index}"

def fix_imagemagick_policy():
    """Attempt to fix ImageMagick security policies required by TextClip."""
    print("Attempting to fix ImageMagick security policies...")
+
+    # Build the candidate list of policy.xml locations, preferring paths relative
+    # to the ImageMagick binary that was actually found.
+    policy_paths_to_check = []
+    if found_imagemagick_binary:
+        # Assume policy.xml lives near the binary. This is a heuristic; the path
+        # may need to be set manually depending on the installation.
+        base_dir = os.path.dirname(found_imagemagick_binary)
+        policy_paths_to_check.extend([
+            os.path.join(base_dir, '..', 'etc', 'ImageMagick-7', 'policy.xml'),
+            os.path.join(base_dir, '..', 'etc', 'ImageMagick-6', 'policy.xml'),
+            os.path.join(base_dir, '..', 'etc', 'ImageMagick', 'policy.xml'),
+            os.path.join(base_dir, '..', 'share', 'ImageMagick-7', 'policy.xml'),
+            os.path.join(base_dir, '..', 'share', 'ImageMagick-6', 'policy.xml'),
+            os.path.join(base_dir, '..', 'share', 'ImageMagick', 'policy.xml'),
+            # Add more paths relative to the binary if needed
+        ])
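+        # For example (illustrative): with found_imagemagick_binary == "/usr/local/bin/convert",
+        # the first candidate is "/usr/local/bin/../etc/ImageMagick-7/policy.xml",
+        # which resolves to "/usr/local/etc/ImageMagick-7/policy.xml".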
+
+    # Standard system locations, used as fallbacks (and as the only candidates
+    # when the binary was not found).
+    policy_paths_to_check.extend([
+        "/etc/ImageMagick-6/policy.xml",
+        "/etc/ImageMagick-7/policy.xml",
+        "/etc/ImageMagick/policy.xml",
+        "/usr/local/etc/ImageMagick-7/policy.xml",  # MacPorts/Homebrew path
+        "/usr/share/ImageMagick/policy.xml",
+        "/usr/share/ImageMagick-6/policy.xml",
+        "/usr/share/ImageMagick-7/policy.xml",
+        os.path.join(os.environ.get('MAGICK_HOME', ''), 'policy.xml') if os.environ.get('MAGICK_HOME') else '',  # Honor MAGICK_HOME
+    ])
+
+    # Filter out empty entries and keep only paths that actually exist
+    existing_policy_paths = [path for path in policy_paths_to_check if path and os.path.exists(path)]
+

    found_policy = None
+    if existing_policy_paths:
+        found_policy = existing_policy_paths[0]  # Use the first one found

    if not found_policy:
        print("No policy.xml found in common locations. TextClip may fail.")
 


    # Read the original policy file (handle potential permission issues)
+    policy_content = None
    try:
        with open(found_policy, 'r') as f:
            policy_content = f.read()

                print(f"Failed to read policy file using sudo cat. Error: {stderr.decode('utf-8')}")
                print("Manual intervention may be required.")
                return False
+        except FileNotFoundError:
+            print("sudo command not found. Cannot read policy file with sudo.")
+            return False
        except Exception as e_sudo_read:
            print(f"Error executing sudo cat: {e_sudo_read}")
            print("Manual intervention may be required.")
            return False

+    if policy_content is None:
+        print("Failed to read policy file content.")
+        return False

    # Use regex to find and replace the specific policy lines
    # Allow read and write rights for PDF, EPS, PS, etc. potentially restricted formats
 
        modified_content
    )
    # Catch any other "rights=none" for coder or path domains, but be cautious
+    # Use a non-greedy match (.*?) so each policy element is handled separately
    modified_content = re.sub(
+        r'<policy domain="(coder|path)" rights="none"(.*?)/?>',  # the /? also matches self-closing tags
+        r'<policy domain="\1" rights="read|write"\2/>',  # always emit a self-closing tag
        modified_content
    )
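+    # For example (illustrative input), a line like
+    #   <policy domain="coder" rights="none" pattern="PDF" />
+    # becomes
+    #   <policy domain="coder" rights="read|write" pattern="PDF" />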
 
 
    # Fallback to using os.system with sudo tee if direct write fails
    # This requires the user to be able to run sudo commands without a password prompt for the script's execution
    # and tee needs to be available.
+    # Using subprocess is safer than os.system for piping
    try:
        # Write modified content to a temporary file first
+        # Ensure TEMP_FOLDER is set before creating a temp file path
+        if not TEMP_FOLDER:
+            print("Error: TEMP_FOLDER not set for sudo write fallback.")
+            return False
+        os.makedirs(TEMP_FOLDER, exist_ok=True)  # Ensure the temp folder exists
+
        temp_policy_file = os.path.join(TEMP_FOLDER, "temp_policy_modified.xml")
        with open(temp_policy_file, 'w') as f:
            f.write(modified_content)

        # Use sudo tee to overwrite the original file
+        # Equivalent shell: sudo tee <policy.xml> < <temp_file>
+        cmd = ['sudo', 'tee', found_policy]
+        print(f"Executing: {' '.join(cmd)} < {temp_policy_file}")

+        # Run via subprocess with stdin redirected from the temp file
+        with open(temp_policy_file, 'rb') as f_in:  # Binary mode for input
+            process = subprocess.Popen(cmd, stdin=f_in, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+            stdout, stderr = process.communicate()
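+            # Note: tee also echoes the written content back on stdout; it is
+            # captured via the PIPE above and simply ignored.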

        if process.returncode == 0:
            print("ImageMagick policies updated successfully using sudo tee.")
 
            print("Please manually edit your policy.xml to grant read/write rights for coder and path domains.")
            print("Example: Change <policy domain='coder' rights='none' pattern='PDF'> to <policy domain='coder' rights='read|write' pattern='PDF'>")
            return False
+    except FileNotFoundError:
+        print("sudo or tee command not found. Cannot write policy file with sudo.")
+        return False
    except Exception as e_sudo_write:
        print(f"Error executing sudo tee process: {e_sudo_write}")
        print("Manual intervention may be required.")
 
    return False


# ---------------- Gradio Interface Functions ---------------- #

def generate_script_and_show_editor(user_input, resolution_choice,
 
        print(f"Error starting cleanup of temp folder {TEMP_FOLDER}: {e}")

    # Create a new unique temporary folder for this run
+    # tempfile.mkdtemp generates a unique name, which minimizes collision risk if an earlier cleanup failed
    TEMP_FOLDER = tempfile.mkdtemp(prefix="aivgen_")
    print(f"Created new temp folder: {TEMP_FOLDER}")

    }

    # Initial status update and hide editing/video areas
+    # Yielding a tuple of updates applies them all at once
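+    # Minimal sketch of the pattern (hypothetical names): a generator event handler
+    # yields a tuple whose elements line up one-to-one with its `outputs` list, e.g.
+    #     yield (new_state, gr.update(visible=True), gr.update(value=None))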
    yield (run_config,
           gr.update(value="Generating script...", visible=True),
           gr.update(visible=False),  # Hide editing area
           gr.update(value=None, visible=False),  # Hide video output and clear value
+           gr.update(visible=False, value="### Generated Script Preview\n\nGenerating script..."),  # Hide and reset raw script preview
           # Updates for dynamic components (initially hide/clear all)
           [gr.update(visible=False, value="") for _ in range(MAX_SEGMENTS_FOR_EDITING)],  # Hide textboxes
           [gr.update(visible=False, value=None) for _ in range(MAX_SEGMENTS_FOR_EDITING)],  # Hide file uploads
 

    script_text = generate_script(user_input, OPENROUTER_API_KEY, OPENROUTER_MODEL)

+    # Build the raw script preview: a fenced code block on success, plain error text otherwise
+    raw_script_preview_content = f"### Generated Script Preview\n\n```\n{script_text}\n```" if script_text and not script_text.startswith("[Error]") else f"### Generated Script Preview\n\n{script_text}"
 

    if not script_text or script_text.startswith("[Error]"):
        # Update status and keep editing/video areas hidden

               gr.update(value=f"Script generation failed: {script_text}", visible=True),
               gr.update(visible=False),
               gr.update(value=None, visible=False),
+               gr.update(visible=True, value=raw_script_preview_content),  # Show raw script preview on error
               # Updates for dynamic components (all hidden)
               [gr.update(visible=False, value="") for _ in range(MAX_SEGMENTS_FOR_EDITING)],
               [gr.update(visible=False, value=None) for _ in range(MAX_SEGMENTS_FOR_EDITING)],
               [gr.update(visible=False) for _ in range(MAX_SEGMENTS_FOR_EDITING)],
               [],  # segments_state remains empty
+               )
        return  # Stop execution


           gr.update(value="Parsing script...", visible=True),
           gr.update(visible=False),
           gr.update(value=None, visible=False),
+           gr.update(visible=True, value=raw_script_preview_content),  # Show raw script preview
           [gr.update(visible=False, value="") for _ in range(MAX_SEGMENTS_FOR_EDITING)],
           [gr.update(visible=False, value=None) for _ in range(MAX_SEGMENTS_FOR_EDITING)],
           [gr.update(visible=False) for _ in range(MAX_SEGMENTS_FOR_EDITING)],
           [],  # segments_state will be updated next
+           )


 
1583
  segments = parse_script(script_text)
 
1587
  gr.update(value="Failed to parse script or script is empty after parsing.", visible=True),
1588
  gr.update(visible=False),
1589
  gr.update(value=None, visible=False),
1590
+ gr.update(visible=True, value=raw_script_preview_content), # Show raw script preview
1591
  # Updates for dynamic components (all hidden)
1592
  [gr.update(visible=False, value="") for _ in range(MAX_SEGMENTS_FOR_EDITING)],
1593
  [gr.update(visible=False, value=None) for _ in range(MAX_SEGMENTS_FOR_EDITING)],
1594
  [gr.update(visible=False) for _ in range(MAX_SEGMENTS_FOR_EDITING)],
1595
  [], # segments_state remains empty
1596
+ )
1597
  return # Stop execution
1598
 
1599
 
 
           gr.update(value=f"Script generated with {len(segments)} segments. Edit segments below.", visible=True),
           gr.update(visible=True),  # Show editing area
           gr.update(value=None, visible=False),  # Ensure video output is hidden and cleared
+           gr.update(visible=True, value=raw_script_preview_content),  # Show raw script preview
           textbox_updates,  # Update textboxes (visibility and value)
           file_updates,  # Update file uploads (visibility and value)
           group_visibility_updates,  # Update visibility of groups
           segments,  # Update the state with parsed segments
+           )


def generate_video_from_edited(run_config, segments_data, segment_texts, segment_uploads, bg_music_volume):

    # Update segments_data with potentially edited text and uploaded file paths
    # segment_texts and segment_uploads are lists of values from the Gradio components
    processed_segments = []
+    # Iterate over at most the number of segments present in both the state and the editor inputs
+    num_segments_to_process = min(len(segments_data), len(segment_texts), len(segment_uploads), MAX_SEGMENTS_FOR_EDITING)

+    if num_segments_to_process == 0:
+        yield "No segments to process after reading editor inputs.", None
+        # Clean up
+        if TEMP_FOLDER and os.path.exists(TEMP_FOLDER):
            try:
                shutil.rmtree(TEMP_FOLDER)
                print(f"Cleaned up temp folder: {TEMP_FOLDER}")
            except Exception as e:
                print(f"Error cleaning up temp folder {TEMP_FOLDER}: {e}")
+        TEMP_FOLDER = None  # Reset global
+        return
+
+    for i in range(num_segments_to_process):
+        segment = segments_data[i]  # Original segment data
+        processed_segment = segment.copy()
+        # Use the edited text, stripped of surrounding whitespace
+        processed_segment['text'] = segment_texts[i].strip() if segment_texts[i] is not None else segment.get('text', '').strip()
+        # Use the uploaded media path (None if nothing was uploaded)
+        processed_segment['uploaded_media'] = segment_uploads[i]
+        processed_segments.append(processed_segment)

    yield "Fixing ImageMagick policy...", None
+    # Call fix_imagemagick_policy again just before video generation as a safeguard.
+    # This may prompt for a password if sudo is needed; handling permissions
+    # manually or configuring sudoers ahead of time is preferable.
+    fix_imagemagick_policy()

    clips = []
    yield "Generating media and audio for clips...", None

            segment.get('uploaded_media') # Pass uploaded media path
        )

        # Generate TTS audio
        tts_path = generate_tts(segment.get('text', '')) # Use edited text, default to empty string if None/missing

        shutil.move(temp_output_filename, final_output_path)
        print(f"Final video saved as {final_output_path}")
        output_path = final_output_path
+    except shutil.SameFileError:
+        print(f"Output path is the same as the temp path; no move needed: {temp_output_filename}")
+        output_path = temp_output_filename
    except Exception as e:
        print(f"Error moving temporary file {temp_output_filename} to final destination {final_output_path}: {e}")
        # If move fails, return the temp file path or None


    # Need lists to hold the dynamic UI components for segments
    segment_editing_groups = []
+    segment_prompt_labels = []  # List to hold the prompt Labels
    segment_text_inputs = []
    segment_file_inputs = []


    gr.Markdown("### Edit Script Segments")
    gr.Markdown("Review the AI-generated text and media suggestions below. Edit the text and/or upload your own image/video for any segment. If no file is uploaded, AI will fetch media based on the original prompt.")
    for i in range(MAX_SEGMENTS_FOR_EDITING):
+        # Use gr.Group instead of gr.Box for compatibility
+        with gr.Group(visible=False) as segment_group:  # Each group represents one segment
            segment_editing_groups.append(segment_group)
            # Use a Label to display the original prompt - it's non-interactive text
+            # The value will be updated by JS
+            segment_prompt_label = gr.Label(f"Segment {i+1} Prompt:", show_label=False)
+            segment_prompt_labels.append(segment_prompt_label)
+

            segment_text = gr.Textbox(label="Narration Text", lines=2, interactive=True)
            segment_text_inputs.append(segment_text)

            status_output, # Update status label
            editing_area, # Show/hide editing area column
            final_video_output, # Hide and clear video output
+            script_preview_markdown, # Update raw script preview
            # Outputs for dynamic components (visibility and value updates)
            *segment_text_inputs,
            *segment_file_inputs,
            *segment_editing_groups,
            segments_state, # Update segments state
        ]
    )


        outputs=[status_output, final_video_output] # Yield status updates and final video
    )

+    # Add JS to update the segment prompt Labels when segments_state changes

    demo.load(
        None,
        None,

        function updateSegmentPromptLabels(segments_data) {{
            console.log("updateSegmentPromptLabels called", segments_data);
            // Gradio stores dynamic component outputs in a flat list.
+            // segment_prompt_labels is a Python list of Label components, so the
+            // corresponding entries in the JS arguments are ordered the same way;
+            // map each segment index to the Label output at the same index.

+            const segmentPromptLabelOutputs = arguments[0];  // Corresponds to segment_prompt_labels in the outputs list
+            const segmentsData = arguments[1];               // Corresponds to segments_state in the inputs list

+            if (!segmentsData || segmentsData.length === 0) {{
                // Clear any existing labels if script generation failed or empty
+                for (let i = 0; i < {MAX_SEGMENTS_FOR_EDITING}; i++) {{
+                    // The Gradio JS output object for a Label exposes a 'value' property
+                    if (segmentPromptLabelOutputs && segmentPromptLabelOutputs[i]) {{
+                        segmentPromptLabelOutputs[i].update('');  // or .value = '', depending on the Gradio version
+                    }}
+                }}
+                return segmentPromptLabelOutputs;  // Return the output objects unchanged
            }}

            for (let i = 0; i < {MAX_SEGMENTS_FOR_EDITING}; i++) {{
+                const gradioLabelOutput = segmentPromptLabelOutputs[i];  // Gradio JS output object for this label

+                if (gradioLabelOutput) {{
+                    if (i < segmentsData.length) {{
                        // Update label text with the original prompt
+                        const promptText = `Segment ${{i+1}} (Prompt: ${{segmentsData[i].original_prompt}})`;
+                        gradioLabelOutput.update(promptText);  // Update the label's value
+                        // The parent gr.Group's visibility is handled by the Python
+                        // outputs of generate_script_and_show_editor.
                    }} else {{
+                        // Clear the label for unused segments; hiding the gr.Group
+                        // hides the label too, but clear the value anyway.
+                        gradioLabelOutput.update('');
                    }}
                }} else {{
+                    console.warn(`Prompt label JS output object not found for segment index ${{i}}`);
                }}
            }}
+            return segmentPromptLabelOutputs;  // Return the output objects unchanged
        }}
        """
    )

    # Trigger the JS function whenever segments_state changes
    segments_state.change(
+        fn=None,  # No Python function to call
+        inputs=[segments_state],  # The state variable that changed
+        outputs=[*segment_prompt_labels],  # The labels the JS will update
+        js="""
+        (segments_data) => {
+            // Inputs arrive positionally: arguments[0] is the new value of segments_state.
+            // Dynamically created Gradio components are awkward to address from JS by
+            // output position, so this handler updates the DOM directly and returns
+            // the state unchanged.
+            const segmentsData = arguments[0];
+
+            // Find the main editing area container (requires elem_id='editing_area_id'
+            // to be set on the container in Python).
+            const editingArea = document.querySelector('#editing_area_id');
+            if (!editingArea) {
+                console.error("Editing area container not found.");
+                return arguments[0];
+            }
+
+            // Find all segment group containers within the editing area
+            const segmentGroups = editingArea.querySelectorAll('.gradio-group');
+            if (!segmentGroups || segmentGroups.length === 0) {
+                console.warn("No segment groups found.");
+                return arguments[0];
+            }
+
+            segmentGroups.forEach((group, index) => {
+                // Find the first Label element within this group.
+                // NOTE: the svelte class suffix is version-specific and brittle.
+                const promptLabel = group.querySelector('label.svelte-q5b6g8');
+
+                if (promptLabel) {
+                    if (index < segmentsData.length) {
+                        // Update the label text with the original prompt
+                        promptLabel.textContent = `Segment ${index+1} (Prompt: ${segmentsData[index].original_prompt})`;
+                        group.style.display = 'block';  // Visibility is also handled by the Python outputs
+                    } else {
+                        // Clear the label and hide the group for unused segments
+                        promptLabel.textContent = '';
+                        group.style.display = 'none';
+                    }
+                } else {
+                    console.warn(`Prompt label element not found within group index ${index}.`);
+                    // Still hide the group if it is unused
+                    if (index >= segmentsData.length) {
+                        group.style.display = 'none';
+                    }
+                }
+            });
+
+            return arguments[0];  // Return segments_data unchanged
+        }
+        """
+    )
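+    # Minimal sketch of the js-only handler pattern used above (assuming Gradio's
+    # `js` parameter semantics, which vary by version; names are hypothetical):
+    #     state.change(fn=None, inputs=[state], outputs=[],
+    #                  js="(s) => { console.log('state changed', s); }")
+    # With fn=None, only the JS runs in the browser, and its return value must
+    # match the shape of `outputs` (here empty, so nothing is returned to Python).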