testdeep123 committed on
Commit
864c351
·
verified ·
1 Parent(s): 40088c2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +268 -600
app.py CHANGED
@@ -14,7 +14,7 @@ import io
14
  import shutil
15
  from urllib.parse import quote
16
  import numpy as np
17
- from bs4 import BeautifulSoup
18
  import base64
19
  from gtts import gTTS
20
  import gradio as gr
@@ -27,11 +27,10 @@ from moviepy.editor import (
27
  concatenate_videoclips, CompositeAudioClip
28
  )
29
  import moviepy.video.fx.all as vfx
30
- # No longer importing moviepy.config or calling change_settings for ImageMagick
31
 
32
- # Pydub imports (for potential gTTS fallback format conversion)
33
  from pydub import AudioSegment
34
- from pydub.generators import Sine # Might not be needed if TTS works
35
 
36
  # ---------------- Global Configuration ---------------- #
37
  # --- API Keys (Replace with your actual keys) ---
@@ -43,25 +42,24 @@ OUTPUT_VIDEO_FILENAME = "final_video.mp4"
43
  USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
44
 
45
  # --- Gradio Controlled Variables (with defaults) ---
46
- selected_voice = 'af_heart' # Default voice (Kokoro American Female)
47
- voice_speed = 0.9 # Default voice speed
48
- font_size = 45 # Default caption font size
49
- video_clip_probability = 0.25 # Default probability for using video clips (25%)
50
- bg_music_volume = 0.08 # Default background music volume
51
- fps = 30 # Default video frames per second
52
- preset = "veryfast" # Default video export preset (faster, lower quality)
53
- caption_style_bg_color = 'rgba(0, 0, 0, 0.6)' # Background for captions
54
- caption_style_text_color = 'yellow' # Text color for captions
55
- caption_font = 'Arial-Bold' # Font for captions (ensure it's available)
56
 
57
  # --- Runtime Variables (set per execution) ---
58
- TARGET_RESOLUTION = None # Will be set based on Gradio input (e.g., (1920, 1080))
59
- TEMP_FOLDER = None # Will be created temporarily for each run
60
- USE_CAPTIONS = True # Will be set based on Gradio input
61
 
62
  # ---------------- Kokoro TTS Initialization ---------------- #
63
  try:
64
- # Initialize Kokoro TTS pipeline (using American English default)
65
  pipeline = KPipeline(lang_code='a')
66
  print("Kokoro TTS Pipeline initialized.")
67
  except Exception as e:
@@ -73,14 +71,14 @@ except Exception as e:
73
 
74
  def generate_script(user_input):
75
  """Generate documentary script using OpenRouter API."""
 
 
76
  headers = {
77
  'Authorization': f'Bearer {OPENROUTER_API_KEY}',
78
  'Content-Type': 'application/json',
79
- # Optional but recommended:
80
  'HTTP-Referer': 'http://localhost:7860', # Or your app's URL
81
  'X-Title': 'AI Documentary Maker Gradio'
82
  }
83
-
84
  prompt = f"""Create a short, humorous, slightly negative, and conversational documentary-style script based on the following topic or instructions: '{user_input}'.
85
 
86
  Formatting Rules:
@@ -107,36 +105,30 @@ Subscribe now, or a cat will knock your coffee over.
107
 
108
  Now generate the script based on: {user_input}
109
  """
110
-
111
  data = {
112
  'model': OPENROUTER_MODEL,
113
  'messages': [{'role': 'user', 'content': prompt}],
114
- 'temperature': 0.6, # Slightly more creative for humor
115
- 'max_tokens': 600 # Adjust as needed
116
  }
117
-
118
  try:
119
  response = requests.post(
120
  'https://openrouter.ai/api/v1/chat/completions',
121
  headers=headers,
122
  json=data,
123
- timeout=45 # Increased timeout
124
  )
125
-
126
- response.raise_for_status() # Raises HTTPError for bad responses (4xx or 5xx)
127
-
128
  response_data = response.json()
129
  if 'choices' in response_data and len(response_data['choices']) > 0:
130
  script_content = response_data['choices'][0]['message']['content']
131
- # Basic cleaning: remove potential preamble/postamble if AI didn't follow instructions perfectly
132
- script_content = re.sub(r"^.*?\n?\[", "[", script_content, flags=re.DOTALL) # Remove anything before the first bracket
133
  script_content = script_content.strip()
134
  print("Script generated successfully.")
135
  return script_content
136
  else:
137
  print(f"API Error: Unexpected response format: {response_data}")
138
  return None
139
-
140
  except requests.exceptions.Timeout:
141
  print("API Error: Request timed out.")
142
  return None
@@ -151,235 +143,179 @@ Now generate the script based on: {user_input}
151
  return None
152
 
153
  def parse_script(script_text):
154
- """
155
- Parse the generated script into a list of dictionaries,
156
- each representing a segment with a visual prompt and narration.
157
- """
158
  segments = []
159
  current_title = None
160
  current_narration = ""
161
-
162
  if not script_text:
163
  print("Error: Script text is empty.")
164
  return []
165
-
166
  lines = script_text.strip().splitlines()
167
-
168
  for line in lines:
169
  line = line.strip()
170
- if not line:
171
- continue
172
-
173
  title_match = re.match(r'^\[(.*?)\](.*)', line)
174
-
175
  if title_match:
176
- # If we were processing a previous title, save it
177
  if current_title is not None and current_narration.strip():
178
- segments.append({
179
- "prompt": current_title,
180
- "narration": current_narration.strip()
181
- })
182
-
183
- # Start the new segment
184
  current_title = title_match.group(1).strip()
185
- current_narration = title_match.group(2).strip() + " " # Add space for potential multi-line narration
186
  elif current_title is not None:
187
- # Append to the current narration if it's not a new title line
188
  current_narration += line + " "
189
-
190
- # Add the last segment
191
  if current_title is not None and current_narration.strip():
192
- segments.append({
193
- "prompt": current_title,
194
- "narration": current_narration.strip()
195
- })
196
-
197
  if not segments:
198
  print("Error: Could not parse any segments from the script.")
199
- # Attempt a simpler parse if the strict format failed
200
  simple_segments = []
201
  for i, line in enumerate(lines):
202
- if line.strip():
203
- simple_segments.append({"prompt": f"Scene {i+1}", "narration": line.strip()})
204
  if simple_segments:
205
  print("Warning: Using simplified script parsing.")
206
  return simple_segments
207
- else:
208
- return []
209
-
210
-
211
  print(f"Parsed {len(segments)} segments from script.")
212
  return segments
213
 
 
214
  def search_pexels(query, api_key, search_type="videos"):
215
- """Search Pexels for videos or images."""
216
- if not api_key:
217
- print(f"Pexels API key not provided. Skipping Pexels {search_type} search.")
218
  return None
219
 
220
  base_url = f"https://api.pexels.com/{search_type}/search"
221
  headers = {'Authorization': api_key}
222
  params = {"query": query, "per_page": 15, "orientation": "landscape"}
223
  if search_type == "videos":
224
- params["size"] = "medium" # Request medium or large, HD might not always be available
225
 
226
  max_retries = 3
227
- retry_delay = 1
 
 
 
228
 
229
  for attempt in range(max_retries):
230
  try:
231
- response = requests.get(base_url, headers=headers, params=params, timeout=15)
232
- response.raise_for_status() # Check for HTTP errors
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
 
234
  data = response.json()
235
  items = data.get(search_type, [])
236
 
237
  if not items:
238
  print(f"No Pexels {search_type} found for query: {query}")
239
- return None
240
 
 
241
  valid_items = []
242
  if search_type == "videos":
243
  for video in items:
244
- # Prefer HD or Large, fallback to Medium
245
  hd_link = next((f['link'] for f in video.get('video_files', []) if f.get('quality') == 'hd' and f.get('width', 0) >= 1080), None)
246
  large_link = next((f['link'] for f in video.get('video_files', []) if f.get('quality') == 'large' and f.get('width', 0) >= 1080), None)
247
  medium_link = next((f['link'] for f in video.get('video_files', []) if f.get('quality') == 'medium'), None)
248
  link = hd_link or large_link or medium_link
249
- if link:
250
- valid_items.append(link)
251
  else: # images
252
  for photo in items:
253
  link = photo.get("src", {}).get("large2x") or photo.get("src", {}).get("original")
254
- if link:
255
- valid_items.append(link)
256
 
257
  if valid_items:
258
- print(f"Found {len(valid_items)} Pexels {search_type} for '{query}'. Selecting one.")
259
  return random.choice(valid_items)
260
  else:
261
  print(f"No suitable quality Pexels {search_type} found for query: {query}")
262
  return None
263
 
264
  except requests.exceptions.Timeout:
265
- print(f"Pexels API timeout (attempt {attempt+1}/{max_retries})...")
266
  time.sleep(retry_delay)
267
  retry_delay *= 2
268
  except requests.exceptions.RequestException as e:
269
- print(f"Pexels API error (attempt {attempt+1}/{max_retries}): {e}")
270
- if response is not None and response.status_code == 429: # Rate limit
271
- print("Rate limit hit, waiting longer...")
272
- time.sleep(retry_delay * 2)
273
- retry_delay *= 2
274
- else:
275
- time.sleep(retry_delay)
276
- retry_delay *= 2
277
  except Exception as e:
278
  print(f"Unexpected error during Pexels search: {e}")
279
- break # Don't retry on unexpected errors
280
 
281
  print(f"Pexels {search_type} search failed for '{query}' after {max_retries} attempts.")
282
  return None
283
 
284
- def search_google_images(query):
285
- """Search for images on Google Images (use sparingly and ethically)."""
286
- print(f"Attempting Google Image search for: {query} (Use with caution)")
287
- try:
288
- # Using a simple search URL, might be fragile
289
- search_url = f"https://www.google.com/search?q={quote(query)}&tbm=isch&safe=active"
290
- headers = {"User-Agent": USER_AGENT}
291
- response = requests.get(search_url, headers=headers, timeout=10)
292
- response.raise_for_status()
293
- soup = BeautifulSoup(response.text, "html.parser")
294
-
295
- img_tags = soup.find_all("img")
296
- image_urls = []
297
-
298
- # Look for potential image URLs, prioritizing data-src or src containing http
299
- for img in img_tags:
300
- src = img.get("data-src") or img.get("src")
301
- if src and src.startswith("http") and not "gstatic" in src and not src.startswith("data:image"):
302
- # Basic check for likely image file extensions
303
- if any(ext in src.lower() for ext in ['.jpg', '.jpeg', '.png', '.webp']):
304
- image_urls.append(src)
305
-
306
- if image_urls:
307
- # Return a random one from the first few results
308
- print(f"Found {len(image_urls)} potential Google Images for '{query}'.")
309
- return random.choice(image_urls[:min(len(image_urls), 10)])
310
- else:
311
- print(f"No suitable Google Images found for query: {query}")
312
- return None
313
- except requests.exceptions.RequestException as e:
314
- print(f"Error during Google Images search request: {e}")
315
- return None
316
- except Exception as e:
317
- print(f"Error parsing Google Images search results: {e}")
318
- return None
319
 
320
  def download_media(media_url, filename_prefix, target_folder):
321
  """Download media (image or video) from a URL."""
 
 
322
  try:
323
  headers = {"User-Agent": USER_AGENT}
324
  response = requests.get(media_url, headers=headers, stream=True, timeout=30)
325
  response.raise_for_status()
326
-
327
- # Try to determine file extension from URL or Content-Type
328
- content_type = response.headers.get('content-type')
329
  file_extension = ".jpg" # Default
330
- if 'video' in content_type:
331
- file_extension = ".mp4"
332
- elif 'image/jpeg' in content_type:
333
- file_extension = ".jpg"
334
- elif 'image/png' in content_type:
335
- file_extension = ".png"
336
- elif 'image/webp' in content_type:
337
- file_extension = ".webp"
338
  else: # Guess from URL
339
- if '.mp4' in media_url: file_extension = ".mp4"
340
- elif '.mov' in media_url: file_extension = ".mov" # May need conversion later
341
- elif '.jpg' in media_url or '.jpeg' in media_url: file_extension = ".jpg"
342
- elif '.png' in media_url: file_extension = ".png"
343
- elif '.webp' in media_url: file_extension = ".webp"
344
-
345
 
346
  filename = os.path.join(target_folder, f"{filename_prefix}{file_extension}")
347
-
348
  with open(filename, 'wb') as f:
349
- for chunk in response.iter_content(chunk_size=8192):
350
- f.write(chunk)
351
-
352
  print(f"Media downloaded successfully to: {filename}")
353
 
354
- # Basic validation for images
355
  if file_extension in [".jpg", ".png", ".webp"]:
356
  try:
357
  img = Image.open(filename)
358
- img.verify() # Check if it's a valid image file
359
  img.close()
360
- # Re-open and convert to RGB if necessary (MoviePy prefers RGB)
361
  img = Image.open(filename)
362
  if img.mode != 'RGB':
363
  print(f"Converting image {filename} to RGB.")
364
  rgb_img = img.convert('RGB')
365
- # Overwrite if JPG, save as JPG if PNG/WEBP for consistency
366
  jpg_filename = os.path.join(target_folder, f"{filename_prefix}.jpg")
367
  rgb_img.save(jpg_filename, "JPEG")
368
  rgb_img.close()
369
  img.close()
370
- if filename != jpg_filename: # Remove original if format changed
371
- os.remove(filename)
372
- return jpg_filename # Return path to the JPG
373
- else:
374
- img.close()
375
-
376
  except Exception as e_validate:
377
  print(f"Downloaded file {filename} is not a valid image or conversion failed: {e_validate}")
378
  if os.path.exists(filename): os.remove(filename)
379
  return None
380
-
381
- return filename # Return original path for videos or already RGB images
382
-
383
  except requests.exceptions.RequestException as e_download:
384
  print(f"Media download error from {media_url}: {e_download}")
385
  return None
@@ -387,114 +323,106 @@ def download_media(media_url, filename_prefix, target_folder):
387
  print(f"General error during media download/processing: {e_general}")
388
  return None
389
 
 
390
  def generate_media(prompt):
391
  """
392
  Find and download a visual asset (video or image) based on the prompt.
393
- Prioritizes Pexels Video, then Pexels Image, then Google Image as fallback.
 
394
  """
395
  safe_prompt = re.sub(r'[^\w\s-]', '', prompt).strip().replace(' ', '_')
396
- filename_prefix = f"{safe_prompt}_{int(time.time())}" # Add timestamp for uniqueness
 
 
397
 
398
  # 1. Try Pexels Video (if probability met)
399
  if random.random() < video_clip_probability:
400
- print(f"Searching Pexels Video for: {prompt}")
401
  video_url = search_pexels(prompt, PEXELS_API_KEY, search_type="videos")
402
  if video_url:
403
  downloaded_path = download_media(video_url, filename_prefix + "_vid", TEMP_FOLDER)
404
  if downloaded_path:
 
405
  return {"path": downloaded_path, "asset_type": "video"}
406
  else:
407
- print("Pexels video search failed or no suitable video found.")
408
 
409
  # 2. Try Pexels Image
410
- print(f"Searching Pexels Image for: {prompt}")
411
  image_url = search_pexels(prompt, PEXELS_API_KEY, search_type="photos")
412
  if image_url:
413
  downloaded_path = download_media(image_url, filename_prefix + "_img", TEMP_FOLDER)
414
  if downloaded_path:
 
415
  return {"path": downloaded_path, "asset_type": "image"}
416
  else:
417
- print("Pexels image search failed.")
418
-
419
- # 3. Fallback: Try Google Images (especially for specific/newsy terms)
420
- print(f"Falling back to Google Image search for: {prompt}")
421
- google_image_url = search_google_images(prompt)
422
- if google_image_url:
423
- downloaded_path = download_media(google_image_url, filename_prefix + "_gimg", TEMP_FOLDER)
424
- if downloaded_path:
425
- return {"path": downloaded_path, "asset_type": "image"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
426
  else:
427
- print("Google image search failed.")
428
 
429
- # 4. Absolute Fallback: Generic Image
430
- print("All searches failed. Using absolute fallback: 'technology'")
431
- fallback_url = search_pexels("technology", PEXELS_API_KEY, search_type="photos")
432
- if fallback_url:
433
- downloaded_path = download_media(fallback_url, filename_prefix + "_fallback", TEMP_FOLDER)
434
- if downloaded_path:
435
- return {"path": downloaded_path, "asset_type": "image"}
436
 
437
- print(f"FATAL: Could not retrieve any media for prompt: {prompt}")
 
438
  return None # Indicate failure
439
 
 
440
  def generate_tts(text, voice_id, speed):
441
- """
442
- Generate TTS audio using Kokoro, falling back to gTTS.
443
- """
444
- # Sanitize text for filename (simple approach)
445
  safe_text_prefix = re.sub(r'[^\w\s-]', '', text[:20]).strip().replace(' ', '_')
446
  output_filename = os.path.join(TEMP_FOLDER, f"tts_{safe_text_prefix}_{voice_id}.wav")
447
-
448
- # --- Try Kokoro TTS First ---
449
- if pipeline: # Check if Kokoro was initialized
450
  try:
451
  print(f"Generating TTS with Kokoro (Voice: {voice_id}, Speed: {speed}) for: '{text[:30]}...'")
452
- # Generate audio segment(s)
453
- generator = pipeline(text, voice=voice_id, speed=speed) # Removed split_pattern for simplicity
454
  audio_segments = []
455
- # Kokoro's generator might yield differently depending on version/setup
456
- # Assuming it yields tuples or directly the audio array
457
  for item in generator:
458
  if isinstance(item, tuple) and len(item) > 0 and isinstance(item[-1], np.ndarray):
459
- audio_segments.append(item[-1]) # Assume audio is the last element
460
  elif isinstance(item, np.ndarray):
461
- audio_segments.append(item) # Assume it yields audio directly
462
- # Add more checks if Kokoro's output structure is different
463
-
464
- if not audio_segments:
465
- raise ValueError("Kokoro TTS returned no audio segments.")
466
-
467
- # Concatenate if multiple segments
468
  full_audio = np.concatenate(audio_segments) if len(audio_segments) > 0 else audio_segments[0]
469
-
470
- # Ensure audio is float32 for soundfile if needed (Kokoro usually outputs float32)
471
  if full_audio.dtype != np.float32:
472
  full_audio = full_audio.astype(np.float32)
473
- # Normalize if necessary after type conversion
474
  max_val = np.max(np.abs(full_audio))
475
- if max_val > 1.0:
476
- full_audio /= max_val
477
-
478
- sf.write(output_filename, full_audio, 24000) # Kokoro default sample rate
479
  print(f"Kokoro TTS audio saved to {output_filename}")
480
  return output_filename
481
  except Exception as e:
482
  print(f"Error with Kokoro TTS: {e}. Falling back to gTTS.")
483
- # Fall through to gTTS block
484
-
485
- # --- Fallback to gTTS ---
486
  try:
487
  print(f"Generating TTS with gTTS for: '{text[:30]}...'")
488
- tts = gTTS(text=text, lang='en', slow= (speed < 0.9) ) # gTTS speed is boolean (slow/normal)
489
  mp3_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text_prefix}_gtts.mp3")
490
- wav_path = output_filename # Reuse the intended output filename
491
  tts.save(mp3_path)
492
-
493
- # Convert MP3 to WAV using pydub
494
  audio = AudioSegment.from_mp3(mp3_path)
495
  audio.export(wav_path, format="wav")
496
- os.remove(mp3_path) # Clean up temporary mp3
497
-
498
  print(f"gTTS audio saved and converted to {wav_path}")
499
  return wav_path
500
  except ImportError:
@@ -504,577 +432,317 @@ def generate_tts(text, voice_id, speed):
504
  print(f"Error with gTTS fallback: {fallback_error}")
505
  return None
506
 
507
-
508
  def apply_kenburns_effect(clip, target_resolution, duration):
509
  """Apply a randomized Ken Burns effect (zoom/pan) to an image clip."""
 
510
  target_w, target_h = target_resolution
511
  img_w, img_h = clip.size
512
-
513
- # Resize image slightly larger than target to allow movement
514
- # Maintain aspect ratio, fit to cover target dimensions + margin
515
- scale_factor = 1.2 # Zoom margin
516
  scaled_w, scaled_h = img_w * scale_factor, img_h * scale_factor
517
-
518
- # Determine resize dimensions to cover target + margin
519
  if scaled_w / scaled_h > target_w / target_h:
520
- # Wider than target aspect ratio -> fit height
521
  final_h = target_h * scale_factor
522
  final_w = final_h * (img_w / img_h)
523
  else:
524
- # Taller than target aspect ratio -> fit width
525
  final_w = target_w * scale_factor
526
  final_h = final_w * (img_h / img_w)
527
-
528
  final_w, final_h = int(final_w), int(final_h)
529
-
530
- # Create a resized version for the animation base
531
- # Using Pillow for resizing before passing to MoviePy might be more reliable for high-res
532
  try:
533
- pil_img = Image.fromarray(clip.get_frame(0)) # Get frame as numpy array, convert to PIL
534
  resized_pil = pil_img.resize((final_w, final_h), Image.Resampling.LANCZOS)
535
  resized_clip = ImageClip(np.array(resized_pil)).set_duration(duration)
536
  except Exception as e:
537
  print(f"Warning: Error during high-quality resize for Ken Burns, using MoviePy default: {e}")
538
- # Fallback to moviepy resize (might be lower quality for stills)
539
  resized_clip = clip.resize(newsize=(final_w, final_h)).set_duration(duration)
540
-
541
-
542
- # Define start and end zoom/position parameters randomly
543
  max_move_x = final_w - target_w
544
  max_move_y = final_h - target_h
545
-
546
- # Choose effect type
547
  effect = random.choice(['zoom_in', 'zoom_out', 'pan_lr', 'pan_rl', 'pan_td', 'pan_dt'])
548
-
549
- if effect == 'zoom_in':
550
- zoom_start, zoom_end = 1.0, scale_factor
551
- x_start, x_end = max_move_x / 2, max_move_x / 2
552
- y_start, y_end = max_move_y / 2, max_move_y / 2
553
- elif effect == 'zoom_out':
554
- zoom_start, zoom_end = scale_factor, 1.0
555
- x_start, x_end = max_move_x / 2, max_move_x / 2
556
- y_start, y_end = max_move_y / 2, max_move_y / 2
557
- elif effect == 'pan_lr': # Pan Left to Right
558
- zoom_start, zoom_end = scale_factor, scale_factor # Constant zoom while panning
559
- x_start, x_end = 0, max_move_x
560
- y_start, y_end = max_move_y / 2, max_move_y / 2 # Center vertically
561
- elif effect == 'pan_rl': # Pan Right to Left
562
- zoom_start, zoom_end = scale_factor, scale_factor
563
- x_start, x_end = max_move_x, 0
564
- y_start, y_end = max_move_y / 2, max_move_y / 2
565
- elif effect == 'pan_td': # Pan Top to Down
566
- zoom_start, zoom_end = scale_factor, scale_factor
567
- x_start, x_end = max_move_x / 2, max_move_x / 2 # Center horizontally
568
- y_start, y_end = 0, max_move_y
569
- else: # Pan Down to Top
570
- zoom_start, zoom_end = scale_factor, scale_factor
571
- x_start, x_end = max_move_x / 2, max_move_x / 2
572
- y_start, y_end = max_move_y, 0
573
-
574
- # Define the function for fl_image
575
  def make_frame(t):
576
- # Linear interpolation for time
577
  interp = t / duration if duration else 0
578
-
579
- # Interpolate zoom and position
580
  current_zoom = zoom_start + (zoom_end - zoom_start) * interp
581
  current_x = x_start + (x_end - x_start) * interp
582
  current_y = y_start + (y_end - y_start) * interp
583
-
584
- # Calculate the crop box dimensions based on current zoom
585
- # The size of the box to crop from the *resized_clip*
586
- crop_w = target_w / (current_zoom / scale_factor) # Adjust target by zoom relative to initial scale
587
- crop_h = target_h / (current_zoom / scale_factor)
588
-
589
- # Ensure crop dimensions are valid
590
- crop_w = max(1, int(crop_w))
591
- crop_h = max(1, int(crop_h))
592
-
593
- # Calculate the top-left corner of the crop box
594
- # current_x/y is the center offset in the scaled image
595
- # We need top-left corner (x1, y1)
596
- x1 = current_x # Assume current_x/y are already top-left offsets
597
- y1 = current_y
598
-
599
- # Clamp coordinates to stay within bounds
600
- x1 = max(0, min(x1, final_w - crop_w))
601
- y1 = max(0, min(y1, final_h - crop_h))
602
-
603
- # Get the frame from the *potentially pre-resized* clip
604
  frame = resized_clip.get_frame(t)
605
-
606
- # Crop the frame
607
- # Note: MoviePy's crop uses x1,y1,x2,y2 or x_center, y_center, width, height
608
- # Using numpy slicing might be easier here
609
  cropped_frame = frame[int(y1):int(y1 + crop_h), int(x1):int(x1 + crop_w)]
610
-
611
- # Resize the cropped frame to the final target resolution
612
- # Using OpenCV for potentially better resizing quality
613
  final_frame = cv2.resize(cropped_frame, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)
614
-
615
  return final_frame
616
-
617
- # Apply the transformation using fl
618
- return resized_clip.fl(make_frame, apply_to=['mask']) # Apply to mask if exists
619
 
620
  def resize_to_fill(clip, target_resolution):
621
  """Resize and crop a video clip to fill the target resolution."""
 
622
  target_w, target_h = target_resolution
623
  target_aspect = target_w / target_h
624
-
625
- # Use MoviePy's resize and crop fx
626
- # Resize to fit the smaller dimension to cover the target area
627
- if clip.w / clip.h > target_aspect:
628
- # Clip is wider than target -> resize based on height
629
- resized_clip = clip.resize(height=target_h)
630
- else:
631
- # Clip is taller or equal aspect -> resize based on width
632
- resized_clip = clip.resize(width=target_w)
633
-
634
- # Crop the excess from the center
635
  crop_x = max(0, (resized_clip.w - target_w) / 2)
636
  crop_y = max(0, (resized_clip.h - target_h) / 2)
637
-
638
  cropped_clip = resized_clip.crop(x1=crop_x, y1=crop_y, width=target_w, height=target_h)
639
  return cropped_clip
640
 
641
  def add_background_music(video_clip, music_file_path, volume):
642
  """Add background music, looping if necessary."""
 
643
  if not music_file_path or not os.path.exists(music_file_path):
644
  print("No background music file found or provided. Skipping.")
645
  return video_clip
646
-
647
  try:
648
  print(f"Adding background music from: {music_file_path}")
649
  bg_music = AudioFileClip(music_file_path)
650
-
651
- if bg_music.duration > video_clip.duration:
652
- # Trim music to video duration
653
- bg_music = bg_music.subclip(0, video_clip.duration)
654
  elif bg_music.duration < video_clip.duration:
655
- # Loop music to fit video duration
656
  loops_needed = math.ceil(video_clip.duration / bg_music.duration)
657
  bg_music = concatenate_audioclips([bg_music] * loops_needed)
658
- bg_music = bg_music.subclip(0, video_clip.duration) # Trim precisely
659
-
660
- # Apply volume adjustment
661
  bg_music = bg_music.volumex(volume)
662
-
663
- # Combine with existing audio (if any)
664
- final_audio = CompositeAudioClip([video_clip.audio, bg_music])
 
 
 
665
  video_clip = video_clip.set_audio(final_audio)
666
  print("Background music added successfully.")
667
  return video_clip
668
-
669
  except Exception as e:
670
  print(f"Error adding background music: {e}. Skipping.")
671
- return video_clip # Return original clip
672
-
673
 
674
  def create_segment_clip(media_info, tts_path, narration_text):
675
  """Create a single video segment (clip) with visuals, audio, and subtitles."""
 
676
  try:
677
  media_path = media_info['path']
678
  asset_type = media_info['asset_type']
679
  print(f"Creating clip segment: Type={asset_type}, Media={os.path.basename(media_path)}")
680
-
681
- # --- 1. Load Audio ---
682
- if not os.path.exists(tts_path):
683
- print(f"Error: TTS file not found: {tts_path}")
684
- return None
685
  audio_clip = AudioFileClip(tts_path)
686
- # Add tiny silence buffer at start/end for transitions
687
- segment_duration = audio_clip.duration + 0.3 # Add buffer
688
-
689
- # --- 2. Create Visual Clip ---
690
  if asset_type == "video":
691
- if not os.path.exists(media_path):
692
- print(f"Error: Video file not found: {media_path}")
693
- return None
694
  video_clip = VideoFileClip(media_path)
695
- # Ensure video covers segment duration, looping if needed
696
  if video_clip.duration < segment_duration:
697
- # Loop the video
698
  loops = math.ceil(segment_duration / video_clip.duration)
699
- video_clip = concatenate_videoclips([video_clip] * loops)
700
- # Trim to exact segment duration
 
 
 
 
 
 
 
 
 
 
701
  video_clip = video_clip.subclip(0, segment_duration)
702
- # Resize/crop to fit target resolution
703
  visual_clip = resize_to_fill(video_clip, TARGET_RESOLUTION)
704
-
705
  elif asset_type == "image":
706
- if not os.path.exists(media_path):
707
- print(f"Error: Image file not found: {media_path}")
708
- return None
709
- # Create ImageClip and apply Ken Burns
710
  img_clip = ImageClip(media_path).set_duration(segment_duration)
711
- # Apply Ken Burns effect
712
  visual_clip = apply_kenburns_effect(img_clip, TARGET_RESOLUTION, segment_duration)
713
- # Ensure final size is correct after effects
714
  visual_clip = visual_clip.resize(newsize=TARGET_RESOLUTION)
715
-
716
- else:
717
- print(f"Error: Unknown asset type: {asset_type}")
718
- return None
719
-
720
- # Add fade-in/out for smoother transitions between segments
721
  visual_clip = visual_clip.fadein(0.15).fadeout(0.15)
722
-
723
- # --- 3. Add Subtitles (Chunk-based, No ImageMagick) ---
724
  subtitle_clips = []
725
  if USE_CAPTIONS and narration_text:
726
  words = narration_text.split()
727
- # Simple chunking: group words, max ~5 per chunk or based on duration
728
- max_words_per_chunk = 5
729
- chunks = []
730
- current_chunk = []
731
  for word in words:
732
  current_chunk.append(word)
733
- if len(current_chunk) >= max_words_per_chunk:
734
- chunks.append(" ".join(current_chunk))
735
- current_chunk = []
736
- if current_chunk: # Add any remaining words
737
- chunks.append(" ".join(current_chunk))
738
-
739
- if not chunks: # Handle empty narration case
740
- print("Warning: Narration text is empty, skipping subtitles for this segment.")
741
-
742
  else:
743
- num_chunks = len(chunks)
744
- chunk_duration = audio_clip.duration / num_chunks # Duration per text chunk
745
- start_time = 0.1 # Small delay before first subtitle
746
-
747
  for i, chunk_text in enumerate(chunks):
748
- end_time = start_time + chunk_duration
749
- # Ensure end_time doesn't exceed the visual clip duration slightly
750
- end_time = min(end_time, segment_duration - 0.1)
751
-
752
- txt_clip = TextClip(
753
- txt=chunk_text,
754
- fontsize=font_size,
755
- font=caption_font, # Ensure this font is available or use default
756
- color=caption_style_text_color,
757
- bg_color=caption_style_bg_color, # Semi-transparent background
758
- method='label', # Use 'label' - simpler, less likely ImageMagick dependency
759
- align='center',
760
- size=(TARGET_RESOLUTION[0] * 0.8, None) # Width constraint
761
- )
762
- # Position: center horizontally, lower part of the screen
763
- txt_clip = txt_clip.set_position(('center', TARGET_RESOLUTION[1] * 0.80))
764
- txt_clip = txt_clip.set_start(start_time).set_duration(end_time - start_time)
765
- # txt_clip = txt_clip.fadein(0.1).fadeout(0.1) # Optional fade for text
766
-
767
- subtitle_clips.append(txt_clip)
768
- start_time = end_time # Next chunk starts where the last one ended
769
-
770
- # --- 4. Combine Visuals, Audio, and Subtitles ---
771
- if subtitle_clips:
772
- final_clip = CompositeVideoClip([visual_clip] + subtitle_clips)
773
- else:
774
- final_clip = visual_clip # No subtitles
775
-
776
- # Set the audio for the final segment clip
777
- final_clip = final_clip.set_audio(audio_clip.set_start(0.15)) # Start audio slightly after visual fade-in
778
-
779
  print(f"Clip segment created successfully. Duration: {final_clip.duration:.2f}s")
780
  return final_clip
781
-
782
  except Exception as e:
783
  print(f"Error creating clip segment: {e}")
784
  import traceback
785
- traceback.print_exc() # Print detailed traceback for debugging
786
  return None
787
 
788
  # ---------------- Main Video Generation Function ---------------- #
789
 
790
  def generate_full_video(user_input, resolution_choice, caption_choice, music_file_info):
791
- """
792
- Main function orchestrating the video generation process.
793
- Uses global variables set by Gradio for configuration.
794
- """
795
  global TARGET_RESOLUTION, TEMP_FOLDER, USE_CAPTIONS
796
-
797
- print("\n--- Starting Video Generation ---")
798
- start_time = time.time()
799
-
800
- # 1. Setup Environment
801
- if resolution_choice == "Short (9:16)":
802
- TARGET_RESOLUTION = (1080, 1920)
803
- print("Resolution set to: Short (1080x1920)")
804
- else: # Default to Full HD
805
- TARGET_RESOLUTION = (1920, 1080)
806
- print("Resolution set to: Full HD (1920x1080)")
807
-
808
- USE_CAPTIONS = (caption_choice == "Yes")
809
- print(f"Captions Enabled: {USE_CAPTIONS}")
810
-
811
- # Create unique temporary folder for this run
812
- TEMP_FOLDER = tempfile.mkdtemp(prefix="aivideo_")
813
- print(f"Temporary folder created: {TEMP_FOLDER}")
814
-
815
- # Handle uploaded music file
816
  music_file_path = None
817
  if music_file_info is not None:
818
  try:
819
- # Copy uploaded file to a predictable location (or use directly if safe)
820
- # Using a fixed name in the temp folder is often easier
821
  music_file_path = os.path.join(TEMP_FOLDER, "background_music.mp3")
822
  shutil.copy(music_file_info.name, music_file_path)
823
  print(f"Background music copied to: {music_file_path}")
824
- except Exception as e:
825
- print(f"Error handling uploaded music file: {e}")
826
- music_file_path = None # Ensure it's None if copy failed
827
-
828
 
829
- # --- No ImageMagick Fix Needed ---
830
- # print("Skipping ImageMagick policy check as alternative subtitles are used.")
831
-
832
-
833
- # 2. Generate Script
834
- print("\nStep 1: Generating script...")
835
- script_text = generate_script(user_input)
836
- if not script_text:
837
- print("ERROR: Failed to generate script. Aborting.")
838
- shutil.rmtree(TEMP_FOLDER)
839
- return None, "Error: Script generation failed. Check API keys and OpenRouter status." # Return error message for Gradio
840
  print("Script Generated:\n", script_text)
841
 
842
-
843
- # 3. Parse Script
844
- print("\nStep 2: Parsing script...")
845
- segments = parse_script(script_text)
846
- if not segments:
847
- print("ERROR: Failed to parse script into segments. Aborting.")
848
- shutil.rmtree(TEMP_FOLDER)
849
- return None, "Error: Script parsing failed. Check script format."
850
  print(f"Successfully parsed {len(segments)} segments.")
851
 
852
-
853
- # 4. Generate Media and TTS for each segment
854
  print("\nStep 3: Generating media and TTS for each segment...")
855
- segment_clips = []
856
- total_segments = len(segments)
857
  for i, segment in enumerate(segments):
858
  print(f"\n--- Processing Segment {i+1}/{total_segments} ---")
859
  print(f" Prompt: {segment['prompt']}")
860
  print(f" Narration: {segment['narration']}")
861
 
862
- # Generate Media
863
  media_info = generate_media(segment['prompt'])
 
864
  if not media_info:
865
- print(f"Warning: Failed to get media for segment {i+1}. Skipping segment.")
866
- continue # Skip this segment if media fails
867
 
868
- # Generate TTS
869
  tts_path = generate_tts(segment['narration'], selected_voice, voice_speed)
870
  if not tts_path:
871
  print(f"Warning: Failed to generate TTS for segment {i+1}. Skipping segment.")
872
- # Clean up downloaded media if TTS failed
873
  if media_info and os.path.exists(media_info['path']):
874
- try: os.remove(media_info['path'])
875
  except OSError: pass
876
- continue # Skip this segment if TTS fails
877
 
878
- # Create the actual MoviePy clip for this segment
879
  clip = create_segment_clip(media_info, tts_path, segment['narration'])
880
  if clip:
881
  segment_clips.append(clip)
882
  else:
883
  print(f"Warning: Failed to create video clip for segment {i+1}. Skipping.")
884
- # Clean up files for this failed segment
885
  if media_info and os.path.exists(media_info['path']):
886
- try: os.remove(media_info['path'])
887
  except OSError: pass
888
  if tts_path and os.path.exists(tts_path):
889
- try: os.remove(tts_path)
890
  except OSError: pass
891
 
892
-
893
  if not segment_clips:
894
  print("ERROR: No video clips were successfully created. Aborting.")
895
  shutil.rmtree(TEMP_FOLDER)
896
- return None, "Error: Failed to create any video segments. Check logs."
897
-
898
 
899
- # 5. Concatenate Clips
900
- print("\nStep 4: Concatenating video segments...")
901
  try:
902
- final_video = concatenate_videoclips(segment_clips, method="compose")
 
 
 
 
903
  print("Segments concatenated successfully.")
904
  except Exception as e:
905
- print(f"ERROR: Failed to concatenate video clips: {e}")
906
- shutil.rmtree(TEMP_FOLDER)
907
- return None, f"Error: Concatenation failed: {e}"
908
-
909
-
910
- # 6. Add Background Music
911
- print("\nStep 5: Adding background music...")
912
- final_video = add_background_music(final_video, music_file_path, bg_music_volume)
913
 
 
914
 
915
- # 7. Export Final Video
916
- print(f"\nStep 6: Exporting final video to '{OUTPUT_VIDEO_FILENAME}'...")
917
  try:
918
- final_video.write_videofile(
919
- OUTPUT_VIDEO_FILENAME,
920
- codec='libx264', # Good balance of quality/compatibility
921
- audio_codec='aac', # Standard audio codec
922
- fps=fps,
923
- preset=preset, # Controls encoding speed vs compression
924
- threads=4, # Use multiple threads for faster encoding
925
- logger='bar' # Show progress bar
926
- )
927
  print(f"Final video saved successfully as {OUTPUT_VIDEO_FILENAME}")
928
  export_success = True
929
  except Exception as e:
930
- print(f"ERROR: Failed to write final video file: {e}")
931
- import traceback
932
- traceback.print_exc()
933
- export_success = False
934
-
935
-
936
- # 8. Cleanup
937
- print("\nStep 7: Cleaning up temporary files...")
938
- try:
939
- shutil.rmtree(TEMP_FOLDER)
940
- print(f"Temporary folder {TEMP_FOLDER} removed.")
941
- except Exception as e:
942
- print(f"Warning: Failed to remove temporary folder {TEMP_FOLDER}: {e}")
943
 
 
 
 
944
 
945
- end_time = time.time()
946
- total_time = end_time - start_time
947
- print(f"\n--- Video Generation Finished ---")
948
- print(f"Total time: {total_time:.2f} seconds")
949
-
950
- if export_success:
951
- return OUTPUT_VIDEO_FILENAME, f"Video generation complete! Time: {total_time:.2f}s"
952
- else:
953
- return None, f"Error: Video export failed. Check logs. Time: {total_time:.2f}s"
954
 
955
 
956
  # ---------------- Gradio Interface Definition ---------------- #
957
-
958
- # Kokoro Voice Choices (Example subset, add more as needed)
959
  VOICE_CHOICES = {
960
- # American English
961
- 'Emma (US Female)': 'af_heart',
962
- 'Bella (US Female)': 'af_bella',
963
- 'Nicole (US Female)': 'af_nicole',
964
- 'Sarah (US Female)': 'af_sarah',
965
- 'Michael (US Male)': 'am_michael',
966
- 'Eric (US Male)': 'am_eric',
967
- 'Adam (US Male)': 'am_adam',
968
- # British English
969
- 'Emma (UK Female)': 'bf_emma',
970
- 'Alice (UK Female)': 'bf_alice',
971
- 'George (UK Male)': 'bm_george',
972
- 'Daniel (UK Male)': 'bm_daniel',
973
  }
974
-
975
def gradio_interface_handler(
        user_prompt, resolution, captions, bg_music,
        voice_name, video_prob, music_vol, video_fps, export_preset,
        tts_speed, caption_size):
    """
    Connect the Gradio inputs to generate_full_video.

    Copies the UI settings into the module-level configuration globals and
    then runs the generation pipeline.

    Args:
        user_prompt: Topic, instructions or script typed by the user.
        resolution: Resolution dropdown value, forwarded unchanged.
        captions: "Yes"/"No" caption choice, forwarded unchanged.
        bg_music: Uploaded music file object, or None.
        voice_name: Display name looked up in VOICE_CHOICES; unknown names
            fall back to 'af_heart'.
        video_prob: Video clip percentage (0-100); stored as a 0-1 probability.
        music_vol: Background music volume.
        video_fps: Export frames per second.
        export_preset: Export preset name.
        tts_speed: Narration speed.
        caption_size: Caption font size.

    Returns:
        tuple: (video path or None, status message).
    """
    global selected_voice, voice_speed, font_size, video_clip_probability, bg_music_volume, fps, preset

    print("\n--- Received Request from Gradio ---")

    # Reject an empty prompt up front: without it the pipeline would still
    # call the script API and download media for a meaningless request.
    if not user_prompt or not user_prompt.strip():
        status_message = "Error: Please enter a topic, instructions, or script before generating a video."
        print(f"Gradio Handler Status: {status_message}")
        return None, status_message

    print(f"Prompt: {user_prompt[:50]}...")
    print(f"Resolution: {resolution}")
    print(f"Captions: {captions}")
    print(f"Music File: {'Provided' if bg_music else 'None'}")
    print(f"Voice: {voice_name}")
    print(f"Video Probability: {video_prob}%")
    print(f"Music Volume: {music_vol}")
    print(f"FPS: {video_fps}")
    print(f"Preset: {export_preset}")
    print(f"TTS Speed: {tts_speed}")
    print(f"Caption Size: {caption_size}")

    # Update global settings based on Gradio inputs
    selected_voice = VOICE_CHOICES.get(voice_name, 'af_heart')  # Get voice ID from name
    voice_speed = tts_speed
    font_size = caption_size
    video_clip_probability = video_prob / 100.0  # Convert percentage to probability
    bg_music_volume = music_vol
    fps = video_fps
    preset = export_preset

    # Call the main video generation function
    video_path, status_message = generate_full_video(user_prompt, resolution, captions, bg_music)

    print(f"Gradio Handler Status: {status_message}")

    # Return the video path (or None if failed) and the status message
    return video_path, status_message
1011
 
1012
-
1013
- # Create Gradio Interface
1014
  with gr.Blocks(theme=gr.themes.Soft()) as iface:
1015
  gr.Markdown("# 🎬 AI Documentary Video Generator")
1016
- gr.Markdown("Enter a topic or detailed instructions, customize settings, and generate a short documentary-style video. Uses AI for script, TTS, and finds relevant stock footage.")
1017
-
1018
  with gr.Row():
1019
  with gr.Column(scale=2):
1020
- prompt_input = gr.Textbox(
1021
- label="Video Concept / Topic / Script",
1022
- placeholder="e.g., 'The history of coffee', 'Top 5 strangest animals', or paste a full script following the format.",
1023
- lines=4
1024
- )
1025
  submit_button = gr.Button("Generate Video", variant="primary")
1026
  status_output = gr.Textbox(label="Status", interactive=False)
1027
-
1028
- with gr.Column(scale=1):
1029
- video_output = gr.Video(label="Generated Video")
1030
-
1031
-
1032
  with gr.Accordion("⚙️ Advanced Settings", open=False):
1033
  gr.Markdown("### Video & Audio Settings")
1034
  with gr.Row():
1035
  resolution_dd = gr.Dropdown(["Full HD (16:9)", "Short (9:16)"], label="Resolution", value="Full HD (16:9)")
1036
  caption_dd = gr.Radio(["Yes", "No"], label="Generate Captions", value="Yes")
1037
  music_upload = gr.File(label="Upload Background Music (MP3)", file_types=[".mp3"])
1038
-
1039
  gr.Markdown("### Voice & Narration")
1040
  with gr.Row():
1041
  voice_dd = gr.Dropdown(choices=list(VOICE_CHOICES.keys()), label="Narration Voice", value="Emma (US Female)")
1042
  speed_slider = gr.Slider(0.5, 1.5, value=0.9, step=0.05, label="Voice Speed")
1043
-
1044
  gr.Markdown("### Visuals & Style")
1045
  with gr.Row():
1046
  video_prob_slider = gr.Slider(0, 100, value=35, step=5, label="Video Clip % (vs. Images)")
1047
  caption_size_slider = gr.Slider(20, 80, value=45, step=1, label="Caption Font Size")
1048
-
1049
  gr.Markdown("### Export Settings")
1050
  with gr.Row():
1051
  music_vol_slider = gr.Slider(0.0, 1.0, value=0.08, step=0.01, label="Background Music Volume")
1052
  fps_slider = gr.Slider(15, 60, value=30, step=1, label="Video FPS")
1053
- preset_dd = gr.Dropdown(
1054
- choices=["ultrafast", "superfast", "veryfast", "faster", "fast", "medium", "slow", "slower", "veryslow"],
1055
- value="veryfast", label="Export Quality/Speed Preset"
1056
- )
1057
-
1058
- # Connect components
1059
- submit_button.click(
1060
- fn=gradio_interface_handler,
1061
- inputs=[
1062
- prompt_input, resolution_dd, caption_dd, music_upload,
1063
- voice_dd, video_prob_slider, music_vol_slider, fps_slider, preset_dd,
1064
- speed_slider, caption_size_slider
1065
- ],
1066
- outputs=[video_output, status_output]
1067
- )
1068
 
1069
  # Launch the interface
1070
  if __name__ == "__main__":
1071
  print("Launching Gradio Interface...")
1072
- # Make sure to replace API keys at the top before running!
1073
  if PEXELS_API_KEY == 'YOUR_PEXELS_API_KEY' or OPENROUTER_API_KEY == 'YOUR_OPENROUTER_API_KEY':
1074
  print("\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
1075
  print("!!! WARNING: API Keys not set in the script. !!!")
1076
  print("!!! Please replace 'YOUR_PEXELS_API_KEY' and !!!")
1077
  print("!!! 'YOUR_OPENROUTER_API_KEY' with your actual keys. !!!")
1078
  print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n")
1079
-
1080
- iface.launch(share=True, debug=True) # Share=True for public link, Debug=True for more logs
 
14
  import shutil
15
  from urllib.parse import quote
16
  import numpy as np
17
+ from bs4 import BeautifulSoup # Keep import in case needed elsewhere, but not for search
18
  import base64
19
  from gtts import gTTS
20
  import gradio as gr
 
27
  concatenate_videoclips, CompositeAudioClip
28
  )
29
  import moviepy.video.fx.all as vfx
 
30
 
31
+ # Pydub imports
32
  from pydub import AudioSegment
33
+ from pydub.generators import Sine
34
 
35
  # ---------------- Global Configuration ---------------- #
36
  # --- API Keys (Replace with your actual keys) ---
 
42
  USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
43
 
44
  # --- Gradio Controlled Variables (with defaults) ---
45
selected_voice = 'af_heart'  # Kokoro voice ID passed to generate_tts
voice_speed = 0.9  # Narration speed passed to generate_tts
font_size = 45  # Caption font size
video_clip_probability = 0.25 # Default 25%: chance that generate_media tries a Pexels video first
bg_music_volume = 0.08  # Volume passed to add_background_music
fps = 30  # Frames per second used when exporting the video
preset = "veryfast"  # Export preset used when exporting the video
caption_style_bg_color = 'rgba(0, 0, 0, 0.6)'  # Background color for captions
caption_style_text_color = 'yellow'  # Text color for captions
caption_font = 'Arial-Bold'  # Font for captions

# --- Runtime Variables (set per execution) ---
TARGET_RESOLUTION = None  # (width, height) tuple set by generate_full_video
TEMP_FOLDER = None  # Per-run temporary folder set by generate_full_video
USE_CAPTIONS = True  # Set by generate_full_video from the caption choice
60
 
61
  # ---------------- Kokoro TTS Initialization ---------------- #
62
  try:
 
63
  pipeline = KPipeline(lang_code='a')
64
  print("Kokoro TTS Pipeline initialized.")
65
  except Exception as e:
 
71
 
72
  def generate_script(user_input):
73
  """Generate documentary script using OpenRouter API."""
74
+ # --- Retain previous generate_script function ---
75
+ # (No changes needed here based on the request)
76
  headers = {
77
  'Authorization': f'Bearer {OPENROUTER_API_KEY}',
78
  'Content-Type': 'application/json',
 
79
  'HTTP-Referer': 'http://localhost:7860', # Or your app's URL
80
  'X-Title': 'AI Documentary Maker Gradio'
81
  }
 
82
  prompt = f"""Create a short, humorous, slightly negative, and conversational documentary-style script based on the following topic or instructions: '{user_input}'.
83
 
84
  Formatting Rules:
 
105
 
106
  Now generate the script based on: {user_input}
107
  """
 
108
  data = {
109
  'model': OPENROUTER_MODEL,
110
  'messages': [{'role': 'user', 'content': prompt}],
111
+ 'temperature': 0.6,
112
+ 'max_tokens': 600
113
  }
 
114
  try:
115
  response = requests.post(
116
  'https://openrouter.ai/api/v1/chat/completions',
117
  headers=headers,
118
  json=data,
119
+ timeout=45
120
  )
121
+ response.raise_for_status()
 
 
122
  response_data = response.json()
123
  if 'choices' in response_data and len(response_data['choices']) > 0:
124
  script_content = response_data['choices'][0]['message']['content']
125
+ script_content = re.sub(r"^.*?\n?\[", "[", script_content, flags=re.DOTALL)
 
126
  script_content = script_content.strip()
127
  print("Script generated successfully.")
128
  return script_content
129
  else:
130
  print(f"API Error: Unexpected response format: {response_data}")
131
  return None
 
132
  except requests.exceptions.Timeout:
133
  print("API Error: Request timed out.")
134
  return None
 
143
  return None
144
 
145
  def parse_script(script_text):
146
+ """Parse the generated script into segments."""
147
+ # --- Retain previous parse_script function ---
148
+ # (No changes needed here based on the request)
 
149
  segments = []
150
  current_title = None
151
  current_narration = ""
 
152
  if not script_text:
153
  print("Error: Script text is empty.")
154
  return []
 
155
  lines = script_text.strip().splitlines()
 
156
  for line in lines:
157
  line = line.strip()
158
+ if not line: continue
 
 
159
  title_match = re.match(r'^\[(.*?)\](.*)', line)
 
160
  if title_match:
 
161
  if current_title is not None and current_narration.strip():
162
+ segments.append({"prompt": current_title, "narration": current_narration.strip()})
 
 
 
 
 
163
  current_title = title_match.group(1).strip()
164
+ current_narration = title_match.group(2).strip() + " "
165
  elif current_title is not None:
 
166
  current_narration += line + " "
 
 
167
  if current_title is not None and current_narration.strip():
168
+ segments.append({"prompt": current_title, "narration": current_narration.strip()})
 
 
 
 
169
  if not segments:
170
  print("Error: Could not parse any segments from the script.")
 
171
  simple_segments = []
172
  for i, line in enumerate(lines):
173
+ if line.strip(): simple_segments.append({"prompt": f"Scene {i+1}", "narration": line.strip()})
 
174
  if simple_segments:
175
  print("Warning: Using simplified script parsing.")
176
  return simple_segments
177
+ else: return []
 
 
 
178
  print(f"Parsed {len(segments)} segments from script.")
179
  return segments
180
 
181
+ # --- MODIFIED: search_pexels Function ---
182
  def search_pexels(query, api_key, search_type="videos"):
183
+ """Search Pexels for videos or images with improved error handling."""
184
+ if not api_key or api_key == 'YOUR_PEXELS_API_KEY':
185
+ print(f"Pexels API key not provided or is default. Skipping Pexels {search_type} search.")
186
  return None
187
 
188
  base_url = f"https://api.pexels.com/{search_type}/search"
189
  headers = {'Authorization': api_key}
190
  params = {"query": query, "per_page": 15, "orientation": "landscape"}
191
  if search_type == "videos":
192
+ params["size"] = "medium" # Request medium or large
193
 
194
  max_retries = 3
195
+ retry_delay = 2 # Slightly longer initial delay
196
+ timeout_duration = 20 # Increased timeout
197
+
198
+ print(f"Searching Pexels {search_type} for '{query}'...")
199
 
200
  for attempt in range(max_retries):
201
  try:
202
+ response = requests.get(base_url, headers=headers, params=params, timeout=timeout_duration)
203
+
204
+ # Check for specific HTTP errors before raising general exception
205
+ if response.status_code == 401:
206
+ print(f"Pexels API Error: Unauthorized (401). Check your API Key.")
207
+ return None # Don't retry on auth errors
208
+ if response.status_code == 429:
209
+ print(f"Pexels API Error: Rate limit hit (429) (attempt {attempt+1}/{max_retries}). Waiting {retry_delay*2}s...")
210
+ time.sleep(retry_delay * 2) # Wait longer for rate limits
211
+ retry_delay *= 2
212
+ continue # Go to next attempt
213
+ if response.status_code == 522:
214
+ print(f"Pexels API Error: Connection Timed Out (522) between Cloudflare and Pexels server (attempt {attempt+1}/{max_retries}). Retrying in {retry_delay}s...")
215
+ # This error is external, retrying might help if temporary
216
+ time.sleep(retry_delay)
217
+ retry_delay *= 2
218
+ continue # Go to next attempt
219
+
220
+ response.raise_for_status() # Raise exceptions for other 4xx/5xx errors
221
 
222
  data = response.json()
223
  items = data.get(search_type, [])
224
 
225
  if not items:
226
  print(f"No Pexels {search_type} found for query: {query}")
227
+ return None # No results found
228
 
229
+ # --- Filtering logic remains the same ---
230
  valid_items = []
231
  if search_type == "videos":
232
  for video in items:
 
233
  hd_link = next((f['link'] for f in video.get('video_files', []) if f.get('quality') == 'hd' and f.get('width', 0) >= 1080), None)
234
  large_link = next((f['link'] for f in video.get('video_files', []) if f.get('quality') == 'large' and f.get('width', 0) >= 1080), None)
235
  medium_link = next((f['link'] for f in video.get('video_files', []) if f.get('quality') == 'medium'), None)
236
  link = hd_link or large_link or medium_link
237
+ if link: valid_items.append(link)
 
238
  else: # images
239
  for photo in items:
240
  link = photo.get("src", {}).get("large2x") or photo.get("src", {}).get("original")
241
+ if link: valid_items.append(link)
 
242
 
243
  if valid_items:
244
+ print(f"Found {len(valid_items)} suitable Pexels {search_type} for '{query}'. Selecting one.")
245
  return random.choice(valid_items)
246
  else:
247
  print(f"No suitable quality Pexels {search_type} found for query: {query}")
248
  return None
249
 
250
  except requests.exceptions.Timeout:
251
+ print(f"Pexels API request timed out after {timeout_duration}s (attempt {attempt+1}/{max_retries}). Retrying in {retry_delay}s...")
252
  time.sleep(retry_delay)
253
  retry_delay *= 2
254
  except requests.exceptions.RequestException as e:
255
+ print(f"Pexels API request error (attempt {attempt+1}/{max_retries}): {e}")
256
+ # Don't retry on general request errors unless specifically handled above
257
+ time.sleep(retry_delay)
258
+ retry_delay *= 2 # Still increase delay for next attempt if retrying
 
 
 
 
259
  except Exception as e:
260
  print(f"Unexpected error during Pexels search: {e}")
261
+ break # Stop retrying on unexpected python errors
262
 
263
  print(f"Pexels {search_type} search failed for '{query}' after {max_retries} attempts.")
264
  return None
265
 
266
+ # --- REMOVED: search_google_images Function ---
267
+ # def search_google_images(query):
268
+ # # ... function content removed ...
269
+ # pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
 
271
  def download_media(media_url, filename_prefix, target_folder):
272
  """Download media (image or video) from a URL."""
273
+ # --- Retain previous download_media function ---
274
+ # (No changes needed here based on the request, it handles Pexels URLs)
275
  try:
276
  headers = {"User-Agent": USER_AGENT}
277
  response = requests.get(media_url, headers=headers, stream=True, timeout=30)
278
  response.raise_for_status()
279
+ content_type = response.headers.get('content-type', '').lower()
 
 
280
  file_extension = ".jpg" # Default
281
+ if 'video' in content_type: file_extension = ".mp4"
282
+ elif 'image/jpeg' in content_type: file_extension = ".jpg"
283
+ elif 'image/png' in content_type: file_extension = ".png"
284
+ elif 'image/webp' in content_type: file_extension = ".webp"
 
 
 
 
285
  else: # Guess from URL
286
+ media_url_lower = media_url.lower()
287
+ if '.mp4' in media_url_lower: file_extension = ".mp4"
288
+ elif '.mov' in media_url_lower: file_extension = ".mov"
289
+ elif '.jpg' in media_url_lower or '.jpeg' in media_url_lower: file_extension = ".jpg"
290
+ elif '.png' in media_url_lower: file_extension = ".png"
291
+ elif '.webp' in media_url_lower: file_extension = ".webp"
292
 
293
  filename = os.path.join(target_folder, f"{filename_prefix}{file_extension}")
 
294
  with open(filename, 'wb') as f:
295
+ for chunk in response.iter_content(chunk_size=8192): f.write(chunk)
 
 
296
  print(f"Media downloaded successfully to: {filename}")
297
 
 
298
  if file_extension in [".jpg", ".png", ".webp"]:
299
  try:
300
  img = Image.open(filename)
301
+ img.verify()
302
  img.close()
 
303
  img = Image.open(filename)
304
  if img.mode != 'RGB':
305
  print(f"Converting image {filename} to RGB.")
306
  rgb_img = img.convert('RGB')
 
307
  jpg_filename = os.path.join(target_folder, f"{filename_prefix}.jpg")
308
  rgb_img.save(jpg_filename, "JPEG")
309
  rgb_img.close()
310
  img.close()
311
+ if filename != jpg_filename: os.remove(filename)
312
+ return jpg_filename
313
+ else: img.close()
 
 
 
314
  except Exception as e_validate:
315
  print(f"Downloaded file {filename} is not a valid image or conversion failed: {e_validate}")
316
  if os.path.exists(filename): os.remove(filename)
317
  return None
318
+ return filename
 
 
319
  except requests.exceptions.RequestException as e_download:
320
  print(f"Media download error from {media_url}: {e_download}")
321
  return None
 
323
  print(f"General error during media download/processing: {e_general}")
324
  return None
325
 
326
+ # --- MODIFIED: generate_media Function ---
327
  def generate_media(prompt):
328
  """
329
  Find and download a visual asset (video or image) based on the prompt.
330
+ Prioritizes Pexels Video, then Pexels Image. NO Google Image fallback.
331
+ Uses a single generic Pexels image search as the final fallback.
332
  """
333
  safe_prompt = re.sub(r'[^\w\s-]', '', prompt).strip().replace(' ', '_')
334
+ # Limit length of prompt in filename to avoid issues
335
+ safe_prompt = safe_prompt[:50]
336
+ filename_prefix = f"{safe_prompt}_{int(time.time())}"
337
 
338
  # 1. Try Pexels Video (if probability met)
339
  if random.random() < video_clip_probability:
 
340
  video_url = search_pexels(prompt, PEXELS_API_KEY, search_type="videos")
341
  if video_url:
342
  downloaded_path = download_media(video_url, filename_prefix + "_vid", TEMP_FOLDER)
343
  if downloaded_path:
344
+ print(f"Using Pexels video for '{prompt}'")
345
  return {"path": downloaded_path, "asset_type": "video"}
346
  else:
347
+ print(f"Pexels video search failed or no suitable video found for '{prompt}'. Trying image...")
348
 
349
  # 2. Try Pexels Image
 
350
  image_url = search_pexels(prompt, PEXELS_API_KEY, search_type="photos")
351
  if image_url:
352
  downloaded_path = download_media(image_url, filename_prefix + "_img", TEMP_FOLDER)
353
  if downloaded_path:
354
+ print(f"Using Pexels image for '{prompt}'")
355
  return {"path": downloaded_path, "asset_type": "image"}
356
  else:
357
+ print(f"Pexels image search failed for '{prompt}'.")
358
+
359
+ # --- REMOVED GOOGLE IMAGE SEARCH ---
360
+ # print(f"Falling back to Google Image search for: {prompt}")
361
+ # google_image_url = search_google_images(prompt)
362
+ # ... (removed logic) ...
363
+
364
+ # 3. Absolute Fallback: Generic Pexels Image Search
365
+ # Only use this if the specific prompt searches failed.
366
+ # Avoid searching for problematic terms like "Subscribe CTA".
367
+ fallback_terms = ["technology", "abstract", "nature", "background"]
368
+ # Don't use fallback for prompts that are clearly instructions/CTAs
369
+ if "subscribe" not in prompt.lower() and "cta" not in prompt.lower():
370
+ fallback_term = random.choice(fallback_terms)
371
+ print(f"All specific searches failed for '{prompt}'. Using Pexels fallback term: '{fallback_term}'")
372
+ fallback_url = search_pexels(fallback_term, PEXELS_API_KEY, search_type="photos")
373
+ if fallback_url:
374
+ downloaded_path = download_media(fallback_url, filename_prefix + "_fallback", TEMP_FOLDER)
375
+ if downloaded_path:
376
+ print(f"Using Pexels fallback image '{fallback_term}' for '{prompt}'")
377
+ return {"path": downloaded_path, "asset_type": "image"}
378
+ else:
379
+ print(f"Pexels fallback image download failed for term '{fallback_term}'.")
380
+ else:
381
+ print(f"Pexels fallback image search failed for term '{fallback_term}'.")
382
  else:
383
+ print(f"Skipping fallback search for instructional prompt: '{prompt}'")
384
 
 
 
 
 
 
 
 
385
 
386
+ # 4. Final Failure
387
+ print(f"FATAL: Could not retrieve any suitable media for prompt: '{prompt}' after all attempts.")
388
  return None # Indicate failure
389
 
390
+
391
  def generate_tts(text, voice_id, speed):
392
+ """Generate TTS audio using Kokoro, falling back to gTTS."""
393
+ # --- Retain previous generate_tts function ---
 
 
394
  safe_text_prefix = re.sub(r'[^\w\s-]', '', text[:20]).strip().replace(' ', '_')
395
  output_filename = os.path.join(TEMP_FOLDER, f"tts_{safe_text_prefix}_{voice_id}.wav")
396
+ if pipeline:
 
 
397
  try:
398
  print(f"Generating TTS with Kokoro (Voice: {voice_id}, Speed: {speed}) for: '{text[:30]}...'")
399
+ generator = pipeline(text, voice=voice_id, speed=speed)
 
400
  audio_segments = []
 
 
401
  for item in generator:
402
  if isinstance(item, tuple) and len(item) > 0 and isinstance(item[-1], np.ndarray):
403
+ audio_segments.append(item[-1])
404
  elif isinstance(item, np.ndarray):
405
+ audio_segments.append(item)
406
+ if not audio_segments: raise ValueError("Kokoro TTS returned no audio segments.")
 
 
 
 
 
407
  full_audio = np.concatenate(audio_segments) if len(audio_segments) > 0 else audio_segments[0]
 
 
408
  if full_audio.dtype != np.float32:
409
  full_audio = full_audio.astype(np.float32)
 
410
  max_val = np.max(np.abs(full_audio))
411
+ if max_val > 1.0: full_audio /= max_val
412
+ sf.write(output_filename, full_audio, 24000)
 
 
413
  print(f"Kokoro TTS audio saved to {output_filename}")
414
  return output_filename
415
  except Exception as e:
416
  print(f"Error with Kokoro TTS: {e}. Falling back to gTTS.")
 
 
 
417
  try:
418
  print(f"Generating TTS with gTTS for: '{text[:30]}...'")
419
+ tts = gTTS(text=text, lang='en', slow= (speed < 0.9) )
420
  mp3_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text_prefix}_gtts.mp3")
421
+ wav_path = output_filename
422
  tts.save(mp3_path)
 
 
423
  audio = AudioSegment.from_mp3(mp3_path)
424
  audio.export(wav_path, format="wav")
425
+ os.remove(mp3_path)
 
426
  print(f"gTTS audio saved and converted to {wav_path}")
427
  return wav_path
428
  except ImportError:
 
432
  print(f"Error with gTTS fallback: {fallback_error}")
433
  return None
434
 
 
435
  def apply_kenburns_effect(clip, target_resolution, duration):
436
  """Apply a randomized Ken Burns effect (zoom/pan) to an image clip."""
437
+ # --- Retain previous apply_kenburns_effect function ---
438
  target_w, target_h = target_resolution
439
  img_w, img_h = clip.size
440
+ scale_factor = 1.2
 
 
 
441
  scaled_w, scaled_h = img_w * scale_factor, img_h * scale_factor
 
 
442
  if scaled_w / scaled_h > target_w / target_h:
 
443
  final_h = target_h * scale_factor
444
  final_w = final_h * (img_w / img_h)
445
  else:
 
446
  final_w = target_w * scale_factor
447
  final_h = final_w * (img_h / img_w)
 
448
  final_w, final_h = int(final_w), int(final_h)
 
 
 
449
  try:
450
+ pil_img = Image.fromarray(clip.get_frame(0))
451
  resized_pil = pil_img.resize((final_w, final_h), Image.Resampling.LANCZOS)
452
  resized_clip = ImageClip(np.array(resized_pil)).set_duration(duration)
453
  except Exception as e:
454
  print(f"Warning: Error during high-quality resize for Ken Burns, using MoviePy default: {e}")
 
455
  resized_clip = clip.resize(newsize=(final_w, final_h)).set_duration(duration)
 
 
 
456
  max_move_x = final_w - target_w
457
  max_move_y = final_h - target_h
 
 
458
  effect = random.choice(['zoom_in', 'zoom_out', 'pan_lr', 'pan_rl', 'pan_td', 'pan_dt'])
459
+ if effect == 'zoom_in': zoom_start, zoom_end = 1.0, scale_factor; x_start, x_end = max_move_x / 2, max_move_x / 2; y_start, y_end = max_move_y / 2, max_move_y / 2
460
+ elif effect == 'zoom_out': zoom_start, zoom_end = scale_factor, 1.0; x_start, x_end = max_move_x / 2, max_move_x / 2; y_start, y_end = max_move_y / 2, max_move_y / 2
461
+ elif effect == 'pan_lr': zoom_start, zoom_end = scale_factor, scale_factor; x_start, x_end = 0, max_move_x; y_start, y_end = max_move_y / 2, max_move_y / 2
462
+ elif effect == 'pan_rl': zoom_start, zoom_end = scale_factor, scale_factor; x_start, x_end = max_move_x, 0; y_start, y_end = max_move_y / 2, max_move_y / 2
463
+ elif effect == 'pan_td': zoom_start, zoom_end = scale_factor, scale_factor; x_start, x_end = max_move_x / 2, max_move_x / 2; y_start, y_end = 0, max_move_y
464
+ else: zoom_start, zoom_end = scale_factor, scale_factor; x_start, x_end = max_move_x / 2, max_move_x / 2; y_start, y_end = max_move_y, 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
465
  def make_frame(t):
 
466
  interp = t / duration if duration else 0
 
 
467
  current_zoom = zoom_start + (zoom_end - zoom_start) * interp
468
  current_x = x_start + (x_end - x_start) * interp
469
  current_y = y_start + (y_end - y_start) * interp
470
+ crop_w = target_w / (current_zoom / scale_factor); crop_h = target_h / (current_zoom / scale_factor)
471
+ crop_w = max(1, int(crop_w)); crop_h = max(1, int(crop_h))
472
+ x1 = current_x; y1 = current_y
473
+ x1 = max(0, min(x1, final_w - crop_w)); y1 = max(0, min(y1, final_h - crop_h))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
474
  frame = resized_clip.get_frame(t)
 
 
 
 
475
  cropped_frame = frame[int(y1):int(y1 + crop_h), int(x1):int(x1 + crop_w)]
 
 
 
476
  final_frame = cv2.resize(cropped_frame, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)
 
477
  return final_frame
478
+ return resized_clip.fl(make_frame, apply_to=['mask'])
 
 
479
 
480
  def resize_to_fill(clip, target_resolution):
481
  """Resize and crop a video clip to fill the target resolution."""
482
+ # --- Retain previous resize_to_fill function ---
483
  target_w, target_h = target_resolution
484
  target_aspect = target_w / target_h
485
+ if clip.w / clip.h > target_aspect: resized_clip = clip.resize(height=target_h)
486
+ else: resized_clip = clip.resize(width=target_w)
 
 
 
 
 
 
 
 
 
487
  crop_x = max(0, (resized_clip.w - target_w) / 2)
488
  crop_y = max(0, (resized_clip.h - target_h) / 2)
 
489
  cropped_clip = resized_clip.crop(x1=crop_x, y1=crop_y, width=target_w, height=target_h)
490
  return cropped_clip
491
 
492
  def add_background_music(video_clip, music_file_path, volume):
493
  """Add background music, looping if necessary."""
494
+ # --- Retain previous add_background_music function ---
495
  if not music_file_path or not os.path.exists(music_file_path):
496
  print("No background music file found or provided. Skipping.")
497
  return video_clip
 
498
  try:
499
  print(f"Adding background music from: {music_file_path}")
500
  bg_music = AudioFileClip(music_file_path)
501
+ if bg_music.duration > video_clip.duration: bg_music = bg_music.subclip(0, video_clip.duration)
 
 
 
502
  elif bg_music.duration < video_clip.duration:
 
503
  loops_needed = math.ceil(video_clip.duration / bg_music.duration)
504
  bg_music = concatenate_audioclips([bg_music] * loops_needed)
505
+ bg_music = bg_music.subclip(0, video_clip.duration)
 
 
506
  bg_music = bg_music.volumex(volume)
507
+ # Check if video_clip has audio before composing
508
+ if video_clip.audio:
509
+ final_audio = CompositeAudioClip([video_clip.audio, bg_music])
510
+ else:
511
+ # If original clip has no audio, just use the background music
512
+ final_audio = bg_music
513
  video_clip = video_clip.set_audio(final_audio)
514
  print("Background music added successfully.")
515
  return video_clip
 
516
  except Exception as e:
517
  print(f"Error adding background music: {e}. Skipping.")
518
+ return video_clip
 
519
 
520
  def create_segment_clip(media_info, tts_path, narration_text):
521
  """Create a single video segment (clip) with visuals, audio, and subtitles."""
522
+ # --- Retain previous create_segment_clip function ---
523
  try:
524
  media_path = media_info['path']
525
  asset_type = media_info['asset_type']
526
  print(f"Creating clip segment: Type={asset_type}, Media={os.path.basename(media_path)}")
527
+ if not os.path.exists(tts_path): print(f"Error: TTS file not found: {tts_path}"); return None
 
 
 
 
528
  audio_clip = AudioFileClip(tts_path)
529
+ segment_duration = audio_clip.duration + 0.3
 
 
 
530
  if asset_type == "video":
531
+ if not os.path.exists(media_path): print(f"Error: Video file not found: {media_path}"); return None
 
 
532
  video_clip = VideoFileClip(media_path)
 
533
  if video_clip.duration < segment_duration:
 
534
  loops = math.ceil(segment_duration / video_clip.duration)
535
+ try:
536
+ # Handle potential zero duration clips during looping
537
+ if video_clip.duration > 0:
538
+ video_clip = concatenate_videoclips([video_clip] * loops)
539
+ else:
540
+ print(f"Warning: Video clip has zero duration, cannot loop: {media_path}")
541
+ # Create a short black clip instead? Or fail? Let's fail for now.
542
+ return None
543
+ except Exception as loop_err:
544
+ print(f"Error looping video {media_path}: {loop_err}")
545
+ return None # Fail segment if looping fails
546
+
547
  video_clip = video_clip.subclip(0, segment_duration)
 
548
  visual_clip = resize_to_fill(video_clip, TARGET_RESOLUTION)
 
549
  elif asset_type == "image":
550
+ if not os.path.exists(media_path): print(f"Error: Image file not found: {media_path}"); return None
 
 
 
551
  img_clip = ImageClip(media_path).set_duration(segment_duration)
 
552
  visual_clip = apply_kenburns_effect(img_clip, TARGET_RESOLUTION, segment_duration)
 
553
  visual_clip = visual_clip.resize(newsize=TARGET_RESOLUTION)
554
+ else: print(f"Error: Unknown asset type: {asset_type}"); return None
 
 
 
 
 
555
  visual_clip = visual_clip.fadein(0.15).fadeout(0.15)
 
 
556
  subtitle_clips = []
557
  if USE_CAPTIONS and narration_text:
558
  words = narration_text.split()
559
+ max_words_per_chunk = 5; chunks = []; current_chunk = []
 
 
 
560
  for word in words:
561
  current_chunk.append(word)
562
+ if len(current_chunk) >= max_words_per_chunk: chunks.append(" ".join(current_chunk)); current_chunk = []
563
+ if current_chunk: chunks.append(" ".join(current_chunk))
564
+ if not chunks: print("Warning: Narration text is empty, skipping subtitles.")
 
 
 
 
 
 
565
  else:
566
+ num_chunks = len(chunks); chunk_duration = audio_clip.duration / num_chunks
567
+ start_time = 0.1
 
 
568
  for i, chunk_text in enumerate(chunks):
569
+ end_time = min(start_time + chunk_duration, segment_duration - 0.1)
570
+ try:
571
+ txt_clip = TextClip(txt=chunk_text, fontsize=font_size, font=caption_font, color=caption_style_text_color,
572
+ bg_color=caption_style_bg_color, method='label', align='center',
573
+ size=(TARGET_RESOLUTION[0] * 0.8, None))
574
+ txt_clip = txt_clip.set_position(('center', TARGET_RESOLUTION[1] * 0.80))
575
+ txt_clip = txt_clip.set_start(start_time).set_duration(max(0.1, end_time - start_time)) # Ensure non-zero duration
576
+ subtitle_clips.append(txt_clip)
577
+ start_time = end_time
578
+ except Exception as txt_err:
579
+ print(f"ERROR creating TextClip for '{chunk_text}': {txt_err}. Skipping subtitle chunk.")
580
+ # If one subtitle fails, continue without it
581
+
582
+ final_clip = CompositeVideoClip([visual_clip] + subtitle_clips) if subtitle_clips else visual_clip
583
+ final_clip = final_clip.set_audio(audio_clip.set_start(0.15))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
584
  print(f"Clip segment created successfully. Duration: {final_clip.duration:.2f}s")
585
  return final_clip
 
586
  except Exception as e:
587
  print(f"Error creating clip segment: {e}")
588
  import traceback
589
+ traceback.print_exc()
590
  return None
591
 
592
  # ---------------- Main Video Generation Function ---------------- #
593
 
594
  def generate_full_video(user_input, resolution_choice, caption_choice, music_file_info):
595
+ """Main function orchestrating the video generation process."""
596
+ # --- Retain most of previous generate_full_video function ---
597
+ # (Ensure it handles None from generate_media correctly)
 
598
  global TARGET_RESOLUTION, TEMP_FOLDER, USE_CAPTIONS
599
+ print("\n--- Starting Video Generation ---"); start_time = time.time()
600
+ if resolution_choice == "Short (9:16)": TARGET_RESOLUTION = (1080, 1920); print("Resolution set to: Short (1080x1920)")
601
+ else: TARGET_RESOLUTION = (1920, 1080); print("Resolution set to: Full HD (1920x1080)")
602
+ USE_CAPTIONS = (caption_choice == "Yes"); print(f"Captions Enabled: {USE_CAPTIONS}")
603
+ TEMP_FOLDER = tempfile.mkdtemp(prefix="aivideo_"); print(f"Temporary folder created: {TEMP_FOLDER}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
604
  music_file_path = None
605
  if music_file_info is not None:
606
  try:
 
 
607
  music_file_path = os.path.join(TEMP_FOLDER, "background_music.mp3")
608
  shutil.copy(music_file_info.name, music_file_path)
609
  print(f"Background music copied to: {music_file_path}")
610
+ except Exception as e: print(f"Error handling uploaded music file: {e}"); music_file_path = None
 
 
 
611
 
612
+ print("\nStep 1: Generating script..."); script_text = generate_script(user_input)
613
+ if not script_text: print("ERROR: Failed to generate script. Aborting."); shutil.rmtree(TEMP_FOLDER); return None, "Error: Script generation failed."
 
 
 
 
 
 
 
 
 
614
  print("Script Generated:\n", script_text)
615
 
616
+ print("\nStep 2: Parsing script..."); segments = parse_script(script_text)
617
+ if not segments: print("ERROR: Failed to parse script. Aborting."); shutil.rmtree(TEMP_FOLDER); return None, "Error: Script parsing failed."
 
 
 
 
 
 
618
  print(f"Successfully parsed {len(segments)} segments.")
619
 
 
 
620
  print("\nStep 3: Generating media and TTS for each segment...")
621
+ segment_clips = []; total_segments = len(segments)
 
622
  for i, segment in enumerate(segments):
623
  print(f"\n--- Processing Segment {i+1}/{total_segments} ---")
624
  print(f" Prompt: {segment['prompt']}")
625
  print(f" Narration: {segment['narration']}")
626
 
 
627
  media_info = generate_media(segment['prompt'])
628
+ # --- Crucial Check ---
629
  if not media_info:
630
+ print(f"Warning: Failed to get media for segment {i+1} ('{segment['prompt']}'). Skipping this segment.")
631
+ continue # Skip segment if media generation failed
632
 
 
633
  tts_path = generate_tts(segment['narration'], selected_voice, voice_speed)
634
  if not tts_path:
635
  print(f"Warning: Failed to generate TTS for segment {i+1}. Skipping segment.")
 
636
  if media_info and os.path.exists(media_info['path']):
637
+ try: os.remove(media_info['path']); print(f"Cleaned up unused media: {media_info['path']}")
638
  except OSError: pass
639
+ continue
640
 
 
641
  clip = create_segment_clip(media_info, tts_path, segment['narration'])
642
  if clip:
643
  segment_clips.append(clip)
644
  else:
645
  print(f"Warning: Failed to create video clip for segment {i+1}. Skipping.")
 
646
  if media_info and os.path.exists(media_info['path']):
647
+ try: os.remove(media_info['path']); print(f"Cleaned up media for failed clip: {media_info['path']}")
648
  except OSError: pass
649
  if tts_path and os.path.exists(tts_path):
650
+ try: os.remove(tts_path); print(f"Cleaned up TTS for failed clip: {tts_path}")
651
  except OSError: pass
652
 
 
653
  if not segment_clips:
654
  print("ERROR: No video clips were successfully created. Aborting.")
655
  shutil.rmtree(TEMP_FOLDER)
656
+ return None, "Error: Failed to create any video segments. Check logs for media/TTS issues."
 
657
 
658
+ print("\nStep 4: Concatenating video segments...");
 
659
  try:
660
+ # Filter out potential None values just in case, although the loop should prevent them
661
+ valid_clips = [c for c in segment_clips if c is not None]
662
+ if not valid_clips:
663
+ raise ValueError("No valid clips remained after processing.")
664
+ final_video = concatenate_videoclips(valid_clips, method="compose")
665
  print("Segments concatenated successfully.")
666
  except Exception as e:
667
+ print(f"ERROR: Failed to concatenate video clips: {e}"); shutil.rmtree(TEMP_FOLDER); return None, f"Error: Concatenation failed: {e}"
 
 
 
 
 
 
 
668
 
669
+ print("\nStep 5: Adding background music..."); final_video = add_background_music(final_video, music_file_path, bg_music_volume)
670
 
671
+ print(f"\nStep 6: Exporting final video to '{OUTPUT_VIDEO_FILENAME}'..."); export_success = False
 
672
  try:
673
+ final_video.write_videofile(OUTPUT_VIDEO_FILENAME, codec='libx264', audio_codec='aac', fps=fps, preset=preset, threads=4, logger='bar')
 
 
 
 
 
 
 
 
674
  print(f"Final video saved successfully as {OUTPUT_VIDEO_FILENAME}")
675
  export_success = True
676
  except Exception as e:
677
+ print(f"ERROR: Failed to write final video file: {e}"); import traceback; traceback.print_exc()
 
 
 
 
 
 
 
 
 
 
 
 
678
 
679
+ print("\nStep 7: Cleaning up temporary files...");
680
+ try: shutil.rmtree(TEMP_FOLDER); print(f"Temporary folder {TEMP_FOLDER} removed.")
681
+ except Exception as e: print(f"Warning: Failed to remove temporary folder {TEMP_FOLDER}: {e}")
682
 
683
+ end_time = time.time(); total_time = end_time - start_time
684
+ print(f"\n--- Video Generation Finished ---"); print(f"Total time: {total_time:.2f} seconds")
685
+ if export_success: return OUTPUT_VIDEO_FILENAME, f"Video generation complete! Time: {total_time:.2f}s"
686
+ else: return None, f"Error: Video export failed. Check logs. Time: {total_time:.2f}s"
 
 
 
 
 
687
 
688
 
689
  # ---------------- Gradio Interface Definition ---------------- #
690
+ # --- Retain previous Gradio Interface code ---
 
691
  VOICE_CHOICES = {
692
+ 'Emma (US Female)': 'af_heart', 'Bella (US Female)': 'af_bella', 'Nicole (US Female)': 'af_nicole',
693
+ 'Sarah (US Female)': 'af_sarah', 'Michael (US Male)': 'am_michael', 'Eric (US Male)': 'am_eric',
694
+ 'Adam (US Male)': 'am_adam', 'Emma (UK Female)': 'bf_emma', 'Alice (UK Female)': 'bf_alice',
695
+ 'George (UK Male)': 'bm_george', 'Daniel (UK Male)': 'bm_daniel',
 
 
 
 
 
 
 
 
 
696
  }
697
def gradio_interface_handler(user_prompt, resolution, captions, bg_music, voice_name, video_prob, music_vol, video_fps, export_preset, tts_speed, caption_size):
    """Apply the UI settings to the module-level options and run video generation.

    Args:
        user_prompt: Topic or instructions for the video.
        resolution: Resolution label forwarded to ``generate_full_video``.
        captions: Caption choice forwarded to ``generate_full_video``.
        bg_music: Uploaded background music (or ``None``), forwarded as-is.
        voice_name: Dropdown label looked up in ``VOICE_CHOICES``; unknown
            labels fall back to ``'af_heart'``.
        video_prob: Video-clip percentage (0-100); stored as a 0-1 fraction.
        music_vol: Background music volume.
        video_fps: Export frames per second.
        export_preset: Encoder preset name.
        tts_speed: Narration speed.
        caption_size: Caption font size.

    Returns:
        ``(video_path, status_message)``; ``video_path`` is ``None`` on failure,
        including when the prompt is missing or blank.
    """
    global selected_voice, voice_speed, font_size, video_clip_probability, bg_music_volume, fps, preset
    print("\n--- Received Request from Gradio ---")

    # Reject a missing/blank prompt up front: slicing None would raise, and an
    # empty prompt would run the whole script/media/TTS pipeline for nothing.
    if not user_prompt or not user_prompt.strip():
        status_message = "Error: Please enter a video concept or topic."
        print(f"Gradio Handler Status: {status_message}")
        return None, status_message

    # Echo the request for debugging (prompt is truncated to 50 characters).
    print(f"Prompt: {user_prompt[:50]}...")
    print(f"Resolution: {resolution}, Captions: {captions}, Music uploaded: {bg_music is not None}")
    print(f"Voice: {voice_name}, Speed: {tts_speed}, Caption size: {caption_size}")
    print(f"Video clip %: {video_prob}, Music volume: {music_vol}, FPS: {video_fps}, Preset: {export_preset}")

    selected_voice = VOICE_CHOICES.get(voice_name, 'af_heart')
    voice_speed = tts_speed
    font_size = caption_size
    video_clip_probability = video_prob / 100.0  # percentage -> fraction
    bg_music_volume = music_vol
    fps = video_fps
    preset = export_preset

    video_path, status_message = generate_full_video(user_prompt, resolution, captions, bg_music)
    print(f"Gradio Handler Status: {status_message}")
    return video_path, status_message
708
 
 
 
709
  with gr.Blocks(theme=gr.themes.Soft()) as iface:
710
  gr.Markdown("# 🎬 AI Documentary Video Generator")
711
+ gr.Markdown("Enter a topic or detailed instructions, customize settings, and generate a short documentary-style video.")
 
712
  with gr.Row():
713
  with gr.Column(scale=2):
714
+ prompt_input = gr.Textbox(label="Video Concept / Topic / Script", placeholder="e.g., 'The history of coffee'...", lines=4)
 
 
 
 
715
  submit_button = gr.Button("Generate Video", variant="primary")
716
  status_output = gr.Textbox(label="Status", interactive=False)
717
+ with gr.Column(scale=1): video_output = gr.Video(label="Generated Video")
 
 
 
 
718
  with gr.Accordion("⚙️ Advanced Settings", open=False):
719
  gr.Markdown("### Video & Audio Settings")
720
  with gr.Row():
721
  resolution_dd = gr.Dropdown(["Full HD (16:9)", "Short (9:16)"], label="Resolution", value="Full HD (16:9)")
722
  caption_dd = gr.Radio(["Yes", "No"], label="Generate Captions", value="Yes")
723
  music_upload = gr.File(label="Upload Background Music (MP3)", file_types=[".mp3"])
 
724
  gr.Markdown("### Voice & Narration")
725
  with gr.Row():
726
  voice_dd = gr.Dropdown(choices=list(VOICE_CHOICES.keys()), label="Narration Voice", value="Emma (US Female)")
727
  speed_slider = gr.Slider(0.5, 1.5, value=0.9, step=0.05, label="Voice Speed")
 
728
  gr.Markdown("### Visuals & Style")
729
  with gr.Row():
730
  video_prob_slider = gr.Slider(0, 100, value=35, step=5, label="Video Clip % (vs. Images)")
731
  caption_size_slider = gr.Slider(20, 80, value=45, step=1, label="Caption Font Size")
 
732
  gr.Markdown("### Export Settings")
733
  with gr.Row():
734
  music_vol_slider = gr.Slider(0.0, 1.0, value=0.08, step=0.01, label="Background Music Volume")
735
  fps_slider = gr.Slider(15, 60, value=30, step=1, label="Video FPS")
736
+ preset_dd = gr.Dropdown(choices=["ultrafast", "superfast", "veryfast", "faster", "fast", "medium", "slow", "slower", "veryslow"], value="veryfast", label="Export Quality/Speed Preset")
737
+ submit_button.click(fn=gradio_interface_handler, inputs=[prompt_input, resolution_dd, caption_dd, music_upload, voice_dd, video_prob_slider, music_vol_slider, fps_slider, preset_dd, speed_slider, caption_size_slider], outputs=[video_output, status_output])
 
 
 
 
 
 
 
 
 
 
 
 
 
738
 
739
  # Launch the interface
740
  if __name__ == "__main__":
741
  print("Launching Gradio Interface...")
 
742
  if PEXELS_API_KEY == 'YOUR_PEXELS_API_KEY' or OPENROUTER_API_KEY == 'YOUR_OPENROUTER_API_KEY':
743
  print("\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
744
  print("!!! WARNING: API Keys not set in the script. !!!")
745
  print("!!! Please replace 'YOUR_PEXELS_API_KEY' and !!!")
746
  print("!!! 'YOUR_OPENROUTER_API_KEY' with your actual keys. !!!")
747
  print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n")
748
+ iface.launch(share=True, debug=True)