testdeep123 committed on
Commit
6fa5ad1
·
verified ·
1 Parent(s): ab04136

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +147 -212
app.py CHANGED
@@ -1,158 +1,95 @@
1
-
2
-
3
- # Import necessary libraries
4
- from kokoro import KPipeline
5
-
6
- import soundfile as sf
7
- import torch
8
-
9
- import soundfile as sf
10
  import os
11
- from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip
12
- from PIL import Image
13
  import tempfile
14
  import random
15
- import cv2
16
  import math
17
- import os, requests, io, time, re, random
 
 
 
 
 
 
 
 
 
 
18
  from moviepy.editor import (
19
  VideoFileClip, concatenate_videoclips, AudioFileClip, ImageClip,
20
  CompositeVideoClip, TextClip, CompositeAudioClip
21
  )
22
-
23
  import moviepy.video.fx.all as vfx
24
  import moviepy.config as mpy_config
25
- from pydub import AudioSegment
26
- from pydub.generators import Sine
27
 
28
- from PIL import Image, ImageDraw, ImageFont
29
- import numpy as np
30
- from bs4 import BeautifulSoup
31
- import base64
32
- from urllib.parse import quote
33
- import pysrt
34
- from gtts import gTTS
35
- import gradio as gr # Import Gradio
36
-
37
- # Initialize Kokoro TTS pipeline (using American English)
38
- pipeline = KPipeline(lang_code='a') # Use voice 'af_heart' for American English
39
- # Ensure ImageMagick binary is set
40
  mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})
41
 
42
# ---------------- Global Configuration ---------------- #
# Credentials are read from the environment (for example, secrets configured
# on the hosting platform) instead of being committed to the repository.
# When a variable is unset the value is an empty string, so the module still
# imports and the constants keep their str type.
# NOTE(review): the keys that used to be hard-coded here remain visible in
# the commit history and should be revoked and reissued.
PEXELS_API_KEY = os.environ.get("PEXELS_API_KEY", "")
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")

# Model identifier sent as the 'model' field of the OpenRouter request.
OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"

# File name the finished video is exported to.
OUTPUT_VIDEO_FILENAME = "final_video.mp4"

# User-Agent header sent with the image search and image download requests.
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
48
 
49
- # ---------------- Helper Functions ---------------- #
50
- # (Your existing helper functions remain unchanged: generate_script, parse_script,
51
- # search_pexels_videos, search_pexels_images, search_google_images, download_image,
52
- # download_video, generate_media, generate_tts, apply_kenburns_effect,
53
- # resize_to_fill, find_mp3_files, add_background_music, create_clip,
54
- # fix_imagemagick_policy)
55
-
56
- # Define these globally as they were in your original code but will be set per run
57
  TARGET_RESOLUTION = None
58
  CAPTION_COLOR = None
 
59
  TEMP_FOLDER = None
60
 
 
61
  def generate_script(user_input):
62
- """Generate documentary script with proper OpenRouter handling."""
63
  headers = {
64
  'Authorization': f'Bearer {OPENROUTER_API_KEY}',
65
  'HTTP-Referer': 'https://your-domain.com',
66
  'X-Title': 'AI Documentary Maker'
67
  }
68
-
69
  prompt = f"""Short Documentary Script GeneratorInstructions:
70
-
71
  If I say "use this," just output the script exactly as I gave it.
72
  If I only give topics, generate a script based on them.
73
  If I provide a full script, rewrite it without any changes. Make everything short simple and humarous funny and act as serious but humarous. And don't say anything off topic. Also alway say a funny statement to subscribe based on the video topic at the end. Use normal conversational text like a normal person talking and avoid AI phase make the statements humanize and normal conversational
74
  And always talk negative views don't really wants any positive statement and the full script will be one video about one topic so keep in mind
75
  Formatting Rules:
76
-
77
-
78
  Title in Square Brackets:
79
-
80
-
81
  Each section starts with a one-word title inside [ ] (max two words if necessary).
82
  This title will be used as a search term for Pexels footage.
83
-
84
-
85
-
86
  Casual & Funny Narration:
87
-
88
-
89
  Each section has 5-10 words of narration.
90
  Keep it natural, funny, and unpredictable (not robotic, poetic, or rhythmic).
91
-
92
-
93
-
94
  No Special Formatting:
95
-
96
-
97
  No bold, italics, or special characters. You are a assistant AI your task is to create script. You aren't a chatbot. So, don't write extra text
98
-
99
-
100
-
101
  Generalized Search Terms:
102
-
103
-
104
  If a term is too specific, make it more general for Pexels search.
105
-
106
-
107
-
108
  Scene-Specific Writing:
109
-
110
-
111
  Each section describes only what should be shown in the video.
112
-
113
-
114
-
115
  Output Only the Script, and also make it funny and humarous and helirous and also add to subscribe with a funny statement like subscribe now or .....
116
-
117
-
118
  No extra text, just the script.
119
-
120
-
121
-
122
  Example Output:
123
  [North Korea]
124
-
125
  Top 5 unknown facts about North Korea.
126
-
127
  [Invisibility]
128
-
129
  North Korea’s internet speed is so fast… it doesn’t exist.
130
-
131
  [Leadership]
132
-
133
  Kim Jong-un once won an election with 100% votes… against himself.
134
-
135
  [Magic]
136
-
137
  North Korea discovered time travel. That’s why their news is always from the past.
138
-
139
  [Warning]
140
-
141
  Subscribe now, or Kim Jong-un will send you a free one-way ticket… to North Korea.
142
-
143
  [Freedom]
144
-
145
  North Korean citizens can do anything… as long as it's government-approved.
146
  Now here is the Topic/scrip: {user_input}
147
  """
148
-
149
  data = {
150
  'model': OPENROUTER_MODEL,
151
  'messages': [{'role': 'user', 'content': prompt}],
152
  'temperature': 0.4,
153
  'max_tokens': 5000
154
  }
155
-
156
  try:
157
  response = requests.post(
158
  'https://openrouter.ai/api/v1/chat/completions',
@@ -160,7 +97,6 @@ Now here is the Topic/scrip: {user_input}
160
  json=data,
161
  timeout=30
162
  )
163
-
164
  if response.status_code == 200:
165
  response_data = response.json()
166
  if 'choices' in response_data and len(response_data['choices']) > 0:
@@ -171,22 +107,14 @@ Now here is the Topic/scrip: {user_input}
171
  else:
172
  print(f"API Error {response.status_code}: {response.text}")
173
  return None
174
-
175
  except Exception as e:
176
  print(f"Request failed: {str(e)}")
177
  return None
178
 
179
  def parse_script(script_text):
180
- """
181
- Parse the generated script into a list of elements.
182
- For each section, create two elements:
183
- - A 'media' element using the section title as the visual prompt.
184
- - A 'tts' element with the narration text, voice info, and computed duration.
185
- """
186
  sections = {}
187
  current_title = None
188
  current_text = ""
189
-
190
  try:
191
  for line in script_text.splitlines():
192
  line = line.strip()
@@ -200,63 +128,50 @@ def parse_script(script_text):
200
  current_text = line[bracket_end+1:].strip()
201
  elif current_title:
202
  current_text += line + " "
203
-
204
  if current_title:
205
  sections[current_title] = current_text.strip()
206
-
207
  elements = []
208
  for title, narration in sections.items():
209
  if not title or not narration:
210
  continue
211
-
212
  media_element = {"type": "media", "prompt": title, "effects": "fade-in"}
213
  words = narration.split()
214
  duration = max(3, len(words) * 0.5)
215
  tts_element = {"type": "tts", "text": narration, "voice": "en", "duration": duration}
216
  elements.append(media_element)
217
  elements.append(tts_element)
218
-
219
  return elements
220
  except Exception as e:
221
  print(f"Error parsing script: {e}")
222
  return []
223
 
224
  def search_pexels_videos(query, pexels_api_key):
225
- """Search for a video on Pexels by query and return a random HD video."""
226
  headers = {'Authorization': pexels_api_key}
227
  base_url = "https://api.pexels.com/videos/search"
228
  num_pages = 3
229
  videos_per_page = 15
230
-
231
  max_retries = 3
232
  retry_delay = 1
233
-
234
  search_query = query
235
  all_videos = []
236
-
237
  for page in range(1, num_pages + 1):
238
  for attempt in range(max_retries):
239
  try:
240
  params = {"query": search_query, "per_page": videos_per_page, "page": page}
241
  response = requests.get(base_url, headers=headers, params=params, timeout=10)
242
-
243
  if response.status_code == 200:
244
  data = response.json()
245
  videos = data.get("videos", [])
246
-
247
  if not videos:
248
  print(f"No videos found on page {page}.")
249
  break
250
-
251
  for video in videos:
252
  video_files = video.get("video_files", [])
253
  for file in video_files:
254
  if file.get("quality") == "hd":
255
  all_videos.append(file.get("link"))
256
  break
257
-
258
  break
259
-
260
  elif response.status_code == 429:
261
  print(f"Rate limit hit (attempt {attempt+1}/{max_retries}). Retrying in {retry_delay} seconds...")
262
  time.sleep(retry_delay)
@@ -269,7 +184,6 @@ def search_pexels_videos(query, pexels_api_key):
269
  retry_delay *= 2
270
  else:
271
  break
272
-
273
  except requests.exceptions.RequestException as e:
274
  print(f"Request exception: {e}")
275
  if attempt < max_retries - 1:
@@ -278,7 +192,6 @@ def search_pexels_videos(query, pexels_api_key):
278
  retry_delay *= 2
279
  else:
280
  break
281
-
282
  if all_videos:
283
  random_video = random.choice(all_videos)
284
  print(f"Selected random video from {len(all_videos)} HD videos")
@@ -288,18 +201,14 @@ def search_pexels_videos(query, pexels_api_key):
288
  return None
289
 
290
  def search_pexels_images(query, pexels_api_key):
291
- """Search for an image on Pexels by query."""
292
  headers = {'Authorization': pexels_api_key}
293
  url = "https://api.pexels.com/v1/search"
294
  params = {"query": query, "per_page": 5, "orientation": "landscape"}
295
-
296
  max_retries = 3
297
  retry_delay = 1
298
-
299
  for attempt in range(max_retries):
300
  try:
301
  response = requests.get(url, headers=headers, params=params, timeout=10)
302
-
303
  if response.status_code == 200:
304
  data = response.json()
305
  photos = data.get("photos", [])
@@ -310,7 +219,6 @@ def search_pexels_images(query, pexels_api_key):
310
  else:
311
  print(f"No images found for query: {query}")
312
  return None
313
-
314
  elif response.status_code == 429:
315
  print(f"Rate limit hit (attempt {attempt+1}/{max_retries}). Retrying in {retry_delay} seconds...")
316
  time.sleep(retry_delay)
@@ -321,32 +229,27 @@ def search_pexels_images(query, pexels_api_key):
321
  print(f"Retrying in {retry_delay} seconds...")
322
  time.sleep(retry_delay)
323
  retry_delay *= 2
324
-
325
  except requests.exceptions.RequestException as e:
326
  print(f"Request exception: {e}")
327
  if attempt < max_retries - 1:
328
  print(f"Retrying in {retry_delay} seconds...")
329
  time.sleep(retry_delay)
330
  retry_delay *= 2
331
-
332
  print(f"No Pexels images found for query: {query} after all attempts")
333
  return None
334
 
335
  def search_google_images(query):
336
- """Search for images on Google Images (for news-related queries)"""
337
  try:
338
  search_url = f"https://www.google.com/search?q={quote(query)}&tbm=isch"
339
  headers = {"User-Agent": USER_AGENT}
340
  response = requests.get(search_url, headers=headers, timeout=10)
341
  soup = BeautifulSoup(response.text, "html.parser")
342
-
343
  img_tags = soup.find_all("img")
344
  image_urls = []
345
  for img in img_tags:
346
  src = img.get("src", "")
347
  if src.startswith("http") and "gstatic" not in src:
348
  image_urls.append(src)
349
-
350
  if image_urls:
351
  return random.choice(image_urls[:5]) if len(image_urls) >= 5 else image_urls[0]
352
  else:
@@ -357,19 +260,15 @@ def search_google_images(query):
357
  return None
358
 
359
  def download_image(image_url, filename):
360
- """Download an image from a URL to a local file with enhanced error handling."""
361
  try:
362
  headers = {"User-Agent": USER_AGENT}
363
  print(f"Downloading image from: {image_url} to {filename}")
364
  response = requests.get(image_url, headers=headers, stream=True, timeout=15)
365
  response.raise_for_status()
366
-
367
  with open(filename, 'wb') as f:
368
  for chunk in response.iter_content(chunk_size=8192):
369
  f.write(chunk)
370
-
371
  print(f"Image downloaded successfully to: {filename}")
372
-
373
  try:
374
  img = Image.open(filename)
375
  img.verify()
@@ -384,7 +283,6 @@ def download_image(image_url, filename):
384
  if os.path.exists(filename):
385
  os.remove(filename)
386
  return None
387
-
388
  except requests.exceptions.RequestException as e_download:
389
  print(f"Image download error: {e_download}")
390
  if os.path.exists(filename):
@@ -397,7 +295,6 @@ def download_image(image_url, filename):
397
  return None
398
 
399
  def download_video(video_url, filename):
400
- """Download a video from a URL to a local file."""
401
  try:
402
  response = requests.get(video_url, stream=True, timeout=30)
403
  response.raise_for_status()
@@ -413,13 +310,7 @@ def download_video(video_url, filename):
413
  return None
414
 
415
  def generate_media(prompt, user_image=None, current_index=0, total_segments=1):
416
- """
417
- Generate a visual asset by first searching for a video or using a specific search strategy.
418
- For news-related queries, use Google Images.
419
- Returns a dict: {'path': <file_path>, 'asset_type': 'video' or 'image'}.
420
- """
421
  safe_prompt = re.sub(r'[^\w\s-]', '', prompt).strip().replace(' ', '_')
422
-
423
  if "news" in prompt.lower():
424
  print(f"News-related query detected: {prompt}. Using Google Images...")
425
  image_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}_news.jpg")
@@ -431,7 +322,6 @@ def generate_media(prompt, user_image=None, current_index=0, total_segments=1):
431
  return {"path": downloaded_image, "asset_type": "image"}
432
  else:
433
  print(f"Google Images search failed for prompt: {prompt}")
434
-
435
  if random.random() < 0.25:
436
  video_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}_video.mp4")
437
  video_url = search_pexels_videos(prompt, PEXELS_API_KEY)
@@ -442,7 +332,6 @@ def generate_media(prompt, user_image=None, current_index=0, total_segments=1):
442
  return {"path": downloaded_video, "asset_type": "video"}
443
  else:
444
  print(f"Pexels video search failed for prompt: {prompt}")
445
-
446
  image_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}.jpg")
447
  image_url = search_pexels_images(prompt, PEXELS_API_KEY)
448
  if image_url:
@@ -452,7 +341,6 @@ def generate_media(prompt, user_image=None, current_index=0, total_segments=1):
452
  return {"path": downloaded_image, "asset_type": "image"}
453
  else:
454
  print(f"Pexels image download failed for prompt: {prompt}")
455
-
456
  fallback_terms = ["nature", "people", "landscape", "technology", "business"]
457
  for term in fallback_terms:
458
  print(f"Trying fallback image search with term: {term}")
@@ -467,12 +355,10 @@ def generate_media(prompt, user_image=None, current_index=0, total_segments=1):
467
  print(f"Fallback image download failed for term: {term}")
468
  else:
469
  print(f"Fallback image search failed for term: {term}")
470
-
471
  print(f"Failed to generate visual asset for prompt: {prompt}")
472
  return None
473
 
474
  def generate_silent_audio(duration, sample_rate=24000):
475
- """Generate a silent WAV audio file lasting 'duration' seconds."""
476
  num_samples = int(duration * sample_rate)
477
  silence = np.zeros(num_samples, dtype=np.float32)
478
  silent_path = os.path.join(TEMP_FOLDER, f"silent_{int(time.time())}.wav")
@@ -480,20 +366,15 @@ def generate_silent_audio(duration, sample_rate=24000):
480
  print(f"Silent audio generated: {silent_path}")
481
  return silent_path
482
 
483
- def generate_tts(text, voice):
484
- """
485
- Generate TTS audio using Kokoro, falling back to gTTS or silent audio if needed.
486
- """
487
  safe_text = re.sub(r'[^\w\s-]', '', text[:10]).strip().replace(' ', '_')
488
  file_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}.wav")
489
-
490
  if os.path.exists(file_path):
491
  print(f"Using cached TTS for text '{text[:10]}...'")
492
  return file_path
493
-
494
  try:
495
- kokoro_voice = 'af_heart' if voice == 'en' else voice
496
- generator = pipeline(text, voice=kokoro_voice, speed=0.9, split_pattern=r'\n+')
497
  audio_segments = []
498
  for i, (gs, ps, audio) in enumerate(generator):
499
  audio_segments.append(audio)
@@ -518,31 +399,25 @@ def generate_tts(text, voice):
518
  return generate_silent_audio(duration=max(3, len(text.split()) * 0.5))
519
 
520
  def apply_kenburns_effect(clip, target_resolution, effect_type=None):
521
- """Apply a smooth Ken Burns effect with a single movement pattern."""
522
  target_w, target_h = target_resolution
523
  clip_aspect = clip.w / clip.h
524
  target_aspect = target_w / target_h
525
-
526
  if clip_aspect > target_aspect:
527
  new_height = target_h
528
  new_width = int(new_height * clip_aspect)
529
  else:
530
  new_width = target_w
531
  new_height = int(new_width / clip_aspect)
532
-
533
  clip = clip.resize(newsize=(new_width, new_height))
534
  base_scale = 1.15
535
  new_width = int(new_width * base_scale)
536
  new_height = int(new_height * base_scale)
537
  clip = clip.resize(newsize=(new_width, new_height))
538
-
539
  max_offset_x = new_width - target_w
540
  max_offset_y = new_height - target_h
541
-
542
  available_effects = ["zoom-in", "zoom-out", "pan-left", "pan-right", "up-left"]
543
  if effect_type is None or effect_type == "random":
544
  effect_type = random.choice(available_effects)
545
-
546
  if effect_type == "zoom-in":
547
  start_zoom = 0.9
548
  end_zoom = 1.1
@@ -570,7 +445,6 @@ def apply_kenburns_effect(clip, target_resolution, effect_type=None):
570
  end_center = (target_w / 2, target_h / 2)
571
  else:
572
  raise ValueError(f"Unsupported effect_type: {effect_type}")
573
-
574
  def transform_frame(get_frame, t):
575
  frame = get_frame(t)
576
  ratio = t / clip.duration if clip.duration > 0 else 0
@@ -589,15 +463,12 @@ def apply_kenburns_effect(clip, target_resolution, effect_type=None):
589
  cropped_frame = cv2.getRectSubPix(frame, (crop_w, crop_h), (current_center_x, current_center_y))
590
  resized_frame = cv2.resize(cropped_frame, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)
591
  return resized_frame
592
-
593
  return clip.fl(transform_frame)
594
 
595
  def resize_to_fill(clip, target_resolution):
596
- """Resize and crop a clip to fill the target resolution while maintaining aspect ratio."""
597
  target_w, target_h = target_resolution
598
  clip_aspect = clip.w / clip.h
599
  target_aspect = target_w / target_h
600
-
601
  if clip_aspect > target_aspect:
602
  clip = clip.resize(height=target_h)
603
  crop_amount = (clip.w - target_w) / 2
@@ -606,22 +477,9 @@ def resize_to_fill(clip, target_resolution):
606
  clip = clip.resize(width=target_w)
607
  crop_amount = (clip.h - target_h) / 2
608
  clip = clip.crop(x1=0, x2=clip.w, y1=crop_amount, y2=clip.h - crop_amount)
609
-
610
  return clip
611
 
612
def find_mp3_files():
    """Return the path of the first MP3 file under the current directory.

    The tree rooted at '.' is walked top-down. Directory and file names are
    visited in sorted order so the result is reproducible, and the extension
    is compared case-insensitively so names such as 'TRACK.MP3' are found.
    The walk stops at the first match because only one path is returned.

    Returns:
        The path of the first match, built with os.path.join from the walk
        root (so it starts with '.'), or None when no MP3 file exists.
    """
    for root, dirs, files in os.walk('.'):
        # Sorting in place steers the order in which os.walk descends.
        dirs.sort()
        for name in sorted(files):
            if name.lower().endswith('.mp3'):
                mp3_path = os.path.join(root, name)
                print(f"Found MP3 file: {mp3_path}")
                return mp3_path
    return None
622
-
623
  def add_background_music(final_video, bg_music_volume=0.10):
624
- """Add background music to the final video using any MP3 file found."""
625
  try:
626
  bg_music_path = "music.mp3"
627
  if bg_music_path and os.path.exists(bg_music_path):
@@ -645,18 +503,15 @@ def add_background_music(final_video, bg_music_volume=0.10):
645
  print("Continuing without background music")
646
  return final_video
647
 
648
- def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, narration_text=None, segment_index=0):
649
- """Create a video clip with synchronized subtitles and narration."""
650
  try:
651
  print(f"Creating clip #{segment_index} with asset_type: {asset_type}, media_path: {media_path}")
652
  if not os.path.exists(media_path) or not os.path.exists(tts_path):
653
  print("Missing media or TTS file")
654
  return None
655
-
656
  audio_clip = AudioFileClip(tts_path).audio_fadeout(0.2)
657
  audio_duration = audio_clip.duration
658
  target_duration = audio_duration + 0.2
659
-
660
  if asset_type == "video":
661
  clip = VideoFileClip(media_path)
662
  clip = resize_to_fill(clip, TARGET_RESOLUTION)
@@ -676,8 +531,7 @@ def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, n
676
  clip = clip.fadein(0.3).fadeout(0.3)
677
  else:
678
  return None
679
-
680
- if narration_text and CAPTION_COLOR != "transparent":
681
  try:
682
  words = narration_text.split()
683
  chunks = []
@@ -689,42 +543,42 @@ def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, n
689
  current_chunk = []
690
  if current_chunk:
691
  chunks.append(' '.join(current_chunk))
692
-
693
  chunk_duration = audio_duration / len(chunks)
694
  subtitle_clips = []
695
  subtitle_y_position = int(TARGET_RESOLUTION[1] * 0.70)
696
-
697
  for i, chunk_text in enumerate(chunks):
698
  start_time = i * chunk_duration
699
  end_time = (i + 1) * chunk_duration
700
  txt_clip = TextClip(
701
  chunk_text,
702
- fontsize=45,
703
  font='Arial-Bold',
704
- color=CAPTION_COLOR,
705
  bg_color='rgba(0, 0, 0, 0.25)',
706
  method='caption',
707
  align='center',
708
  stroke_width=2,
709
- stroke_color=CAPTION_COLOR,
710
  size=(TARGET_RESOLUTION[0] * 0.8, None)
711
  ).set_start(start_time).set_end(end_time)
712
  txt_clip = txt_clip.set_position(('center', subtitle_y_position))
713
  subtitle_clips.append(txt_clip)
714
-
715
  clip = CompositeVideoClip([clip] + subtitle_clips)
716
  except Exception as sub_error:
717
  print(f"Subtitle error: {sub_error}")
718
  txt_clip = TextClip(
719
  narration_text,
720
- fontsize=28,
721
- color=CAPTION_COLOR,
722
  align='center',
723
  size=(TARGET_RESOLUTION[0] * 0.7, None)
724
  ).set_position(('center', int(TARGET_RESOLUTION[1] / 3))).set_duration(clip.duration)
725
  clip = CompositeVideoClip([clip, txt_clip])
726
-
727
  clip = clip.set_audio(audio_clip)
 
 
 
 
728
  print(f"Clip created: {clip.duration:.1f}s")
729
  return clip
730
  except Exception as e:
@@ -732,7 +586,6 @@ def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, n
732
  return None
733
 
734
  def fix_imagemagick_policy():
735
- """Fix ImageMagick security policies."""
736
  try:
737
  print("Attempting to fix ImageMagick security policies...")
738
  policy_paths = [
@@ -756,31 +609,45 @@ def fix_imagemagick_policy():
756
  print(f"Error fixing policies: {e}")
757
  return False
758
 
759
- # ---------------- Main Function with Gradio Integration ---------------- #
760
- def generate_video(user_input, resolution, caption_option):
761
- """Generate a video based on user input via Gradio."""
762
- global TARGET_RESOLUTION, CAPTION_COLOR, TEMP_FOLDER
763
- import shutil
 
 
 
 
 
 
 
 
 
 
764
 
 
 
 
765
  # Set resolution
766
  if resolution == "Full":
767
  TARGET_RESOLUTION = (1920, 1080)
768
  elif resolution == "Short":
769
  TARGET_RESOLUTION = (1080, 1920)
 
 
770
  else:
771
- TARGET_RESOLUTION = (1920, 1080) # Default
772
-
773
- # Set caption color
774
- CAPTION_COLOR = "white" if caption_option == "Yes" else "transparent"
775
-
776
  # Create a unique temporary folder
777
  TEMP_FOLDER = tempfile.mkdtemp()
778
-
 
779
  # Fix ImageMagick policy
780
  fix_success = fix_imagemagick_policy()
781
  if not fix_success:
782
  print("Will use alternative methods if needed")
783
-
784
  print("Generating script from API...")
785
  script = generate_script(user_input)
786
  if not script:
@@ -794,17 +661,14 @@ def generate_video(user_input, resolution, caption_option):
794
  shutil.rmtree(TEMP_FOLDER)
795
  return None
796
  print(f"Parsed {len(elements)//2} script segments.")
797
-
798
  paired_elements = []
799
  for i in range(0, len(elements), 2):
800
  if i + 1 < len(elements):
801
  paired_elements.append((elements[i], elements[i + 1]))
802
-
803
  if not paired_elements:
804
  print("No valid script segments found.")
805
  shutil.rmtree(TEMP_FOLDER)
806
  return None
807
-
808
  clips = []
809
  for idx, (media_elem, tts_elem) in enumerate(paired_elements):
810
  print(f"\nProcessing segment {idx+1}/{len(paired_elements)} with prompt: '{media_elem['prompt']}'")
@@ -812,7 +676,7 @@ def generate_video(user_input, resolution, caption_option):
812
  if not media_asset:
813
  print(f"Skipping segment {idx+1} due to missing media asset.")
814
  continue
815
- tts_path = generate_tts(tts_elem['text'], tts_elem['voice'])
816
  if not tts_path:
817
  print(f"Skipping segment {idx+1} due to TTS generation failure.")
818
  continue
@@ -823,45 +687,116 @@ def generate_video(user_input, resolution, caption_option):
823
  duration=tts_elem['duration'],
824
  effects=media_elem.get('effects', 'fade-in'),
825
  narration_text=tts_elem['text'],
826
- segment_index=idx
 
 
 
827
  )
828
  if clip:
829
  clips.append(clip)
830
  else:
831
  print(f"Clip creation failed for segment {idx+1}.")
832
-
833
  if not clips:
834
  print("No clips were successfully created.")
835
  shutil.rmtree(TEMP_FOLDER)
836
  return None
837
-
838
  print("\nConcatenating clips...")
839
- final_video = concatenate_videoclips(clips, method="compose")
840
  final_video = add_background_music(final_video, bg_music_volume=0.08)
841
-
842
  print(f"Exporting final video to {OUTPUT_VIDEO_FILENAME}...")
843
- final_video.write_videofile(OUTPUT_VIDEO_FILENAME, codec='libx264', fps=30, preset='veryfast')
 
 
 
 
 
 
844
  print(f"Final video saved as {OUTPUT_VIDEO_FILENAME}")
845
-
846
  # Clean up
847
  print("Cleaning up temporary files...")
848
  shutil.rmtree(TEMP_FOLDER)
 
 
 
849
  print("Temporary files removed.")
850
-
851
  return OUTPUT_VIDEO_FILENAME
852
 
853
- # ---------------- Gradio Interface ---------------- #
854
  iface = gr.Interface(
855
  fn=generate_video,
856
  inputs=[
857
  gr.Textbox(label="Video Concept", placeholder="Enter your video concept here..."),
858
- gr.Radio(["Full", "Short"], label="Resolution", value="Full"),
859
- gr.Radio(["Yes", "No"], label="Captions", value="Yes")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
860
  ],
861
  outputs=gr.Video(label="Generated Video"),
862
  title="AI Documentary Video Generator",
863
- description="Create a funny documentary-style video based on your concept. Note: Generation may take several minutes on CPU."
864
  )
865
 
866
  # Launch the interface
867
- iface.launch(share=False)
 
 
 
 
 
1
+ import asyncio
2
+ import platform
 
 
 
 
 
 
 
3
  import os
4
+ import shutil
 
5
  import tempfile
6
  import random
7
+ import re
8
  import math
9
+ import time
10
+ import requests
11
+ from urllib.parse import quote
12
+ from bs4 import BeautifulSoup
13
+ import numpy as np
14
+ from PIL import Image, ImageDraw, ImageFont
15
+ import cv2
16
+ import soundfile as sf
17
+ from pydub import AudioSegment
18
+ from gtts import gTTS
19
+ from kokoro import KPipeline
20
  from moviepy.editor import (
21
  VideoFileClip, concatenate_videoclips, AudioFileClip, ImageClip,
22
  CompositeVideoClip, TextClip, CompositeAudioClip
23
  )
 
24
  import moviepy.video.fx.all as vfx
25
  import moviepy.config as mpy_config
26
+ import gradio as gr
 
27
 
28
+ # Initialize Kokoro TTS pipeline
29
+ pipeline = KPipeline(lang_code='a')
 
 
 
 
 
 
 
 
 
 
30
  mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})
31
 
32
# Global Configuration
# Credentials are read from the environment (for example, secrets configured
# on the hosting platform) instead of being committed to the repository.
# When a variable is unset the value is an empty string, so the module still
# imports and the constants keep their str type.
# NOTE(review): the keys that used to be hard-coded here remain visible in
# the commit history and should be revoked and reissued.
PEXELS_API_KEY = os.environ.get("PEXELS_API_KEY", "")
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")

# Model identifier sent as the 'model' field of the OpenRouter request.
OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"

# File name the finished video is exported to.
OUTPUT_VIDEO_FILENAME = "final_video.mp4"

# User-Agent header sent with the image search and image download requests.
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
38
 
39
+ # Global Variables
 
 
 
 
 
 
 
40
  TARGET_RESOLUTION = None
41
  CAPTION_COLOR = None
42
+ CAPTION_FONT_SIZE = None
43
  TEMP_FOLDER = None
44
 
45
+ # Helper Functions (unchanged from original code)
46
  def generate_script(user_input):
 
47
  headers = {
48
  'Authorization': f'Bearer {OPENROUTER_API_KEY}',
49
  'HTTP-Referer': 'https://your-domain.com',
50
  'X-Title': 'AI Documentary Maker'
51
  }
 
52
  prompt = f"""Short Documentary Script GeneratorInstructions:
 
53
  If I say "use this," just output the script exactly as I gave it.
54
  If I only give topics, generate a script based on them.
55
  If I provide a full script, rewrite it without any changes. Make everything short simple and humarous funny and act as serious but humarous. And don't say anything off topic. Also alway say a funny statement to subscribe based on the video topic at the end. Use normal conversational text like a normal person talking and avoid AI phase make the statements humanize and normal conversational
56
  And always talk negative views don't really wants any positive statement and the full script will be one video about one topic so keep in mind
57
  Formatting Rules:
 
 
58
  Title in Square Brackets:
 
 
59
  Each section starts with a one-word title inside [ ] (max two words if necessary).
60
  This title will be used as a search term for Pexels footage.
 
 
 
61
  Casual & Funny Narration:
 
 
62
  Each section has 5-10 words of narration.
63
  Keep it natural, funny, and unpredictable (not robotic, poetic, or rhythmic).
 
 
 
64
  No Special Formatting:
 
 
65
  No bold, italics, or special characters. You are a assistant AI your task is to create script. You aren't a chatbot. So, don't write extra text
 
 
 
66
  Generalized Search Terms:
 
 
67
  If a term is too specific, make it more general for Pexels search.
 
 
 
68
  Scene-Specific Writing:
 
 
69
  Each section describes only what should be shown in the video.
 
 
 
70
  Output Only the Script, and also make it funny and humarous and helirous and also add to subscribe with a funny statement like subscribe now or .....
 
 
71
  No extra text, just the script.
 
 
 
72
  Example Output:
73
  [North Korea]
 
74
  Top 5 unknown facts about North Korea.
 
75
  [Invisibility]
 
76
  North Korea’s internet speed is so fast… it doesn’t exist.
 
77
  [Leadership]
 
78
  Kim Jong-un once won an election with 100% votes… against himself.
 
79
  [Magic]
 
80
  North Korea discovered time travel. That’s why their news is always from the past.
 
81
  [Warning]
 
82
  Subscribe now, or Kim Jong-un will send you a free one-way ticket… to North Korea.
 
83
  [Freedom]
 
84
  North Korean citizens can do anything… as long as it's government-approved.
85
  Now here is the Topic/scrip: {user_input}
86
  """
 
87
  data = {
88
  'model': OPENROUTER_MODEL,
89
  'messages': [{'role': 'user', 'content': prompt}],
90
  'temperature': 0.4,
91
  'max_tokens': 5000
92
  }
 
93
  try:
94
  response = requests.post(
95
  'https://openrouter.ai/api/v1/chat/completions',
 
97
  json=data,
98
  timeout=30
99
  )
 
100
  if response.status_code == 200:
101
  response_data = response.json()
102
  if 'choices' in response_data and len(response_data['choices']) > 0:
 
107
  else:
108
  print(f"API Error {response.status_code}: {response.text}")
109
  return None
 
110
  except Exception as e:
111
  print(f"Request failed: {str(e)}")
112
  return None
113
 
114
  def parse_script(script_text):
 
 
 
 
 
 
115
  sections = {}
116
  current_title = None
117
  current_text = ""
 
118
  try:
119
  for line in script_text.splitlines():
120
  line = line.strip()
 
128
  current_text = line[bracket_end+1:].strip()
129
  elif current_title:
130
  current_text += line + " "
 
131
  if current_title:
132
  sections[current_title] = current_text.strip()
 
133
  elements = []
134
  for title, narration in sections.items():
135
  if not title or not narration:
136
  continue
 
137
  media_element = {"type": "media", "prompt": title, "effects": "fade-in"}
138
  words = narration.split()
139
  duration = max(3, len(words) * 0.5)
140
  tts_element = {"type": "tts", "text": narration, "voice": "en", "duration": duration}
141
  elements.append(media_element)
142
  elements.append(tts_element)
 
143
  return elements
144
  except Exception as e:
145
  print(f"Error parsing script: {e}")
146
  return []
147
 
148
  def search_pexels_videos(query, pexels_api_key):
 
149
  headers = {'Authorization': pexels_api_key}
150
  base_url = "https://api.pexels.com/videos/search"
151
  num_pages = 3
152
  videos_per_page = 15
 
153
  max_retries = 3
154
  retry_delay = 1
 
155
  search_query = query
156
  all_videos = []
 
157
  for page in range(1, num_pages + 1):
158
  for attempt in range(max_retries):
159
  try:
160
  params = {"query": search_query, "per_page": videos_per_page, "page": page}
161
  response = requests.get(base_url, headers=headers, params=params, timeout=10)
 
162
  if response.status_code == 200:
163
  data = response.json()
164
  videos = data.get("videos", [])
 
165
  if not videos:
166
  print(f"No videos found on page {page}.")
167
  break
 
168
  for video in videos:
169
  video_files = video.get("video_files", [])
170
  for file in video_files:
171
  if file.get("quality") == "hd":
172
  all_videos.append(file.get("link"))
173
  break
 
174
  break
 
175
  elif response.status_code == 429:
176
  print(f"Rate limit hit (attempt {attempt+1}/{max_retries}). Retrying in {retry_delay} seconds...")
177
  time.sleep(retry_delay)
 
184
  retry_delay *= 2
185
  else:
186
  break
 
187
  except requests.exceptions.RequestException as e:
188
  print(f"Request exception: {e}")
189
  if attempt < max_retries - 1:
 
192
  retry_delay *= 2
193
  else:
194
  break
 
195
  if all_videos:
196
  random_video = random.choice(all_videos)
197
  print(f"Selected random video from {len(all_videos)} HD videos")
 
201
  return None
202
 
203
  def search_pexels_images(query, pexels_api_key):
 
204
  headers = {'Authorization': pexels_api_key}
205
  url = "https://api.pexels.com/v1/search"
206
  params = {"query": query, "per_page": 5, "orientation": "landscape"}
 
207
  max_retries = 3
208
  retry_delay = 1
 
209
  for attempt in range(max_retries):
210
  try:
211
  response = requests.get(url, headers=headers, params=params, timeout=10)
 
212
  if response.status_code == 200:
213
  data = response.json()
214
  photos = data.get("photos", [])
 
219
  else:
220
  print(f"No images found for query: {query}")
221
  return None
 
222
  elif response.status_code == 429:
223
  print(f"Rate limit hit (attempt {attempt+1}/{max_retries}). Retrying in {retry_delay} seconds...")
224
  time.sleep(retry_delay)
 
229
  print(f"Retrying in {retry_delay} seconds...")
230
  time.sleep(retry_delay)
231
  retry_delay *= 2
 
232
  except requests.exceptions.RequestException as e:
233
  print(f"Request exception: {e}")
234
  if attempt < max_retries - 1:
235
  print(f"Retrying in {retry_delay} seconds...")
236
  time.sleep(retry_delay)
237
  retry_delay *= 2
 
238
  print(f"No Pexels images found for query: {query} after all attempts")
239
  return None
240
 
241
  def search_google_images(query):
 
242
  try:
243
  search_url = f"https://www.google.com/search?q={quote(query)}&tbm=isch"
244
  headers = {"User-Agent": USER_AGENT}
245
  response = requests.get(search_url, headers=headers, timeout=10)
246
  soup = BeautifulSoup(response.text, "html.parser")
 
247
  img_tags = soup.find_all("img")
248
  image_urls = []
249
  for img in img_tags:
250
  src = img.get("src", "")
251
  if src.startswith("http") and "gstatic" not in src:
252
  image_urls.append(src)
 
253
  if image_urls:
254
  return random.choice(image_urls[:5]) if len(image_urls) >= 5 else image_urls[0]
255
  else:
 
260
  return None
261
 
262
  def download_image(image_url, filename):
 
263
  try:
264
  headers = {"User-Agent": USER_AGENT}
265
  print(f"Downloading image from: {image_url} to {filename}")
266
  response = requests.get(image_url, headers=headers, stream=True, timeout=15)
267
  response.raise_for_status()
 
268
  with open(filename, 'wb') as f:
269
  for chunk in response.iter_content(chunk_size=8192):
270
  f.write(chunk)
 
271
  print(f"Image downloaded successfully to: {filename}")
 
272
  try:
273
  img = Image.open(filename)
274
  img.verify()
 
283
  if os.path.exists(filename):
284
  os.remove(filename)
285
  return None
 
286
  except requests.exceptions.RequestException as e_download:
287
  print(f"Image download error: {e_download}")
288
  if os.path.exists(filename):
 
295
  return None
296
 
297
  def download_video(video_url, filename):
 
298
  try:
299
  response = requests.get(video_url, stream=True, timeout=30)
300
  response.raise_for_status()
 
310
  return None
311
 
312
  def generate_media(prompt, user_image=None, current_index=0, total_segments=1):
 
 
 
 
 
313
  safe_prompt = re.sub(r'[^\w\s-]', '', prompt).strip().replace(' ', '_')
 
314
  if "news" in prompt.lower():
315
  print(f"News-related query detected: {prompt}. Using Google Images...")
316
  image_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}_news.jpg")
 
322
  return {"path": downloaded_image, "asset_type": "image"}
323
  else:
324
  print(f"Google Images search failed for prompt: {prompt}")
 
325
  if random.random() < 0.25:
326
  video_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}_video.mp4")
327
  video_url = search_pexels_videos(prompt, PEXELS_API_KEY)
 
332
  return {"path": downloaded_video, "asset_type": "video"}
333
  else:
334
  print(f"Pexels video search failed for prompt: {prompt}")
 
335
  image_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}.jpg")
336
  image_url = search_pexels_images(prompt, PEXELS_API_KEY)
337
  if image_url:
 
341
  return {"path": downloaded_image, "asset_type": "image"}
342
  else:
343
  print(f"Pexels image download failed for prompt: {prompt}")
 
344
  fallback_terms = ["nature", "people", "landscape", "technology", "business"]
345
  for term in fallback_terms:
346
  print(f"Trying fallback image search with term: {term}")
 
355
  print(f"Fallback image download failed for term: {term}")
356
  else:
357
  print(f"Fallback image search failed for term: {term}")
 
358
  print(f"Failed to generate visual asset for prompt: {prompt}")
359
  return None
360
 
361
  def generate_silent_audio(duration, sample_rate=24000):
 
362
  num_samples = int(duration * sample_rate)
363
  silence = np.zeros(num_samples, dtype=np.float32)
364
  silent_path = os.path.join(TEMP_FOLDER, f"silent_{int(time.time())}.wav")
 
366
  print(f"Silent audio generated: {silent_path}")
367
  return silent_path
368
 
369
+ def generate_tts(text, voice, voice_speed):
 
 
 
370
  safe_text = re.sub(r'[^\w\s-]', '', text[:10]).strip().replace(' ', '_')
371
  file_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}.wav")
 
372
  if os.path.exists(file_path):
373
  print(f"Using cached TTS for text '{text[:10]}...'")
374
  return file_path
 
375
  try:
376
+ kokoro_voice = voice
377
+ generator = pipeline(text, voice=kokoro_voice, speed=voice_speed, split_pattern=r'\n+')
378
  audio_segments = []
379
  for i, (gs, ps, audio) in enumerate(generator):
380
  audio_segments.append(audio)
 
399
  return generate_silent_audio(duration=max(3, len(text.split()) * 0.5))
400
 
401
  def apply_kenburns_effect(clip, target_resolution, effect_type=None):
 
402
  target_w, target_h = target_resolution
403
  clip_aspect = clip.w / clip.h
404
  target_aspect = target_w / target_h
 
405
  if clip_aspect > target_aspect:
406
  new_height = target_h
407
  new_width = int(new_height * clip_aspect)
408
  else:
409
  new_width = target_w
410
  new_height = int(new_width / clip_aspect)
 
411
  clip = clip.resize(newsize=(new_width, new_height))
412
  base_scale = 1.15
413
  new_width = int(new_width * base_scale)
414
  new_height = int(new_height * base_scale)
415
  clip = clip.resize(newsize=(new_width, new_height))
 
416
  max_offset_x = new_width - target_w
417
  max_offset_y = new_height - target_h
 
418
  available_effects = ["zoom-in", "zoom-out", "pan-left", "pan-right", "up-left"]
419
  if effect_type is None or effect_type == "random":
420
  effect_type = random.choice(available_effects)
 
421
  if effect_type == "zoom-in":
422
  start_zoom = 0.9
423
  end_zoom = 1.1
 
445
  end_center = (target_w / 2, target_h / 2)
446
  else:
447
  raise ValueError(f"Unsupported effect_type: {effect_type}")
 
448
  def transform_frame(get_frame, t):
449
  frame = get_frame(t)
450
  ratio = t / clip.duration if clip.duration > 0 else 0
 
463
  cropped_frame = cv2.getRectSubPix(frame, (crop_w, crop_h), (current_center_x, current_center_y))
464
  resized_frame = cv2.resize(cropped_frame, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)
465
  return resized_frame
 
466
  return clip.fl(transform_frame)
467
 
468
  def resize_to_fill(clip, target_resolution):
 
469
  target_w, target_h = target_resolution
470
  clip_aspect = clip.w / clip.h
471
  target_aspect = target_w / target_h
 
472
  if clip_aspect > target_aspect:
473
  clip = clip.resize(height=target_h)
474
  crop_amount = (clip.w - target_w) / 2
 
477
  clip = clip.resize(width=target_w)
478
  crop_amount = (clip.h - target_h) / 2
479
  clip = clip.crop(x1=0, x2=clip.w, y1=crop_amount, y2=clip.h - crop_amount)
 
480
  return clip
481
 
 
 
 
 
 
 
 
 
 
 
 
482
  def add_background_music(final_video, bg_music_volume=0.10):
 
483
  try:
484
  bg_music_path = "music.mp3"
485
  if bg_music_path and os.path.exists(bg_music_path):
 
503
  print("Continuing without background music")
504
  return final_video
505
 
506
+ def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, narration_text=None, segment_index=0, caption_color="white", caption_font_size=45, transition_effect="fade"):
 
507
  try:
508
  print(f"Creating clip #{segment_index} with asset_type: {asset_type}, media_path: {media_path}")
509
  if not os.path.exists(media_path) or not os.path.exists(tts_path):
510
  print("Missing media or TTS file")
511
  return None
 
512
  audio_clip = AudioFileClip(tts_path).audio_fadeout(0.2)
513
  audio_duration = audio_clip.duration
514
  target_duration = audio_duration + 0.2
 
515
  if asset_type == "video":
516
  clip = VideoFileClip(media_path)
517
  clip = resize_to_fill(clip, TARGET_RESOLUTION)
 
531
  clip = clip.fadein(0.3).fadeout(0.3)
532
  else:
533
  return None
534
+ if narration_text and caption_color != "transparent":
 
535
  try:
536
  words = narration_text.split()
537
  chunks = []
 
543
  current_chunk = []
544
  if current_chunk:
545
  chunks.append(' '.join(current_chunk))
 
546
  chunk_duration = audio_duration / len(chunks)
547
  subtitle_clips = []
548
  subtitle_y_position = int(TARGET_RESOLUTION[1] * 0.70)
 
549
  for i, chunk_text in enumerate(chunks):
550
  start_time = i * chunk_duration
551
  end_time = (i + 1) * chunk_duration
552
  txt_clip = TextClip(
553
  chunk_text,
554
+ fontsize=caption_font_size,
555
  font='Arial-Bold',
556
+ color=caption_color,
557
  bg_color='rgba(0, 0, 0, 0.25)',
558
  method='caption',
559
  align='center',
560
  stroke_width=2,
561
+ stroke_color=caption_color,
562
  size=(TARGET_RESOLUTION[0] * 0.8, None)
563
  ).set_start(start_time).set_end(end_time)
564
  txt_clip = txt_clip.set_position(('center', subtitle_y_position))
565
  subtitle_clips.append(txt_clip)
 
566
  clip = CompositeVideoClip([clip] + subtitle_clips)
567
  except Exception as sub_error:
568
  print(f"Subtitle error: {sub_error}")
569
  txt_clip = TextClip(
570
  narration_text,
571
+ fontsize=caption_font_size,
572
+ color=caption_color,
573
  align='center',
574
  size=(TARGET_RESOLUTION[0] * 0.7, None)
575
  ).set_position(('center', int(TARGET_RESOLUTION[1] / 3))).set_duration(clip.duration)
576
  clip = CompositeVideoClip([clip, txt_clip])
 
577
  clip = clip.set_audio(audio_clip)
578
+ if transition_effect == "fade":
579
+ clip = clip.crossfadein(0.5)
580
+ elif transition_effect == "slide":
581
+ clip = clip.set_position(lambda t: ('center', -TARGET_RESOLUTION[1] + t * TARGET_RESOLUTION[1] / clip.duration))
582
  print(f"Clip created: {clip.duration:.1f}s")
583
  return clip
584
  except Exception as e:
 
586
  return None
587
 
588
  def fix_imagemagick_policy():
 
589
  try:
590
  print("Attempting to fix ImageMagick security policies...")
591
  policy_paths = [
 
609
  print(f"Error fixing policies: {e}")
610
  return False
611
 
612
def handle_music_upload(music_file):
    """Store an uploaded background-music file as ``music.mp3`` in the working directory.

    Args:
        music_file: The value delivered by the upload widget. Accepted forms:
            a filesystem path (``str`` or ``os.PathLike``), an object whose
            ``name`` attribute is the path of an existing file, or any object
            with a ``read()`` method returning bytes. Falsy values (``None``,
            ``""``) mean "no upload".

    Returns:
        The string ``"music.mp3"`` when the file was stored, otherwise ``None``
        (nothing uploaded, or the copy failed; failures are printed, not raised).
    """
    # Imported locally so the helper does not depend on a module-level
    # `import shutil`; elsewhere in this file shutil is only imported inside
    # generate_video.
    import shutil

    if not music_file:
        return None

    music_path = "music.mp3"
    try:
        upload_name = getattr(music_file, "name", None)
        if isinstance(music_file, (str, os.PathLike)):
            shutil.copy(music_file, music_path)
        elif isinstance(upload_name, str) and os.path.isfile(upload_name):
            # Temp-file style wrappers expose the on-disk path via `.name`;
            # copying from the path works even if the handle is closed or
            # its read position is not at the start.
            shutil.copy(upload_name, music_path)
        else:
            with open(music_path, 'wb') as f:
                f.write(music_file.read())
        print(f"Music file uploaded and renamed to {music_path}")
        return music_path
    except Exception as e:
        # Best effort: background music is optional, so report and carry on.
        print(f"Error handling music upload: {e}")
        return None
627
 
628
+ def generate_video(user_input, resolution, caption_option, voice_option, voice_speed, music_file, caption_color, caption_font_size, fps, preset, transition_effect, bitrate):
629
+ global TARGET_RESOLUTION, CAPTION_COLOR, CAPTION_FONT_SIZE, TEMP_FOLDER
630
+ import shutil
631
  # Set resolution
632
  if resolution == "Full":
633
  TARGET_RESOLUTION = (1920, 1080)
634
  elif resolution == "Short":
635
  TARGET_RESOLUTION = (1080, 1920)
636
+ elif resolution == "Square":
637
+ TARGET_RESOLUTION = (1080, 1080)
638
  else:
639
+ TARGET_RESOLUTION = (1920, 1080)
640
+ # Set caption settings
641
+ CAPTION_COLOR = caption_color if caption_option == "Yes" else "transparent"
642
+ CAPTION_FONT_SIZE = caption_font_size
 
643
  # Create a unique temporary folder
644
  TEMP_FOLDER = tempfile.mkdtemp()
645
+ # Handle music upload
646
+ handle_music_upload(music_file)
647
  # Fix ImageMagick policy
648
  fix_success = fix_imagemagick_policy()
649
  if not fix_success:
650
  print("Will use alternative methods if needed")
 
651
  print("Generating script from API...")
652
  script = generate_script(user_input)
653
  if not script:
 
661
  shutil.rmtree(TEMP_FOLDER)
662
  return None
663
  print(f"Parsed {len(elements)//2} script segments.")
 
664
  paired_elements = []
665
  for i in range(0, len(elements), 2):
666
  if i + 1 < len(elements):
667
  paired_elements.append((elements[i], elements[i + 1]))
 
668
  if not paired_elements:
669
  print("No valid script segments found.")
670
  shutil.rmtree(TEMP_FOLDER)
671
  return None
 
672
  clips = []
673
  for idx, (media_elem, tts_elem) in enumerate(paired_elements):
674
  print(f"\nProcessing segment {idx+1}/{len(paired_elements)} with prompt: '{media_elem['prompt']}'")
 
676
  if not media_asset:
677
  print(f"Skipping segment {idx+1} due to missing media asset.")
678
  continue
679
+ tts_path = generate_tts(tts_elem['text'], voice_option, voice_speed)
680
  if not tts_path:
681
  print(f"Skipping segment {idx+1} due to TTS generation failure.")
682
  continue
 
687
  duration=tts_elem['duration'],
688
  effects=media_elem.get('effects', 'fade-in'),
689
  narration_text=tts_elem['text'],
690
+ segment_index=idx,
691
+ caption_color=CAPTION_COLOR,
692
+ caption_font_size=CAPTION_FONT_SIZE,
693
+ transition_effect=transition_effect
694
  )
695
  if clip:
696
  clips.append(clip)
697
  else:
698
  print(f"Clip creation failed for segment {idx+1}.")
 
699
  if not clips:
700
  print("No clips were successfully created.")
701
  shutil.rmtree(TEMP_FOLDER)
702
  return None
 
703
  print("\nConcatenating clips...")
704
+ final_video = concatenate_videoclips(clips, method="compose", transition=TextClip("", duration=0.5) if transition_effect == "fade" else None)
705
  final_video = add_background_music(final_video, bg_music_volume=0.08)
 
706
  print(f"Exporting final video to {OUTPUT_VIDEO_FILENAME}...")
707
+ final_video.write_videofile(
708
+ OUTPUT_VIDEO_FILENAME,
709
+ codec='libx264',
710
+ fps=fps,
711
+ preset=preset,
712
+ bitrate=f"{bitrate}k" if bitrate else None
713
+ )
714
  print(f"Final video saved as {OUTPUT_VIDEO_FILENAME}")
 
715
  # Clean up
716
  print("Cleaning up temporary files...")
717
  shutil.rmtree(TEMP_FOLDER)
718
+ if os.path.exists("music.mp3"):
719
+ os.remove("music.mp3")
720
+ print("Removed uploaded music file.")
721
  print("Temporary files removed.")
 
722
  return OUTPUT_VIDEO_FILENAME
723
 
724
+ # Gradio Interface
725
  iface = gr.Interface(
726
  fn=generate_video,
727
  inputs=[
728
  gr.Textbox(label="Video Concept", placeholder="Enter your video concept here..."),
729
+ gr.Radio(["Full", "Short", "Square"], label="Resolution", value="Full"),
730
+ gr.Radio(["Yes", "No"], label="Captions", value="Yes"),
731
+ gr.Dropdown(
732
+ ["af_heart", "en_male", "en_female", "en_neutral"],
733
+ label="Voice Option",
734
+ value="af_heart",
735
+ info="Select the narration voice."
736
+ ),
737
+ gr.Slider(
738
+ minimum=0.5,
739
+ maximum=1.5,
740
+ step=0.1,
741
+ label="Voice Speed",
742
+ value=0.9,
743
+ info="Adjust the speed of the narration."
744
+ ),
745
+ gr.File(
746
+ label="Upload Background Music (MP3)",
747
+ file_types=[".mp3"],
748
+ info="Upload an MP3 file for background music. It will be renamed to music.mp3."
749
+ ),
750
+ gr.ColorPicker(
751
+ label="Caption Color",
752
+ value="#FFFFFF",
753
+ info="Choose the color for captions (used if Captions is Yes)."
754
+ ),
755
+ gr.Slider(
756
+ minimum=20,
757
+ maximum=60,
758
+ step=1,
759
+ label="Caption Font Size",
760
+ value=45,
761
+ info="Set the font size for captions."
762
+ ),
763
+ gr.Slider(
764
+ minimum=24,
765
+ maximum=60,
766
+ step=1,
767
+ label="FPS",
768
+ value=30,
769
+ info="Set the frames per second for the video."
770
+ ),
771
+ gr.Dropdown(
772
+ ["ultrafast", "veryfast", "fast", "medium", "slow"],
773
+ label="Encoding Preset",
774
+ value="veryfast",
775
+ info="Choose the encoding speed vs quality tradeoff."
776
+ ),
777
+ gr.Dropdown(
778
+ ["fade", "slide", "none"],
779
+ label="Transition Effect",
780
+ value="fade",
781
+ info="Select the transition effect between clips."
782
+ ),
783
+ gr.Slider(
784
+ minimum=1000,
785
+ maximum=8000,
786
+ step=500,
787
+ label="Bitrate (kbps)",
788
+ value=4000,
789
+ info="Set the video bitrate for quality control."
790
+ )
791
  ],
792
  outputs=gr.Video(label="Generated Video"),
793
  title="AI Documentary Video Generator",
794
+ description="Create a funny documentary-style video with customizable options. Upload background music, adjust voice, captions, and video settings. Note: Generation may take several minutes on CPU."
795
  )
796
 
797
  # Launch the interface
798
# Interface.launch() is an ordinary synchronous call and does not return a
# coroutine, so it must not be wrapped in asyncio.run()/asyncio.ensure_future()
# (both raise when handed its return value). As before, the app is started
# unconditionally on Emscripten and otherwise only when run as a script.
if platform.system() == "Emscripten" or __name__ == "__main__":
    iface.launch(share=False)