testdeep123 committed on
Commit
cca05b0
·
verified ·
1 Parent(s): 8bf3a47

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +246 -147
app.py CHANGED
@@ -1,95 +1,160 @@
1
- import asyncio
2
- import platform
 
 
 
 
 
 
 
3
  import os
4
- import shutil
 
5
  import tempfile
6
  import random
7
- import re
8
- import math
9
- import time
10
- import requests
11
- from urllib.parse import quote
12
- from bs4 import BeautifulSoup
13
- import numpy as np
14
- from PIL import Image, ImageDraw, ImageFont
15
  import cv2
16
- import soundfile as sf
17
- from pydub import AudioSegment
18
- from gtts import gTTS
19
- from kokoro import KPipeline
20
  from moviepy.editor import (
21
  VideoFileClip, concatenate_videoclips, AudioFileClip, ImageClip,
22
  CompositeVideoClip, TextClip, CompositeAudioClip
23
  )
 
 
 
24
  import moviepy.video.fx.all as vfx
25
  import moviepy.config as mpy_config
26
- import gradio as gr
 
 
 
 
 
 
 
 
 
 
27
 
28
- # Initialize Kokoro TTS pipeline
29
- pipeline = KPipeline(lang_code='a')
 
30
  mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})
31
 
32
- # Global Configuration
33
  PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'
34
  OPENROUTER_API_KEY = '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'
35
  OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"
36
  OUTPUT_VIDEO_FILENAME = "final_video.mp4"
37
  USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
38
 
39
- # Global Variables
 
 
 
 
 
 
 
40
  TARGET_RESOLUTION = None
41
  CAPTION_COLOR = None
42
- CAPTION_FONT_SIZE = None
43
  TEMP_FOLDER = None
44
 
45
- # Helper Functions (unchanged from original code)
46
  def generate_script(user_input):
 
47
  headers = {
48
  'Authorization': f'Bearer {OPENROUTER_API_KEY}',
49
  'HTTP-Referer': 'https://your-domain.com',
50
  'X-Title': 'AI Documentary Maker'
51
  }
 
52
  prompt = f"""Short Documentary Script GeneratorInstructions:
 
53
  If I say "use this," just output the script exactly as I gave it.
54
  If I only give topics, generate a script based on them.
55
  If I provide a full script, rewrite it without any changes. Make everything short simple and humarous funny and act as serious but humarous. And don't say anything off topic. Also alway say a funny statement to subscribe based on the video topic at the end. Use normal conversational text like a normal person talking and avoid AI phase make the statements humanize and normal conversational
56
  And always talk negative views don't really wants any positive statement and the full script will be one video about one topic so keep in mind
57
  Formatting Rules:
 
 
58
  Title in Square Brackets:
 
 
59
  Each section starts with a one-word title inside [ ] (max two words if necessary).
60
  This title will be used as a search term for Pexels footage.
 
 
 
61
  Casual & Funny Narration:
 
 
62
  Each section has 5-10 words of narration.
63
  Keep it natural, funny, and unpredictable (not robotic, poetic, or rhythmic).
 
 
 
64
  No Special Formatting:
 
 
65
  No bold, italics, or special characters. You are a assistant AI your task is to create script. You aren't a chatbot. So, don't write extra text
 
 
 
66
  Generalized Search Terms:
 
 
67
  If a term is too specific, make it more general for Pexels search.
 
 
 
68
  Scene-Specific Writing:
 
 
69
  Each section describes only what should be shown in the video.
 
 
 
70
  Output Only the Script, and also make it funny and humarous and helirous and also add to subscribe with a funny statement like subscribe now or .....
 
 
71
  No extra text, just the script.
 
 
 
72
  Example Output:
73
  [North Korea]
 
74
  Top 5 unknown facts about North Korea.
 
75
  [Invisibility]
 
76
  North Korea’s internet speed is so fast… it doesn’t exist.
 
77
  [Leadership]
 
78
  Kim Jong-un once won an election with 100% votes… against himself.
 
79
  [Magic]
 
80
  North Korea discovered time travel. That’s why their news is always from the past.
 
81
  [Warning]
 
82
  Subscribe now, or Kim Jong-un will send you a free one-way ticket… to North Korea.
 
83
  [Freedom]
 
84
  North Korean citizens can do anything… as long as it's government-approved.
85
  Now here is the Topic/scrip: {user_input}
86
  """
 
87
  data = {
88
  'model': OPENROUTER_MODEL,
89
  'messages': [{'role': 'user', 'content': prompt}],
90
  'temperature': 0.4,
91
  'max_tokens': 5000
92
  }
 
93
  try:
94
  response = requests.post(
95
  'https://openrouter.ai/api/v1/chat/completions',
@@ -97,6 +162,7 @@ Now here is the Topic/scrip: {user_input}
97
  json=data,
98
  timeout=30
99
  )
 
100
  if response.status_code == 200:
101
  response_data = response.json()
102
  if 'choices' in response_data and len(response_data['choices']) > 0:
@@ -107,14 +173,22 @@ Now here is the Topic/scrip: {user_input}
107
  else:
108
  print(f"API Error {response.status_code}: {response.text}")
109
  return None
 
110
  except Exception as e:
111
  print(f"Request failed: {str(e)}")
112
  return None
113
 
114
  def parse_script(script_text):
 
 
 
 
 
 
115
  sections = {}
116
  current_title = None
117
  current_text = ""
 
118
  try:
119
  for line in script_text.splitlines():
120
  line = line.strip()
@@ -128,50 +202,63 @@ def parse_script(script_text):
128
  current_text = line[bracket_end+1:].strip()
129
  elif current_title:
130
  current_text += line + " "
 
131
  if current_title:
132
  sections[current_title] = current_text.strip()
 
133
  elements = []
134
  for title, narration in sections.items():
135
  if not title or not narration:
136
  continue
 
137
  media_element = {"type": "media", "prompt": title, "effects": "fade-in"}
138
  words = narration.split()
139
  duration = max(3, len(words) * 0.5)
140
  tts_element = {"type": "tts", "text": narration, "voice": "en", "duration": duration}
141
  elements.append(media_element)
142
  elements.append(tts_element)
 
143
  return elements
144
  except Exception as e:
145
  print(f"Error parsing script: {e}")
146
  return []
147
 
148
  def search_pexels_videos(query, pexels_api_key):
 
149
  headers = {'Authorization': pexels_api_key}
150
  base_url = "https://api.pexels.com/videos/search"
151
  num_pages = 3
152
  videos_per_page = 15
 
153
  max_retries = 3
154
  retry_delay = 1
 
155
  search_query = query
156
  all_videos = []
 
157
  for page in range(1, num_pages + 1):
158
  for attempt in range(max_retries):
159
  try:
160
  params = {"query": search_query, "per_page": videos_per_page, "page": page}
161
  response = requests.get(base_url, headers=headers, params=params, timeout=10)
 
162
  if response.status_code == 200:
163
  data = response.json()
164
  videos = data.get("videos", [])
 
165
  if not videos:
166
  print(f"No videos found on page {page}.")
167
  break
 
168
  for video in videos:
169
  video_files = video.get("video_files", [])
170
  for file in video_files:
171
  if file.get("quality") == "hd":
172
  all_videos.append(file.get("link"))
173
  break
 
174
  break
 
175
  elif response.status_code == 429:
176
  print(f"Rate limit hit (attempt {attempt+1}/{max_retries}). Retrying in {retry_delay} seconds...")
177
  time.sleep(retry_delay)
@@ -184,6 +271,7 @@ def search_pexels_videos(query, pexels_api_key):
184
  retry_delay *= 2
185
  else:
186
  break
 
187
  except requests.exceptions.RequestException as e:
188
  print(f"Request exception: {e}")
189
  if attempt < max_retries - 1:
@@ -192,6 +280,7 @@ def search_pexels_videos(query, pexels_api_key):
192
  retry_delay *= 2
193
  else:
194
  break
 
195
  if all_videos:
196
  random_video = random.choice(all_videos)
197
  print(f"Selected random video from {len(all_videos)} HD videos")
@@ -201,14 +290,18 @@ def search_pexels_videos(query, pexels_api_key):
201
  return None
202
 
203
  def search_pexels_images(query, pexels_api_key):
 
204
  headers = {'Authorization': pexels_api_key}
205
  url = "https://api.pexels.com/v1/search"
206
  params = {"query": query, "per_page": 5, "orientation": "landscape"}
 
207
  max_retries = 3
208
  retry_delay = 1
 
209
  for attempt in range(max_retries):
210
  try:
211
  response = requests.get(url, headers=headers, params=params, timeout=10)
 
212
  if response.status_code == 200:
213
  data = response.json()
214
  photos = data.get("photos", [])
@@ -219,6 +312,7 @@ def search_pexels_images(query, pexels_api_key):
219
  else:
220
  print(f"No images found for query: {query}")
221
  return None
 
222
  elif response.status_code == 429:
223
  print(f"Rate limit hit (attempt {attempt+1}/{max_retries}). Retrying in {retry_delay} seconds...")
224
  time.sleep(retry_delay)
@@ -229,27 +323,32 @@ def search_pexels_images(query, pexels_api_key):
229
  print(f"Retrying in {retry_delay} seconds...")
230
  time.sleep(retry_delay)
231
  retry_delay *= 2
 
232
  except requests.exceptions.RequestException as e:
233
  print(f"Request exception: {e}")
234
  if attempt < max_retries - 1:
235
  print(f"Retrying in {retry_delay} seconds...")
236
  time.sleep(retry_delay)
237
  retry_delay *= 2
 
238
  print(f"No Pexels images found for query: {query} after all attempts")
239
  return None
240
 
241
  def search_google_images(query):
 
242
  try:
243
  search_url = f"https://www.google.com/search?q={quote(query)}&tbm=isch"
244
  headers = {"User-Agent": USER_AGENT}
245
  response = requests.get(search_url, headers=headers, timeout=10)
246
  soup = BeautifulSoup(response.text, "html.parser")
 
247
  img_tags = soup.find_all("img")
248
  image_urls = []
249
  for img in img_tags:
250
  src = img.get("src", "")
251
  if src.startswith("http") and "gstatic" not in src:
252
  image_urls.append(src)
 
253
  if image_urls:
254
  return random.choice(image_urls[:5]) if len(image_urls) >= 5 else image_urls[0]
255
  else:
@@ -260,15 +359,19 @@ def search_google_images(query):
260
  return None
261
 
262
  def download_image(image_url, filename):
 
263
  try:
264
  headers = {"User-Agent": USER_AGENT}
265
  print(f"Downloading image from: {image_url} to {filename}")
266
  response = requests.get(image_url, headers=headers, stream=True, timeout=15)
267
  response.raise_for_status()
 
268
  with open(filename, 'wb') as f:
269
  for chunk in response.iter_content(chunk_size=8192):
270
  f.write(chunk)
 
271
  print(f"Image downloaded successfully to: {filename}")
 
272
  try:
273
  img = Image.open(filename)
274
  img.verify()
@@ -283,6 +386,7 @@ def download_image(image_url, filename):
283
  if os.path.exists(filename):
284
  os.remove(filename)
285
  return None
 
286
  except requests.exceptions.RequestException as e_download:
287
  print(f"Image download error: {e_download}")
288
  if os.path.exists(filename):
@@ -295,6 +399,7 @@ def download_image(image_url, filename):
295
  return None
296
 
297
  def download_video(video_url, filename):
 
298
  try:
299
  response = requests.get(video_url, stream=True, timeout=30)
300
  response.raise_for_status()
@@ -310,7 +415,13 @@ def download_video(video_url, filename):
310
  return None
311
 
312
  def generate_media(prompt, user_image=None, current_index=0, total_segments=1):
 
 
 
 
 
313
  safe_prompt = re.sub(r'[^\w\s-]', '', prompt).strip().replace(' ', '_')
 
314
  if "news" in prompt.lower():
315
  print(f"News-related query detected: {prompt}. Using Google Images...")
316
  image_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}_news.jpg")
@@ -322,6 +433,7 @@ def generate_media(prompt, user_image=None, current_index=0, total_segments=1):
322
  return {"path": downloaded_image, "asset_type": "image"}
323
  else:
324
  print(f"Google Images search failed for prompt: {prompt}")
 
325
  if random.random() < 0.25:
326
  video_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}_video.mp4")
327
  video_url = search_pexels_videos(prompt, PEXELS_API_KEY)
@@ -332,6 +444,7 @@ def generate_media(prompt, user_image=None, current_index=0, total_segments=1):
332
  return {"path": downloaded_video, "asset_type": "video"}
333
  else:
334
  print(f"Pexels video search failed for prompt: {prompt}")
 
335
  image_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}.jpg")
336
  image_url = search_pexels_images(prompt, PEXELS_API_KEY)
337
  if image_url:
@@ -341,6 +454,7 @@ def generate_media(prompt, user_image=None, current_index=0, total_segments=1):
341
  return {"path": downloaded_image, "asset_type": "image"}
342
  else:
343
  print(f"Pexels image download failed for prompt: {prompt}")
 
344
  fallback_terms = ["nature", "people", "landscape", "technology", "business"]
345
  for term in fallback_terms:
346
  print(f"Trying fallback image search with term: {term}")
@@ -355,10 +469,12 @@ def generate_media(prompt, user_image=None, current_index=0, total_segments=1):
355
  print(f"Fallback image download failed for term: {term}")
356
  else:
357
  print(f"Fallback image search failed for term: {term}")
 
358
  print(f"Failed to generate visual asset for prompt: {prompt}")
359
  return None
360
 
361
  def generate_silent_audio(duration, sample_rate=24000):
 
362
  num_samples = int(duration * sample_rate)
363
  silence = np.zeros(num_samples, dtype=np.float32)
364
  silent_path = os.path.join(TEMP_FOLDER, f"silent_{int(time.time())}.wav")
@@ -366,15 +482,20 @@ def generate_silent_audio(duration, sample_rate=24000):
366
  print(f"Silent audio generated: {silent_path}")
367
  return silent_path
368
 
369
- def generate_tts(text, voice, voice_speed):
 
 
 
370
  safe_text = re.sub(r'[^\w\s-]', '', text[:10]).strip().replace(' ', '_')
371
  file_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}.wav")
 
372
  if os.path.exists(file_path):
373
  print(f"Using cached TTS for text '{text[:10]}...'")
374
  return file_path
 
375
  try:
376
- kokoro_voice = voice
377
- generator = pipeline(text, voice=kokoro_voice, speed=voice_speed, split_pattern=r'\n+')
378
  audio_segments = []
379
  for i, (gs, ps, audio) in enumerate(generator):
380
  audio_segments.append(audio)
@@ -399,25 +520,31 @@ def generate_tts(text, voice, voice_speed):
399
  return generate_silent_audio(duration=max(3, len(text.split()) * 0.5))
400
 
401
  def apply_kenburns_effect(clip, target_resolution, effect_type=None):
 
402
  target_w, target_h = target_resolution
403
  clip_aspect = clip.w / clip.h
404
  target_aspect = target_w / target_h
 
405
  if clip_aspect > target_aspect:
406
  new_height = target_h
407
  new_width = int(new_height * clip_aspect)
408
  else:
409
  new_width = target_w
410
  new_height = int(new_width / clip_aspect)
 
411
  clip = clip.resize(newsize=(new_width, new_height))
412
  base_scale = 1.15
413
  new_width = int(new_width * base_scale)
414
  new_height = int(new_height * base_scale)
415
  clip = clip.resize(newsize=(new_width, new_height))
 
416
  max_offset_x = new_width - target_w
417
  max_offset_y = new_height - target_h
 
418
  available_effects = ["zoom-in", "zoom-out", "pan-left", "pan-right", "up-left"]
419
  if effect_type is None or effect_type == "random":
420
  effect_type = random.choice(available_effects)
 
421
  if effect_type == "zoom-in":
422
  start_zoom = 0.9
423
  end_zoom = 1.1
@@ -445,6 +572,7 @@ def apply_kenburns_effect(clip, target_resolution, effect_type=None):
445
  end_center = (target_w / 2, target_h / 2)
446
  else:
447
  raise ValueError(f"Unsupported effect_type: {effect_type}")
 
448
  def transform_frame(get_frame, t):
449
  frame = get_frame(t)
450
  ratio = t / clip.duration if clip.duration > 0 else 0
@@ -463,12 +591,15 @@ def apply_kenburns_effect(clip, target_resolution, effect_type=None):
463
  cropped_frame = cv2.getRectSubPix(frame, (crop_w, crop_h), (current_center_x, current_center_y))
464
  resized_frame = cv2.resize(cropped_frame, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)
465
  return resized_frame
 
466
  return clip.fl(transform_frame)
467
 
468
  def resize_to_fill(clip, target_resolution):
 
469
  target_w, target_h = target_resolution
470
  clip_aspect = clip.w / clip.h
471
  target_aspect = target_w / target_h
 
472
  if clip_aspect > target_aspect:
473
  clip = clip.resize(height=target_h)
474
  crop_amount = (clip.w - target_w) / 2
@@ -477,9 +608,22 @@ def resize_to_fill(clip, target_resolution):
477
  clip = clip.resize(width=target_w)
478
  crop_amount = (clip.h - target_h) / 2
479
  clip = clip.crop(x1=0, x2=clip.w, y1=crop_amount, y2=clip.h - crop_amount)
 
480
  return clip
481
 
 
 
 
 
 
 
 
 
 
 
 
482
  def add_background_music(final_video, bg_music_volume=0.10):
 
483
  try:
484
  bg_music_path = "music.mp3"
485
  if bg_music_path and os.path.exists(bg_music_path):
@@ -503,15 +647,18 @@ def add_background_music(final_video, bg_music_volume=0.10):
503
  print("Continuing without background music")
504
  return final_video
505
 
506
- def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, narration_text=None, segment_index=0, caption_color="white", caption_font_size=45, transition_effect="fade"):
 
507
  try:
508
  print(f"Creating clip #{segment_index} with asset_type: {asset_type}, media_path: {media_path}")
509
  if not os.path.exists(media_path) or not os.path.exists(tts_path):
510
  print("Missing media or TTS file")
511
  return None
 
512
  audio_clip = AudioFileClip(tts_path).audio_fadeout(0.2)
513
  audio_duration = audio_clip.duration
514
  target_duration = audio_duration + 0.2
 
515
  if asset_type == "video":
516
  clip = VideoFileClip(media_path)
517
  clip = resize_to_fill(clip, TARGET_RESOLUTION)
@@ -531,7 +678,8 @@ def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, n
531
  clip = clip.fadein(0.3).fadeout(0.3)
532
  else:
533
  return None
534
- if narration_text and caption_color != "transparent":
 
535
  try:
536
  words = narration_text.split()
537
  chunks = []
@@ -543,42 +691,42 @@ def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, n
543
  current_chunk = []
544
  if current_chunk:
545
  chunks.append(' '.join(current_chunk))
 
546
  chunk_duration = audio_duration / len(chunks)
547
  subtitle_clips = []
548
  subtitle_y_position = int(TARGET_RESOLUTION[1] * 0.70)
 
549
  for i, chunk_text in enumerate(chunks):
550
  start_time = i * chunk_duration
551
  end_time = (i + 1) * chunk_duration
552
  txt_clip = TextClip(
553
  chunk_text,
554
- fontsize=caption_font_size,
555
  font='Arial-Bold',
556
- color=caption_color,
557
  bg_color='rgba(0, 0, 0, 0.25)',
558
  method='caption',
559
  align='center',
560
  stroke_width=2,
561
- stroke_color=caption_color,
562
  size=(TARGET_RESOLUTION[0] * 0.8, None)
563
  ).set_start(start_time).set_end(end_time)
564
  txt_clip = txt_clip.set_position(('center', subtitle_y_position))
565
  subtitle_clips.append(txt_clip)
 
566
  clip = CompositeVideoClip([clip] + subtitle_clips)
567
  except Exception as sub_error:
568
  print(f"Subtitle error: {sub_error}")
569
  txt_clip = TextClip(
570
  narration_text,
571
- fontsize=caption_font_size,
572
- color=caption_color,
573
  align='center',
574
  size=(TARGET_RESOLUTION[0] * 0.7, None)
575
  ).set_position(('center', int(TARGET_RESOLUTION[1] / 3))).set_duration(clip.duration)
576
  clip = CompositeVideoClip([clip, txt_clip])
 
577
  clip = clip.set_audio(audio_clip)
578
- if transition_effect == "fade":
579
- clip = clip.crossfadein(0.5)
580
- elif transition_effect == "slide":
581
- clip = clip.set_position(lambda t: ('center', -TARGET_RESOLUTION[1] + t * TARGET_RESOLUTION[1] / clip.duration))
582
  print(f"Clip created: {clip.duration:.1f}s")
583
  return clip
584
  except Exception as e:
@@ -586,6 +734,7 @@ def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, n
586
  return None
587
 
588
  def fix_imagemagick_policy():
 
589
  try:
590
  print("Attempting to fix ImageMagick security policies...")
591
  policy_paths = [
@@ -609,45 +758,31 @@ def fix_imagemagick_policy():
609
  print(f"Error fixing policies: {e}")
610
  return False
611
 
612
def handle_music_upload(music_file):
    """Store an uploaded background-music file as ``music.mp3`` in the working directory.

    Args:
        music_file: One of
            * a filesystem path (``str`` or ``os.PathLike``),
            * a readable binary file-like object (its ``read()`` output is saved),
            * an object that only exposes the file's path via a ``name`` attribute.
            Falsy values (``None``, ``""``) mean that nothing was uploaded.

    Returns:
        The destination path ``"music.mp3"`` on success, or ``None`` when no
        file was supplied or the file could not be stored.
    """
    if not music_file:
        return None

    music_path = "music.mp3"
    try:
        if isinstance(music_file, (str, os.PathLike)):
            source_path = os.fspath(music_file)
        elif hasattr(music_file, "read"):
            source_path = None
        else:
            # No read(): fall back to the path carried by the wrapper object.
            source_path = os.fspath(music_file.name)

        if source_path is None:
            with open(music_path, 'wb') as f:
                f.write(music_file.read())
        elif not _same_existing_file(source_path, music_path):
            shutil.copy(source_path, music_path)
        # else: the upload already *is* music.mp3 -- copying a file onto itself
        # raises shutil.SameFileError, so there is nothing to do.

        print(f"Music file uploaded and renamed to {music_path}")
        return music_path
    except Exception as e:
        # Best effort: a broken upload must not abort video generation, the
        # caller simply continues without background music.
        print(f"Error handling music upload: {e}")
        return None


def _same_existing_file(first, second):
    """Return True when both paths exist and refer to the same file."""
    try:
        return os.path.samefile(first, second)
    except OSError:
        return False
627
 
628
- def generate_video(user_input, resolution, caption_option, voice_option, voice_speed, music_file, caption_color, caption_font_size, fps, preset, transition_effect, bitrate):
629
- global TARGET_RESOLUTION, CAPTION_COLOR, CAPTION_FONT_SIZE, TEMP_FOLDER
630
- import Shri
631
  # Set resolution
632
  if resolution == "Full":
633
  TARGET_RESOLUTION = (1920, 1080)
634
  elif resolution == "Short":
635
  TARGET_RESOLUTION = (1080, 1920)
636
- elif resolution == "Square":
637
- TARGET_RESOLUTION = (1080, 1080)
638
  else:
639
- TARGET_RESOLUTION = (1920, 1080)
640
- # Set caption settings
641
- CAPTION_COLOR = caption_color if caption_option == "Yes" else "transparent"
642
- CAPTION_FONT_SIZE = caption_font_size
 
643
  # Create a unique temporary folder
644
  TEMP_FOLDER = tempfile.mkdtemp()
645
- # Handle music upload
646
- handle_music_upload(music_file)
647
  # Fix ImageMagick policy
648
  fix_success = fix_imagemagick_policy()
649
  if not fix_success:
650
  print("Will use alternative methods if needed")
 
651
  print("Generating script from API...")
652
  script = generate_script(user_input)
653
  if not script:
@@ -661,14 +796,17 @@ def generate_video(user_input, resolution, caption_option, voice_option, voice_s
661
  shutil.rmtree(TEMP_FOLDER)
662
  return None
663
  print(f"Parsed {len(elements)//2} script segments.")
 
664
  paired_elements = []
665
  for i in range(0, len(elements), 2):
666
  if i + 1 < len(elements):
667
  paired_elements.append((elements[i], elements[i + 1]))
 
668
  if not paired_elements:
669
  print("No valid script segments found.")
670
  shutil.rmtree(TEMP_FOLDER)
671
  return None
 
672
  clips = []
673
  for idx, (media_elem, tts_elem) in enumerate(paired_elements):
674
  print(f"\nProcessing segment {idx+1}/{len(paired_elements)} with prompt: '{media_elem['prompt']}'")
@@ -676,7 +814,7 @@ def generate_video(user_input, resolution, caption_option, voice_option, voice_s
676
  if not media_asset:
677
  print(f"Skipping segment {idx+1} due to missing media asset.")
678
  continue
679
- tts_path = generate_tts(tts_elem['text'], voice_option, voice_speed)
680
  if not tts_path:
681
  print(f"Skipping segment {idx+1} due to TTS generation failure.")
682
  continue
@@ -687,107 +825,68 @@ def generate_video(user_input, resolution, caption_option, voice_option, voice_s
687
  duration=tts_elem['duration'],
688
  effects=media_elem.get('effects', 'fade-in'),
689
  narration_text=tts_elem['text'],
690
- segment_index=idx,
691
- caption_color=CAPTION_COLOR,
692
- caption_font_size=CAPTION_FONT_SIZE,
693
- transition_effect=transition_effect
694
  )
695
  if clip:
696
  clips.append(clip)
697
  else:
698
  print(f"Clip creation failed for segment {idx+1}.")
 
699
  if not clips:
700
  print("No clips were successfully created.")
701
  shutil.rmtree(TEMP_FOLDER)
702
  return None
 
703
  print("\nConcatenating clips...")
704
- final_video = concatenate_videoclips(clips, method="compose", transition=TextClip("", duration=0.5) if transition_effect == "fade" else None)
705
  final_video = add_background_music(final_video, bg_music_volume=0.08)
 
706
  print(f"Exporting final video to {OUTPUT_VIDEO_FILENAME}...")
707
- final_video.write_videofile(
708
- OUTPUT_VIDEO_FILENAME,
709
- codec='libx264',
710
- fps=fps,
711
- preset=preset,
712
- bitrate=f"{bitrate}k" if bitrate else None
713
- )
714
  print(f"Final video saved as {OUTPUT_VIDEO_FILENAME}")
 
715
  # Clean up
716
  print("Cleaning up temporary files...")
717
  shutil.rmtree(TEMP_FOLDER)
718
- if os.path.exists("music.mp3"):
719
- os.remove("music.mp3")
720
- print("Removed uploaded music file.")
721
  print("Temporary files removed.")
 
722
  return OUTPUT_VIDEO_FILENAME
723
 
724
# Gradio Interface
# Input widgets, listed in the same positional order as generate_video's
# parameters: user_input, resolution, caption_option, voice_option,
# voice_speed, music_file, caption_color, caption_font_size, fps, preset,
# transition_effect, bitrate.
_video_inputs = [
    gr.Textbox(label="Video Concept", placeholder="Enter your video concept here..."),
    gr.Radio(["Full", "Short", "Square"], label="Resolution", value="Full"),
    gr.Radio(["Yes", "No"], label="Captions", value="Yes"),
    gr.Dropdown(["af_heart", "en_male", "en_female", "en_neutral"], label="Voice Option", value="af_heart"),
    gr.Slider(minimum=0.5, maximum=1.5, step=0.1, label="Voice Speed", value=0.9),
    gr.File(label="Upload Background Music (MP3)", file_types=[".mp3"]),
    gr.ColorPicker(label="Caption Color", value="#FFFFFF"),
    gr.Slider(minimum=20, maximum=60, step=1, label="Caption Font Size", value=45),
    gr.Slider(minimum=24, maximum=60, step=1, label="FPS", value=30),
    gr.Dropdown(["ultrafast", "veryfast", "fast", "medium", "slow"], label="Encoding Preset", value="veryfast"),
    gr.Dropdown(["fade", "slide", "none"], label="Transition Effect", value="fade"),
    gr.Slider(minimum=1000, maximum=8000, step=500, label="Bitrate (kbps)", value=4000),
]

# The single output is the rendered video returned by generate_video.
iface = gr.Interface(
    fn=generate_video,
    inputs=_video_inputs,
    outputs=gr.Video(label="Generated Video"),
    title="AI Documentary Video Generator",
    description="Create a funny documentary-style video with customizable options. Upload background music, adjust voice, captions, and video settings. Note: Generation may take several minutes on CPU.",
)
787
 
788
# Launch the interface.
# iface.launch() starts the server itself and does not return an awaitable, so
# wrapping it in asyncio.run() / asyncio.ensure_future() raises instead of
# helping. Call it directly.
# As before: on Emscripten the app is launched whenever the module is loaded,
# everywhere else only when the file is executed as a script.
if platform.system() == "Emscripten" or __name__ == "__main__":
    iface.launch(share=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ # Import necessary libraries
4
+ from kokoro import KPipeline
5
+
6
+ import soundfile as sf
7
+ import torch
8
+
9
+ import soundfile as sf
10
  import os
11
+ from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip
12
+ from PIL import Image
13
  import tempfile
14
  import random
 
 
 
 
 
 
 
 
15
  import cv2
16
+ import math
17
+ import os, requests, io, time, re, random
 
 
18
  from moviepy.editor import (
19
  VideoFileClip, concatenate_videoclips, AudioFileClip, ImageClip,
20
  CompositeVideoClip, TextClip, CompositeAudioClip
21
  )
22
+ import gradio as gr
23
+ import shutil
24
+ import os
25
  import moviepy.video.fx.all as vfx
26
  import moviepy.config as mpy_config
27
+ from pydub import AudioSegment
28
+ from pydub.generators import Sine
29
+
30
+ from PIL import Image, ImageDraw, ImageFont
31
+ import numpy as np
32
+ from bs4 import BeautifulSoup
33
+ import base64
34
+ from urllib.parse import quote
35
+ import pysrt
36
+ from gtts import gTTS
37
+ import gradio as gr # Import Gradio
38
 
39
+ # Initialize Kokoro TTS pipeline (using American English)
40
+ pipeline = KPipeline(lang_code='a') # Use voice 'af_heart' for American English
41
+ # Ensure ImageMagick binary is set
42
  mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})
43
 
44
# ---------------- Global Configuration ---------------- #
# NOTE(security): the literal API keys below are committed to source control
# and should be treated as compromised -- rotate them and provide the new
# values through the PEXELS_API_KEY / OPENROUTER_API_KEY environment variables.
# The literals are kept only as fallbacks so existing deployments keep working
# when the variables are unset or empty.
PEXELS_API_KEY = os.environ.get('PEXELS_API_KEY') or 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'
OPENROUTER_API_KEY = os.environ.get('OPENROUTER_API_KEY') or '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'
# Model identifier sent in the 'model' field of the OpenRouter chat request.
OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"
# File name the finished video is exported to.
OUTPUT_VIDEO_FILENAME = "final_video.mp4"
# Browser-like User-Agent sent with the image search/download requests.
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
50
 
51
+ # ---------------- Helper Functions ---------------- #
52
+ # (Your existing helper functions remain unchanged: generate_script, parse_script,
53
+ # search_pexels_videos, search_pexels_images, search_google_images, download_image,
54
+ # download_video, generate_media, generate_tts, apply_kenburns_effect,
55
+ # resize_to_fill, find_mp3_files, add_background_music, create_clip,
56
+ # fix_imagemagick_policy)
57
+
58
+ # Define these globally as they were in your original code but will be set per run
59
  TARGET_RESOLUTION = None
60
  CAPTION_COLOR = None
 
61
  TEMP_FOLDER = None
62
 
 
63
  def generate_script(user_input):
64
+ """Generate documentary script with proper OpenRouter handling."""
65
  headers = {
66
  'Authorization': f'Bearer {OPENROUTER_API_KEY}',
67
  'HTTP-Referer': 'https://your-domain.com',
68
  'X-Title': 'AI Documentary Maker'
69
  }
70
+
71
  prompt = f"""Short Documentary Script GeneratorInstructions:
72
+
73
  If I say "use this," just output the script exactly as I gave it.
74
  If I only give topics, generate a script based on them.
75
  If I provide a full script, rewrite it without any changes. Make everything short simple and humarous funny and act as serious but humarous. And don't say anything off topic. Also alway say a funny statement to subscribe based on the video topic at the end. Use normal conversational text like a normal person talking and avoid AI phase make the statements humanize and normal conversational
76
  And always talk negative views don't really wants any positive statement and the full script will be one video about one topic so keep in mind
77
  Formatting Rules:
78
+
79
+
80
  Title in Square Brackets:
81
+
82
+
83
  Each section starts with a one-word title inside [ ] (max two words if necessary).
84
  This title will be used as a search term for Pexels footage.
85
+
86
+
87
+
88
  Casual & Funny Narration:
89
+
90
+
91
  Each section has 5-10 words of narration.
92
  Keep it natural, funny, and unpredictable (not robotic, poetic, or rhythmic).
93
+
94
+
95
+
96
  No Special Formatting:
97
+
98
+
99
  No bold, italics, or special characters. You are a assistant AI your task is to create script. You aren't a chatbot. So, don't write extra text
100
+
101
+
102
+
103
  Generalized Search Terms:
104
+
105
+
106
  If a term is too specific, make it more general for Pexels search.
107
+
108
+
109
+
110
  Scene-Specific Writing:
111
+
112
+
113
  Each section describes only what should be shown in the video.
114
+
115
+
116
+
117
  Output Only the Script, and also make it funny and humarous and helirous and also add to subscribe with a funny statement like subscribe now or .....
118
+
119
+
120
  No extra text, just the script.
121
+
122
+
123
+
124
  Example Output:
125
  [North Korea]
126
+
127
  Top 5 unknown facts about North Korea.
128
+
129
  [Invisibility]
130
+
131
  North Korea’s internet speed is so fast… it doesn’t exist.
132
+
133
  [Leadership]
134
+
135
  Kim Jong-un once won an election with 100% votes… against himself.
136
+
137
  [Magic]
138
+
139
  North Korea discovered time travel. That’s why their news is always from the past.
140
+
141
  [Warning]
142
+
143
  Subscribe now, or Kim Jong-un will send you a free one-way ticket… to North Korea.
144
+
145
  [Freedom]
146
+
147
  North Korean citizens can do anything… as long as it's government-approved.
148
  Now here is the Topic/scrip: {user_input}
149
  """
150
+
151
  data = {
152
  'model': OPENROUTER_MODEL,
153
  'messages': [{'role': 'user', 'content': prompt}],
154
  'temperature': 0.4,
155
  'max_tokens': 5000
156
  }
157
+
158
  try:
159
  response = requests.post(
160
  'https://openrouter.ai/api/v1/chat/completions',
 
162
  json=data,
163
  timeout=30
164
  )
165
+
166
  if response.status_code == 200:
167
  response_data = response.json()
168
  if 'choices' in response_data and len(response_data['choices']) > 0:
 
173
  else:
174
  print(f"API Error {response.status_code}: {response.text}")
175
  return None
176
+
177
  except Exception as e:
178
  print(f"Request failed: {str(e)}")
179
  return None
180
 
181
  def parse_script(script_text):
182
+ """
183
+ Parse the generated script into a list of elements.
184
+ For each section, create two elements:
185
+ - A 'media' element using the section title as the visual prompt.
186
+ - A 'tts' element with the narration text, voice info, and computed duration.
187
+ """
188
  sections = {}
189
  current_title = None
190
  current_text = ""
191
+
192
  try:
193
  for line in script_text.splitlines():
194
  line = line.strip()
 
202
  current_text = line[bracket_end+1:].strip()
203
  elif current_title:
204
  current_text += line + " "
205
+
206
  if current_title:
207
  sections[current_title] = current_text.strip()
208
+
209
  elements = []
210
  for title, narration in sections.items():
211
  if not title or not narration:
212
  continue
213
+
214
  media_element = {"type": "media", "prompt": title, "effects": "fade-in"}
215
  words = narration.split()
216
  duration = max(3, len(words) * 0.5)
217
  tts_element = {"type": "tts", "text": narration, "voice": "en", "duration": duration}
218
  elements.append(media_element)
219
  elements.append(tts_element)
220
+
221
  return elements
222
  except Exception as e:
223
  print(f"Error parsing script: {e}")
224
  return []
225
 
226
  def search_pexels_videos(query, pexels_api_key):
227
+ """Search for a video on Pexels by query and return a random HD video."""
228
  headers = {'Authorization': pexels_api_key}
229
  base_url = "https://api.pexels.com/videos/search"
230
  num_pages = 3
231
  videos_per_page = 15
232
+
233
  max_retries = 3
234
  retry_delay = 1
235
+
236
  search_query = query
237
  all_videos = []
238
+
239
  for page in range(1, num_pages + 1):
240
  for attempt in range(max_retries):
241
  try:
242
  params = {"query": search_query, "per_page": videos_per_page, "page": page}
243
  response = requests.get(base_url, headers=headers, params=params, timeout=10)
244
+
245
  if response.status_code == 200:
246
  data = response.json()
247
  videos = data.get("videos", [])
248
+
249
  if not videos:
250
  print(f"No videos found on page {page}.")
251
  break
252
+
253
  for video in videos:
254
  video_files = video.get("video_files", [])
255
  for file in video_files:
256
  if file.get("quality") == "hd":
257
  all_videos.append(file.get("link"))
258
  break
259
+
260
  break
261
+
262
  elif response.status_code == 429:
263
  print(f"Rate limit hit (attempt {attempt+1}/{max_retries}). Retrying in {retry_delay} seconds...")
264
  time.sleep(retry_delay)
 
271
  retry_delay *= 2
272
  else:
273
  break
274
+
275
  except requests.exceptions.RequestException as e:
276
  print(f"Request exception: {e}")
277
  if attempt < max_retries - 1:
 
280
  retry_delay *= 2
281
  else:
282
  break
283
+
284
  if all_videos:
285
  random_video = random.choice(all_videos)
286
  print(f"Selected random video from {len(all_videos)} HD videos")
 
290
  return None
291
 
292
  def search_pexels_images(query, pexels_api_key):
293
+ """Search for an image on Pexels by query."""
294
  headers = {'Authorization': pexels_api_key}
295
  url = "https://api.pexels.com/v1/search"
296
  params = {"query": query, "per_page": 5, "orientation": "landscape"}
297
+
298
  max_retries = 3
299
  retry_delay = 1
300
+
301
  for attempt in range(max_retries):
302
  try:
303
  response = requests.get(url, headers=headers, params=params, timeout=10)
304
+
305
  if response.status_code == 200:
306
  data = response.json()
307
  photos = data.get("photos", [])
 
312
  else:
313
  print(f"No images found for query: {query}")
314
  return None
315
+
316
  elif response.status_code == 429:
317
  print(f"Rate limit hit (attempt {attempt+1}/{max_retries}). Retrying in {retry_delay} seconds...")
318
  time.sleep(retry_delay)
 
323
  print(f"Retrying in {retry_delay} seconds...")
324
  time.sleep(retry_delay)
325
  retry_delay *= 2
326
+
327
  except requests.exceptions.RequestException as e:
328
  print(f"Request exception: {e}")
329
  if attempt < max_retries - 1:
330
  print(f"Retrying in {retry_delay} seconds...")
331
  time.sleep(retry_delay)
332
  retry_delay *= 2
333
+
334
  print(f"No Pexels images found for query: {query} after all attempts")
335
  return None
336
 
337
  def search_google_images(query):
338
+ """Search for images on Google Images (for news-related queries)"""
339
  try:
340
  search_url = f"https://www.google.com/search?q={quote(query)}&tbm=isch"
341
  headers = {"User-Agent": USER_AGENT}
342
  response = requests.get(search_url, headers=headers, timeout=10)
343
  soup = BeautifulSoup(response.text, "html.parser")
344
+
345
  img_tags = soup.find_all("img")
346
  image_urls = []
347
  for img in img_tags:
348
  src = img.get("src", "")
349
  if src.startswith("http") and "gstatic" not in src:
350
  image_urls.append(src)
351
+
352
  if image_urls:
353
  return random.choice(image_urls[:5]) if len(image_urls) >= 5 else image_urls[0]
354
  else:
 
359
  return None
360
 
361
  def download_image(image_url, filename):
362
+ """Download an image from a URL to a local file with enhanced error handling."""
363
  try:
364
  headers = {"User-Agent": USER_AGENT}
365
  print(f"Downloading image from: {image_url} to {filename}")
366
  response = requests.get(image_url, headers=headers, stream=True, timeout=15)
367
  response.raise_for_status()
368
+
369
  with open(filename, 'wb') as f:
370
  for chunk in response.iter_content(chunk_size=8192):
371
  f.write(chunk)
372
+
373
  print(f"Image downloaded successfully to: {filename}")
374
+
375
  try:
376
  img = Image.open(filename)
377
  img.verify()
 
386
  if os.path.exists(filename):
387
  os.remove(filename)
388
  return None
389
+
390
  except requests.exceptions.RequestException as e_download:
391
  print(f"Image download error: {e_download}")
392
  if os.path.exists(filename):
 
399
  return None
400
 
401
  def download_video(video_url, filename):
402
+ """Download a video from a URL to a local file."""
403
  try:
404
  response = requests.get(video_url, stream=True, timeout=30)
405
  response.raise_for_status()
 
415
  return None
416
 
417
  def generate_media(prompt, user_image=None, current_index=0, total_segments=1):
418
+ """
419
+ Generate a visual asset by first searching for a video or using a specific search strategy.
420
+ For news-related queries, use Google Images.
421
+ Returns a dict: {'path': <file_path>, 'asset_type': 'video' or 'image'}.
422
+ """
423
  safe_prompt = re.sub(r'[^\w\s-]', '', prompt).strip().replace(' ', '_')
424
+
425
  if "news" in prompt.lower():
426
  print(f"News-related query detected: {prompt}. Using Google Images...")
427
  image_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}_news.jpg")
 
433
  return {"path": downloaded_image, "asset_type": "image"}
434
  else:
435
  print(f"Google Images search failed for prompt: {prompt}")
436
+
437
  if random.random() < 0.25:
438
  video_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}_video.mp4")
439
  video_url = search_pexels_videos(prompt, PEXELS_API_KEY)
 
444
  return {"path": downloaded_video, "asset_type": "video"}
445
  else:
446
  print(f"Pexels video search failed for prompt: {prompt}")
447
+
448
  image_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}.jpg")
449
  image_url = search_pexels_images(prompt, PEXELS_API_KEY)
450
  if image_url:
 
454
  return {"path": downloaded_image, "asset_type": "image"}
455
  else:
456
  print(f"Pexels image download failed for prompt: {prompt}")
457
+
458
  fallback_terms = ["nature", "people", "landscape", "technology", "business"]
459
  for term in fallback_terms:
460
  print(f"Trying fallback image search with term: {term}")
 
469
  print(f"Fallback image download failed for term: {term}")
470
  else:
471
  print(f"Fallback image search failed for term: {term}")
472
+
473
  print(f"Failed to generate visual asset for prompt: {prompt}")
474
  return None
475
 
476
  def generate_silent_audio(duration, sample_rate=24000):
477
+ """Generate a silent WAV audio file lasting 'duration' seconds."""
478
  num_samples = int(duration * sample_rate)
479
  silence = np.zeros(num_samples, dtype=np.float32)
480
  silent_path = os.path.join(TEMP_FOLDER, f"silent_{int(time.time())}.wav")
 
482
  print(f"Silent audio generated: {silent_path}")
483
  return silent_path
484
 
485
+ def generate_tts(text, voice):
486
+ """
487
+ Generate TTS audio using Kokoro, falling back to gTTS or silent audio if needed.
488
+ """
489
  safe_text = re.sub(r'[^\w\s-]', '', text[:10]).strip().replace(' ', '_')
490
  file_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}.wav")
491
+
492
  if os.path.exists(file_path):
493
  print(f"Using cached TTS for text '{text[:10]}...'")
494
  return file_path
495
+
496
  try:
497
+ kokoro_voice = 'af_heart' if voice == 'en' else voice
498
+ generator = pipeline(text, voice=kokoro_voice, speed=0.9, split_pattern=r'\n+')
499
  audio_segments = []
500
  for i, (gs, ps, audio) in enumerate(generator):
501
  audio_segments.append(audio)
 
520
  return generate_silent_audio(duration=max(3, len(text.split()) * 0.5))
521
 
522
  def apply_kenburns_effect(clip, target_resolution, effect_type=None):
523
+ """Apply a smooth Ken Burns effect with a single movement pattern."""
524
  target_w, target_h = target_resolution
525
  clip_aspect = clip.w / clip.h
526
  target_aspect = target_w / target_h
527
+
528
  if clip_aspect > target_aspect:
529
  new_height = target_h
530
  new_width = int(new_height * clip_aspect)
531
  else:
532
  new_width = target_w
533
  new_height = int(new_width / clip_aspect)
534
+
535
  clip = clip.resize(newsize=(new_width, new_height))
536
  base_scale = 1.15
537
  new_width = int(new_width * base_scale)
538
  new_height = int(new_height * base_scale)
539
  clip = clip.resize(newsize=(new_width, new_height))
540
+
541
  max_offset_x = new_width - target_w
542
  max_offset_y = new_height - target_h
543
+
544
  available_effects = ["zoom-in", "zoom-out", "pan-left", "pan-right", "up-left"]
545
  if effect_type is None or effect_type == "random":
546
  effect_type = random.choice(available_effects)
547
+
548
  if effect_type == "zoom-in":
549
  start_zoom = 0.9
550
  end_zoom = 1.1
 
572
  end_center = (target_w / 2, target_h / 2)
573
  else:
574
  raise ValueError(f"Unsupported effect_type: {effect_type}")
575
+
576
  def transform_frame(get_frame, t):
577
  frame = get_frame(t)
578
  ratio = t / clip.duration if clip.duration > 0 else 0
 
591
  cropped_frame = cv2.getRectSubPix(frame, (crop_w, crop_h), (current_center_x, current_center_y))
592
  resized_frame = cv2.resize(cropped_frame, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)
593
  return resized_frame
594
+
595
  return clip.fl(transform_frame)
596
 
597
  def resize_to_fill(clip, target_resolution):
598
+ """Resize and crop a clip to fill the target resolution while maintaining aspect ratio."""
599
  target_w, target_h = target_resolution
600
  clip_aspect = clip.w / clip.h
601
  target_aspect = target_w / target_h
602
+
603
  if clip_aspect > target_aspect:
604
  clip = clip.resize(height=target_h)
605
  crop_amount = (clip.w - target_w) / 2
 
608
  clip = clip.resize(width=target_w)
609
  crop_amount = (clip.h - target_h) / 2
610
  clip = clip.crop(x1=0, x2=clip.w, y1=crop_amount, y2=clip.h - crop_amount)
611
+
612
  return clip
613
 
614
def find_mp3_files():
    """Return the path of the first MP3 file under the current directory.

    The tree rooted at ``'.'`` is walked top-down, visiting directory and
    file names in sorted order so the "first" match is the same on every
    run. The ``.mp3`` extension is matched case-insensitively, and every
    match is logged to stdout.

    Returns:
        The path (relative to ``'.'``) of the first MP3 found, or ``None``
        when the tree contains no MP3 file.
    """
    mp3_files = []
    for root, dirs, files in os.walk('.'):
        # Sorting ``dirs`` in place controls the order os.walk descends in.
        dirs.sort()
        for file in sorted(files):
            # Compare on the lower-cased name so "SONG.MP3" is found as well.
            if file.lower().endswith('.mp3'):
                mp3_path = os.path.join(root, file)
                mp3_files.append(mp3_path)
                print(f"Found MP3 file: {mp3_path}")
    return mp3_files[0] if mp3_files else None
624
+
625
  def add_background_music(final_video, bg_music_volume=0.10):
626
+ """Add background music to the final video using any MP3 file found."""
627
  try:
628
  bg_music_path = "music.mp3"
629
  if bg_music_path and os.path.exists(bg_music_path):
 
647
  print("Continuing without background music")
648
  return final_video
649
 
650
+ def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, narration_text=None, segment_index=0):
651
+ """Create a video clip with synchronized subtitles and narration."""
652
  try:
653
  print(f"Creating clip #{segment_index} with asset_type: {asset_type}, media_path: {media_path}")
654
  if not os.path.exists(media_path) or not os.path.exists(tts_path):
655
  print("Missing media or TTS file")
656
  return None
657
+
658
  audio_clip = AudioFileClip(tts_path).audio_fadeout(0.2)
659
  audio_duration = audio_clip.duration
660
  target_duration = audio_duration + 0.2
661
+
662
  if asset_type == "video":
663
  clip = VideoFileClip(media_path)
664
  clip = resize_to_fill(clip, TARGET_RESOLUTION)
 
678
  clip = clip.fadein(0.3).fadeout(0.3)
679
  else:
680
  return None
681
+
682
+ if narration_text and CAPTION_COLOR != "transparent":
683
  try:
684
  words = narration_text.split()
685
  chunks = []
 
691
  current_chunk = []
692
  if current_chunk:
693
  chunks.append(' '.join(current_chunk))
694
+
695
  chunk_duration = audio_duration / len(chunks)
696
  subtitle_clips = []
697
  subtitle_y_position = int(TARGET_RESOLUTION[1] * 0.70)
698
+
699
  for i, chunk_text in enumerate(chunks):
700
  start_time = i * chunk_duration
701
  end_time = (i + 1) * chunk_duration
702
  txt_clip = TextClip(
703
  chunk_text,
704
+ fontsize=45,
705
  font='Arial-Bold',
706
+ color=CAPTION_COLOR,
707
  bg_color='rgba(0, 0, 0, 0.25)',
708
  method='caption',
709
  align='center',
710
  stroke_width=2,
711
+ stroke_color=CAPTION_COLOR,
712
  size=(TARGET_RESOLUTION[0] * 0.8, None)
713
  ).set_start(start_time).set_end(end_time)
714
  txt_clip = txt_clip.set_position(('center', subtitle_y_position))
715
  subtitle_clips.append(txt_clip)
716
+
717
  clip = CompositeVideoClip([clip] + subtitle_clips)
718
  except Exception as sub_error:
719
  print(f"Subtitle error: {sub_error}")
720
  txt_clip = TextClip(
721
  narration_text,
722
+ fontsize=28,
723
+ color=CAPTION_COLOR,
724
  align='center',
725
  size=(TARGET_RESOLUTION[0] * 0.7, None)
726
  ).set_position(('center', int(TARGET_RESOLUTION[1] / 3))).set_duration(clip.duration)
727
  clip = CompositeVideoClip([clip, txt_clip])
728
+
729
  clip = clip.set_audio(audio_clip)
 
 
 
 
730
  print(f"Clip created: {clip.duration:.1f}s")
731
  return clip
732
  except Exception as e:
 
734
  return None
735
 
736
  def fix_imagemagick_policy():
737
+ """Fix ImageMagick security policies."""
738
  try:
739
  print("Attempting to fix ImageMagick security policies...")
740
  policy_paths = [
 
758
  print(f"Error fixing policies: {e}")
759
  return False
760
 
761
+ # ---------------- Main Function with Gradio Integration ---------------- #
762
+ def generate_video(user_input, resolution, caption_option):
763
+ """Generate a video based on user input via Gradio."""
764
+ global TARGET_RESOLUTION, CAPTION_COLOR, TEMP_FOLDER
765
+ import shutil
 
 
 
 
 
 
 
 
 
 
766
 
 
 
 
767
  # Set resolution
768
  if resolution == "Full":
769
  TARGET_RESOLUTION = (1920, 1080)
770
  elif resolution == "Short":
771
  TARGET_RESOLUTION = (1080, 1920)
 
 
772
  else:
773
+ TARGET_RESOLUTION = (1920, 1080) # Default
774
+
775
+ # Set caption color
776
+ CAPTION_COLOR = "white" if caption_option == "Yes" else "transparent"
777
+
778
  # Create a unique temporary folder
779
  TEMP_FOLDER = tempfile.mkdtemp()
780
+
 
781
  # Fix ImageMagick policy
782
  fix_success = fix_imagemagick_policy()
783
  if not fix_success:
784
  print("Will use alternative methods if needed")
785
+
786
  print("Generating script from API...")
787
  script = generate_script(user_input)
788
  if not script:
 
796
  shutil.rmtree(TEMP_FOLDER)
797
  return None
798
  print(f"Parsed {len(elements)//2} script segments.")
799
+
800
  paired_elements = []
801
  for i in range(0, len(elements), 2):
802
  if i + 1 < len(elements):
803
  paired_elements.append((elements[i], elements[i + 1]))
804
+
805
  if not paired_elements:
806
  print("No valid script segments found.")
807
  shutil.rmtree(TEMP_FOLDER)
808
  return None
809
+
810
  clips = []
811
  for idx, (media_elem, tts_elem) in enumerate(paired_elements):
812
  print(f"\nProcessing segment {idx+1}/{len(paired_elements)} with prompt: '{media_elem['prompt']}'")
 
814
  if not media_asset:
815
  print(f"Skipping segment {idx+1} due to missing media asset.")
816
  continue
817
+ tts_path = generate_tts(tts_elem['text'], tts_elem['voice'])
818
  if not tts_path:
819
  print(f"Skipping segment {idx+1} due to TTS generation failure.")
820
  continue
 
825
  duration=tts_elem['duration'],
826
  effects=media_elem.get('effects', 'fade-in'),
827
  narration_text=tts_elem['text'],
828
+ segment_index=idx
 
 
 
829
  )
830
  if clip:
831
  clips.append(clip)
832
  else:
833
  print(f"Clip creation failed for segment {idx+1}.")
834
+
835
  if not clips:
836
  print("No clips were successfully created.")
837
  shutil.rmtree(TEMP_FOLDER)
838
  return None
839
+
840
  print("\nConcatenating clips...")
841
+ final_video = concatenate_videoclips(clips, method="compose")
842
  final_video = add_background_music(final_video, bg_music_volume=0.08)
843
+
844
  print(f"Exporting final video to {OUTPUT_VIDEO_FILENAME}...")
845
+ final_video.write_videofile(OUTPUT_VIDEO_FILENAME, codec='libx264', fps=30, preset='veryfast')
 
 
 
 
 
 
846
  print(f"Final video saved as {OUTPUT_VIDEO_FILENAME}")
847
+
848
  # Clean up
849
  print("Cleaning up temporary files...")
850
  shutil.rmtree(TEMP_FOLDER)
 
 
 
851
  print("Temporary files removed.")
852
+
853
  return OUTPUT_VIDEO_FILENAME
854
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
855
 
856
+
857
+
858
def save_as_music(file):
    """Copy an uploaded file to ``music.mp3`` in the working directory.

    Args:
        file: The value delivered by the Gradio ``File`` component: a
            filesystem path, an object exposing its path as ``.name``
            (which one is passed depends on the Gradio version), or
            ``None`` when nothing was uploaded.

    Returns:
        A status message suitable for display in the UI.
    """
    # Imported locally so this handler does not rely on a module-level
    # ``import shutil`` being present.
    import shutil

    if file is None:
        return "No file uploaded!"

    # Plain paths are used as-is; upload wrappers carry the path in ``.name``.
    # (Path-like objects are checked first because their ``.name`` is only
    # the basename.)
    if isinstance(file, (str, os.PathLike)):
        source_path = file
    else:
        source_path = getattr(file, "name", file)

    new_filename = "music.mp3"
    shutil.copy(source_path, new_filename)
    return f"File saved as {new_filename}"
865
+
866
# Keep a handle on the real video pipeline (the ``generate_video`` defined
# earlier in this module) before the name is rebound just below. Previously
# this spot held a placeholder that shadowed the pipeline and always returned
# a sample URL, so the UI never produced a real video.
_run_video_pipeline = generate_video


def generate_video(concept, resolution, captions):
    """Gradio click handler: build a video for ``concept``.

    Delegates to the pipeline captured in ``_run_video_pipeline``.

    Args:
        concept: Topic or script text entered by the user.
        resolution: ``"Full"`` or ``"Short"``, as offered by the UI radio.
        captions: ``"Yes"`` or ``"No"``, as offered by the UI radio.

    Returns:
        Whatever the pipeline returns: the output video filename on
        success, or ``None`` when generation fails.
    """
    return _run_video_pipeline(concept, resolution, captions)
869
+
870
# ---------------- Gradio UI ---------------- #
with gr.Blocks() as demo:
    # Tab 1: upload an MP3 and store it as "music.mp3", the file name
    # add_background_music() looks for.
    with gr.Tab("Upload MP3"):
        mp3_input = gr.File(label="Upload MP3", file_types=[".mp3"])
        mp3_output = gr.Textbox(label="Status")
        upload_btn = gr.Button("Save as music.mp3")
        upload_btn.click(
            fn=save_as_music,
            inputs=mp3_input,
            outputs=mp3_output,
        )

    # Tab 2: collect the concept plus rendering options and show the result.
    with gr.Tab("AI Video Generator"):
        concept_input = gr.Textbox(
            label="Video Concept",
            placeholder="Enter your video concept here...",
        )
        resolution_input = gr.Radio(["Full", "Short"], label="Resolution", value="Full")
        captions_input = gr.Radio(["Yes", "No"], label="Captions", value="Yes")
        video_output = gr.Video(label="Generated Video")
        generate_btn = gr.Button("Generate Video")
        generate_btn.click(
            fn=generate_video,
            inputs=[concept_input, resolution_input, captions_input],
            outputs=video_output,
        )

# Start the web server; share=True additionally requests a public share link.
demo.launch(share=True)
888
+
889
+
890
+
891
+
892
+