testdeep123 committed on
Commit
15daf82
Β·
verified Β·
1 Parent(s): e5c2de3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +231 -694
app.py CHANGED
@@ -1,20 +1,7 @@
1
-
2
-
3
  # Import necessary libraries
4
  from kokoro import KPipeline
5
-
6
- import soundfile as sf
7
- import torch
8
-
9
  import soundfile as sf
10
  import os
11
- from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip
12
- from PIL import Image
13
- import tempfile
14
- import random
15
- import cv2
16
- import math
17
- import os, requests, io, time, re, random
18
  from moviepy.editor import (
19
  VideoFileClip, concatenate_videoclips, AudioFileClip, ImageClip,
20
  CompositeVideoClip, TextClip
@@ -22,136 +9,81 @@ from moviepy.editor import (
22
  import moviepy.video.fx.all as vfx
23
  import moviepy.config as mpy_config
24
  from pydub import AudioSegment
25
- from pydub.generators import Sine
26
-
27
- from PIL import Image, ImageDraw, ImageFont
28
- import numpy as np
29
- from bs4 import BeautifulSoup
30
- import base64
31
- from urllib.parse import quote
32
- import pysrt
33
  from gtts import gTTS
34
- import gradio as gr # Import Gradio
 
 
35
 
36
# Initialize Kokoro TTS pipeline (lang_code 'a' = American English; the
# concrete voice 'af_heart' is chosen later in generate_tts).
pipeline = KPipeline(lang_code='a')
# Point MoviePy at the ImageMagick binary used for TextClip rendering.
mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})

# ---------------- Global Configuration ---------------- #
# SECURITY NOTE(review): these API keys were committed in plain text and must
# be treated as leaked -- rotate them.  They can now be overridden via
# environment variables; the literals remain only as a backward-compatible
# fallback so existing deployments keep working.
PEXELS_API_KEY = os.environ.get('PEXELS_API_KEY', 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna')
OPENROUTER_API_KEY = os.environ.get('OPENROUTER_API_KEY', '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')
OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"
OUTPUT_VIDEO_FILENAME = "final_video.mp4"
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"

# Per-run settings, assigned by generate_video() before any processing starts.
TARGET_RESOLUTION = None   # (width, height) tuple chosen from the UI
CAPTION_COLOR = None       # "white" or "transparent"
TEMP_FOLDER = None         # per-run scratch directory (tempfile.mkdtemp)
 
 
60
def generate_script(user_input):
    """Generate a short documentary script via the OpenRouter chat API.

    Embeds *user_input* in a fixed instruction prompt, posts it to
    OPENROUTER_MODEL, and returns the raw script text.  Returns None on any
    HTTP error, unexpected payload shape, or network failure.
    """
    request_headers = {
        'Authorization': f'Bearer {OPENROUTER_API_KEY}',
        'HTTP-Referer': 'https://your-domain.com',
        'X-Title': 'AI Documentary Maker'
    }
    prompt = f"""Short Documentary Script GeneratorInstructions:

If I say "use this," just output the script exactly as I gave it.
If I only give topics, generate a script based on them.
If I provide a full script, rewrite it without any changes. Make everything short simple and humarous funny and act as serious but humarous. And don't say anything off topic. Also alway say a funny statement to subscribe based on the video topic at the end. Use normal conversational text like a normal person talking and avoid AI phase make the statements humanize and normal conversational
And always talk negative views don't really wants any positive statement and the full script will be one video about one topic so keep in mind
Formatting Rules:


Title in Square Brackets:


Each section starts with a one-word title inside [ ] (max two words if necessary).
This title will be used as a search term for Pexels footage.



Casual & Funny Narration:


Each section has 5-10 words of narration.
Keep it natural, funny, and unpredictable (not robotic, poetic, or rhythmic).



No Special Formatting:


No bold, italics, or special characters. You are a assistant AI your task is to create script. You aren't a chatbot. So, don't write extra text



Generalized Search Terms:


If a term is too specific, make it more general for Pexels search.



Scene-Specific Writing:


Each section describes only what should be shown in the video.



Output Only the Script, and also make it funny and humarous and helirous and also add to subscribe with a funny statement like subscribe now or .....


No extra text, just the script.



Example Output:
[North Korea]

Top 5 unknown facts about North Korea.

[Invisibility]

North Korea’s internet speed is so fast… it doesn’t exist.

[Leadership]

Kim Jong-un once won an election with 100% votes… against himself.

[Magic]

North Korea discovered time travel. That’s why their news is always from the past.

[Warning]

Subscribe now, or Kim Jong-un will send you a free one-way ticket… to North Korea.

[Freedom]

North Korean citizens can do anything… as long as it's government-approved.
Now here is the Topic/scrip: {user_input}
"""
    payload = {
        'model': OPENROUTER_MODEL,
        'messages': [{'role': 'user', 'content': prompt}],
        'temperature': 0.4,
        'max_tokens': 5000
    }

    try:
        response = requests.post(
            'https://openrouter.ai/api/v1/chat/completions',
            headers=request_headers,
            json=payload,
            timeout=30
        )
        if response.status_code == 200:
            response_data = response.json()
            # Defensive: the API may return an error object without 'choices'.
            choices = response_data.get('choices') if 'choices' in response_data else None
            if choices and len(choices) > 0:
                return choices[0]['message']['content']
            print("Unexpected response format:", response_data)
            return None
        print(f"API Error {response.status_code}: {response.text}")
        return None
    except Exception as e:
        print(f"Request failed: {str(e)}")
        return None
177
 
178
def parse_script(script_text):
    """Turn a bracket-delimited script into alternating media/tts elements.

    Every "[Title] narration..." section produces two dicts:
      - {'type': 'media', 'prompt': title, 'effects': 'fade-in'}
      - {'type': 'tts', 'text': narration, 'voice': 'en', 'duration': ...}
    where duration is max(3, word_count * 0.5) seconds.

    Returns an empty list when parsing fails or nothing usable is found.
    """
    try:
        sections = {}
        active_title = None
        collected = ""

        for raw_line in script_text.splitlines():
            stripped = raw_line.strip()
            if stripped.startswith("[") and "]" in stripped:
                open_idx = stripped.find("[")
                close_idx = stripped.find("]", open_idx)
                if open_idx != -1 and close_idx != -1:
                    # Flush the previous section before starting a new one.
                    if active_title is not None:
                        sections[active_title] = collected.strip()
                    active_title = stripped[open_idx + 1:close_idx]
                    collected = stripped[close_idx + 1:].strip()
            elif active_title:
                collected += stripped + " "

        if active_title:
            sections[active_title] = collected.strip()

        elements = []
        for title, narration in sections.items():
            if not title or not narration:
                continue
            word_count = len(narration.split())
            elements.append({"type": "media", "prompt": title, "effects": "fade-in"})
            elements.append({
                "type": "tts",
                "text": narration,
                "voice": "en",
                "duration": max(3, word_count * 0.5),
            })
        return elements
    except Exception as e:
        print(f"Error parsing script: {e}")
        return []
222
 
223
def search_pexels_videos(query, pexels_api_key):
    """Search Pexels for HD videos matching *query* and return one at random.

    Scans up to 3 result pages (15 hits each), collecting the first
    HD-quality file link of every video, then picks one uniformly at random.

    Args:
        query: Search term sent to the Pexels video API.
        pexels_api_key: Key placed in the 'Authorization' header.

    Returns:
        A video file URL string, or None if nothing suitable was found.
    """
    headers = {'Authorization': pexels_api_key}
    base_url = "https://api.pexels.com/videos/search"
    num_pages = 3
    videos_per_page = 15
    max_retries = 3

    all_videos = []

    for page in range(1, num_pages + 1):
        # Fix: restart the exponential backoff for each page.  Previously the
        # delay persisted across pages, so one rate-limited page inflated the
        # waits for every subsequent page.
        retry_delay = 1
        for attempt in range(max_retries):
            try:
                params = {"query": query, "per_page": videos_per_page, "page": page}
                response = requests.get(base_url, headers=headers, params=params, timeout=10)

                if response.status_code == 200:
                    videos = response.json().get("videos", [])
                    if not videos:
                        print(f"No videos found on page {page}.")
                        break
                    for video in videos:
                        # Keep only the first HD rendition of each video.
                        for file in video.get("video_files", []):
                            if file.get("quality") == "hd":
                                all_videos.append(file.get("link"))
                                break
                    break  # page fetched successfully
                elif response.status_code == 429:
                    print(f"Rate limit hit (attempt {attempt+1}/{max_retries}). Retrying in {retry_delay} seconds...")
                    time.sleep(retry_delay)
                    retry_delay *= 2
                else:
                    print(f"Error fetching videos: {response.status_code} {response.text}")
                    if attempt < max_retries - 1:
                        print(f"Retrying in {retry_delay} seconds...")
                        time.sleep(retry_delay)
                        retry_delay *= 2
                    else:
                        break
            except requests.exceptions.RequestException as e:
                print(f"Request exception: {e}")
                if attempt < max_retries - 1:
                    print(f"Retrying in {retry_delay} seconds...")
                    time.sleep(retry_delay)
                    retry_delay *= 2
                else:
                    break

    if all_videos:
        random_video = random.choice(all_videos)
        print(f"Selected random video from {len(all_videos)} HD videos")
        return random_video
    print("No suitable videos found after searching all pages.")
    return None
288
 
289
def search_pexels_images(query, pexels_api_key):
    """Look up *query* on the Pexels photo API and return one image URL.

    Picks randomly among the first five landscape-oriented hits, retrying
    with exponential backoff on rate limits and transient errors.

    Returns:
        The photo's 'original' size URL, or None when nothing is found.
    """
    request_headers = {'Authorization': pexels_api_key}
    endpoint = "https://api.pexels.com/v1/search"
    query_params = {"query": query, "per_page": 5, "orientation": "landscape"}

    attempts_allowed = 3
    backoff = 1

    for attempt in range(attempts_allowed):
        try:
            resp = requests.get(endpoint, headers=request_headers, params=query_params, timeout=10)

            if resp.status_code == 200:
                photos = resp.json().get("photos", [])
                if not photos:
                    print(f"No images found for query: {query}")
                    return None
                chosen = random.choice(photos[:min(5, len(photos))])
                return chosen.get("src", {}).get("original")

            if resp.status_code == 429:
                print(f"Rate limit hit (attempt {attempt+1}/{attempts_allowed}). Retrying in {backoff} seconds...")
                time.sleep(backoff)
                backoff *= 2
            else:
                print(f"Error fetching images: {resp.status_code} {resp.text}")
                if attempt < attempts_allowed - 1:
                    print(f"Retrying in {backoff} seconds...")
                    time.sleep(backoff)
                    backoff *= 2
        except requests.exceptions.RequestException as e:
            print(f"Request exception: {e}")
            if attempt < attempts_allowed - 1:
                print(f"Retrying in {backoff} seconds...")
                time.sleep(backoff)
                backoff *= 2

    print(f"No Pexels images found for query: {query} after all attempts")
    return None
333
 
334
def search_google_images(query):
    """Scrape Google Images for *query* and return one result URL (or None).

    Used for news-style prompts where stock photography is a poor fit.
    Filters out gstatic thumbnails and picks randomly among the first five
    absolute-URL hits.
    """
    try:
        search_url = f"https://www.google.com/search?q={quote(query)}&tbm=isch"
        headers = {"User-Agent": USER_AGENT}
        response = requests.get(search_url, headers=headers, timeout=10)
        soup = BeautifulSoup(response.text, "html.parser")

        candidates = [
            tag.get("src", "")
            for tag in soup.find_all("img")
            if tag.get("src", "").startswith("http") and "gstatic" not in tag.get("src", "")
        ]

        if not candidates:
            print(f"No Google Images found for query: {query}")
            return None
        return random.choice(candidates[:5]) if len(candidates) >= 5 else candidates[0]
    except Exception as e:
        print(f"Error in Google Images search: {e}")
        return None
357
 
358
def download_image(image_url, filename):
    """Download an image to *filename* and validate it with PIL.

    The file is verified to be a real image and converted to RGB if needed;
    on any failure the partial file is removed.

    Args:
        image_url: Source URL.
        filename: Local destination path.

    Returns:
        The local path on success, None otherwise.
    """
    try:
        headers = {"User-Agent": USER_AGENT}
        # Fix: these log lines previously printed the placeholder "(unknown)"
        # instead of the actual destination path.
        print(f"Downloading image from: {image_url} to {filename}")
        response = requests.get(image_url, headers=headers, stream=True, timeout=15)
        response.raise_for_status()

        with open(filename, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)

        print(f"Image downloaded successfully to: {filename}")

        try:
            img = Image.open(filename)
            img.verify()
            # verify() leaves the file object unusable, so reopen before use.
            img = Image.open(filename)
            if img.mode != 'RGB':
                img = img.convert('RGB')
                img.save(filename)
            print(f"Image validated and processed: {filename}")
            return filename
        except Exception as e_validate:
            print(f"Downloaded file is not a valid image: {e_validate}")
            if os.path.exists(filename):
                os.remove(filename)
            return None

    except requests.exceptions.RequestException as e_download:
        print(f"Image download error: {e_download}")
        if os.path.exists(filename):
            os.remove(filename)
        return None
    except Exception as e_general:
        print(f"General error during image processing: {e_general}")
        if os.path.exists(filename):
            os.remove(filename)
        return None
397
 
398
def download_video(video_url, filename):
    """Download a video from *video_url* to *filename*.

    Returns the local path on success; on failure removes any partial file
    and returns None.
    """
    try:
        response = requests.get(video_url, stream=True, timeout=30)
        response.raise_for_status()
        with open(filename, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
        # Fix: log the real destination path (was the placeholder "(unknown)").
        print(f"Video downloaded successfully to: {filename}")
        return filename
    except Exception as e:
        print(f"Video download error: {e}")
        if os.path.exists(filename):
            os.remove(filename)
        return None
413
 
414
def generate_media(prompt, user_image=None, current_index=0, total_segments=1):
    """Fetch a visual asset for *prompt*.

    Strategy: news-style prompts are tried on Google Images first; otherwise
    there is a 25% chance of attempting a Pexels video, then a Pexels image,
    and finally a series of generic fallback image searches.

    Args:
        prompt: Section title used as the search term.
        user_image, current_index, total_segments: Kept for interface
            compatibility; not used by the current lookup strategy.

    Returns:
        {'path': <file_path>, 'asset_type': 'video' | 'image'} or None.
    """
    safe_prompt = re.sub(r'[^\w\s-]', '', prompt).strip().replace(' ', '_')

    if "news" in prompt.lower():
        print(f"News-related query detected: {prompt}. Using Google Images...")
        news_target = os.path.join(TEMP_FOLDER, f"{safe_prompt}_news.jpg")
        news_url = search_google_images(prompt)
        if news_url:
            saved = download_image(news_url, news_target)
            if saved:
                print(f"News image saved to {saved}")
                return {"path": saved, "asset_type": "image"}
        else:
            print(f"Google Images search failed for prompt: {prompt}")

    # Occasionally prefer motion footage over a still.
    if random.random() < 0.25:
        video_target = os.path.join(TEMP_FOLDER, f"{safe_prompt}_video.mp4")
        video_url = search_pexels_videos(prompt, PEXELS_API_KEY)
        if video_url:
            saved = download_video(video_url, video_target)
            if saved:
                print(f"Video asset saved to {saved}")
                return {"path": saved, "asset_type": "video"}
        else:
            print(f"Pexels video search failed for prompt: {prompt}")

    image_target = os.path.join(TEMP_FOLDER, f"{safe_prompt}.jpg")
    image_url = search_pexels_images(prompt, PEXELS_API_KEY)
    if image_url:
        saved = download_image(image_url, image_target)
        if saved:
            print(f"Image asset saved to {saved}")
            return {"path": saved, "asset_type": "image"}
    else:
        print(f"Pexels image download failed for prompt: {prompt}")

    for term in ["nature", "people", "landscape", "technology", "business"]:
        print(f"Trying fallback image search with term: {term}")
        fallback_target = os.path.join(TEMP_FOLDER, f"fallback_{term}.jpg")
        fallback_url = search_pexels_images(term, PEXELS_API_KEY)
        if fallback_url:
            saved = download_image(fallback_url, fallback_target)
            if saved:
                print(f"Fallback image saved to {saved}")
                return {"path": saved, "asset_type": "image"}
            print(f"Fallback image download failed for term: {term}")
        else:
            print(f"Fallback image search failed for term: {term}")

    print(f"Failed to generate visual asset for prompt: {prompt}")
    return None
472
 
473
def generate_silent_audio(duration, sample_rate=24000):
    """Write *duration* seconds of silence to a WAV file in TEMP_FOLDER.

    Used as the last-resort TTS fallback so every segment has an audio track.
    Returns the path of the generated file.
    """
    sample_count = int(duration * sample_rate)
    silence = np.zeros(sample_count, dtype=np.float32)
    out_path = os.path.join(TEMP_FOLDER, f"silent_{int(time.time())}.wav")
    sf.write(out_path, silence, sample_rate)
    print(f"Silent audio generated: {out_path}")
    return out_path
481
-
482
def generate_tts(text, voice):
    """Generate narration audio for *text*, preferring Kokoro TTS.

    Falls back to gTTS when Kokoro fails, and to silent audio when both TTS
    engines fail, so callers always receive a playable WAV path.

    Args:
        text: Narration text.
        voice: 'en' selects Kokoro's 'af_heart'; anything else is passed
            through as a Kokoro voice name.

    Returns:
        Path to a WAV file inside TEMP_FOLDER.
    """
    import hashlib  # local import: only needed for the cache key

    # Fix: key the cache on a digest of the FULL text.  The previous key used
    # only the first 10 characters, so two different narrations sharing a
    # prefix would silently reuse the same audio file.
    digest = hashlib.md5(text.encode('utf-8')).hexdigest()[:16]
    file_path = os.path.join(TEMP_FOLDER, f"tts_{digest}.wav")

    if os.path.exists(file_path):
        print(f"Using cached TTS for text '{text[:10]}...'")
        return file_path

    try:
        kokoro_voice = 'af_heart' if voice == 'en' else voice
        generator = pipeline(text, voice=kokoro_voice, speed=0.9, split_pattern=r'\n+')
        audio_segments = []
        for i, (gs, ps, audio) in enumerate(generator):
            audio_segments.append(audio)
        full_audio = np.concatenate(audio_segments) if len(audio_segments) > 1 else audio_segments[0]
        sf.write(file_path, full_audio, 24000)
        print(f"TTS audio saved to {file_path} (Kokoro)")
        return file_path
    except Exception as e:
        print(f"Error with Kokoro TTS: {e}")
        try:
            print("Falling back to gTTS...")
            tts = gTTS(text=text, lang='en')
            mp3_path = os.path.join(TEMP_FOLDER, f"tts_{digest}.mp3")
            tts.save(mp3_path)
            audio = AudioSegment.from_mp3(mp3_path)
            audio.export(file_path, format="wav")
            os.remove(mp3_path)
            print(f"Fallback TTS saved to {file_path} (gTTS)")
            return file_path
        except Exception as fallback_error:
            print(f"Both TTS methods failed: {fallback_error}")
            # Last resort: silence matching the estimated narration length.
            return generate_silent_audio(duration=max(3, len(text.split()) * 0.5))
518
 
519
def apply_kenburns_effect(clip, target_resolution, effect_type=None):
    """Animate a clip with a smooth Ken Burns (pan/zoom) movement.

    The clip is first scaled to cover the target frame plus a 15% margin,
    then a crop window is interpolated (cosine-eased) between start and end
    positions determined by *effect_type*: 'zoom-in', 'zoom-out', 'pan-left',
    'pan-right', 'up-left', or None/'random' for a random pick.

    Raises:
        ValueError: for any other *effect_type* value.
    """
    target_w, target_h = target_resolution
    clip_aspect = clip.w / clip.h
    target_aspect = target_w / target_h

    # Scale so the image covers the target frame...
    if clip_aspect > target_aspect:
        scaled_h = target_h
        scaled_w = int(scaled_h * clip_aspect)
    else:
        scaled_w = target_w
        scaled_h = int(scaled_w / clip_aspect)
    clip = clip.resize(newsize=(scaled_w, scaled_h))

    # ...then enlarge by 15% to leave room for the moving crop window.
    scaled_w = int(scaled_w * 1.15)
    scaled_h = int(scaled_h * 1.15)
    clip = clip.resize(newsize=(scaled_w, scaled_h))

    max_offset_x = scaled_w - target_w
    max_offset_y = scaled_h - target_h

    if effect_type is None or effect_type == "random":
        effect_type = random.choice(["zoom-in", "zoom-out", "pan-left", "pan-right", "up-left"])

    center = (scaled_w / 2, scaled_h / 2)
    mid_y = (max_offset_y // 2) + target_h / 2
    if effect_type == "zoom-in":
        start_zoom, end_zoom = 0.9, 1.1
        start_center = end_center = center
    elif effect_type == "zoom-out":
        start_zoom, end_zoom = 1.1, 0.9
        start_center = end_center = center
    elif effect_type == "pan-left":
        start_zoom = end_zoom = 1.0
        start_center = (max_offset_x + target_w / 2, mid_y)
        end_center = (target_w / 2, mid_y)
    elif effect_type == "pan-right":
        start_zoom = end_zoom = 1.0
        start_center = (target_w / 2, mid_y)
        end_center = (max_offset_x + target_w / 2, mid_y)
    elif effect_type == "up-left":
        start_zoom = end_zoom = 1.0
        start_center = (max_offset_x + target_w / 2, max_offset_y + target_h / 2)
        end_center = (target_w / 2, target_h / 2)
    else:
        raise ValueError(f"Unsupported effect_type: {effect_type}")

    def transform_frame(get_frame, t):
        # Per-frame crop: interpolate zoom and center with cosine easing.
        frame = get_frame(t)
        progress = t / clip.duration if clip.duration > 0 else 0
        progress = 0.5 - 0.5 * math.cos(math.pi * progress)
        zoom = start_zoom + (end_zoom - start_zoom) * progress
        crop_w = int(target_w / zoom)
        crop_h = int(target_h / zoom)
        cx = start_center[0] + (end_center[0] - start_center[0]) * progress
        cy = start_center[1] + (end_center[1] - start_center[1]) * progress
        # Clamp so the crop window never leaves the scaled image.
        cx = max(crop_w / 2, min(cx, scaled_w - crop_w / 2))
        cy = max(crop_h / 2, min(cy, scaled_h - crop_h / 2))
        cropped = cv2.getRectSubPix(frame, (crop_w, crop_h), (cx, cy))
        return cv2.resize(cropped, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)

    return clip.fl(transform_frame)
593
 
594
def resize_to_fill(clip, target_resolution):
    """Scale then center-crop *clip* so it exactly fills target_resolution.

    The clip is resized along the limiting dimension (keeping aspect ratio)
    and the overflow on the other axis is trimmed equally from both sides.
    """
    target_w, target_h = target_resolution
    source_aspect = clip.w / clip.h
    frame_aspect = target_w / target_h

    if source_aspect > frame_aspect:
        # Wider than the frame: match height, trim left/right.
        clip = clip.resize(height=target_h)
        excess = (clip.w - target_w) / 2
        return clip.crop(x1=excess, x2=clip.w - excess, y1=0, y2=clip.h)

    # Taller than (or equal to) the frame: match width, trim top/bottom.
    clip = clip.resize(width=target_w)
    excess = (clip.h - target_h) / 2
    return clip.crop(x1=0, x2=clip.w, y1=excess, y2=clip.h - excess)
610
 
611
def find_mp3_files():
    """Recursively scan the working directory for MP3 files.

    Prints every match as it is found and returns the first one (walk order),
    or None when no MP3 exists.
    """
    matches = []
    for dirpath, _dirs, filenames in os.walk('.'):
        for name in filenames:
            if name.endswith('.mp3'):
                full_path = os.path.join(dirpath, name)
                matches.append(full_path)
                print(f"Found MP3 file: {full_path}")
    return matches[0] if matches else None
621
-
622
def add_background_music(final_video, bg_music_volume=0.08):
    """Mix any MP3 found on disk under the video's narration track.

    The music is looped to cover the whole video, trimmed to its length, and
    attenuated to *bg_music_volume*.  On any problem the video is returned
    unchanged, so this step can never break the render.
    """
    try:
        # Fix: these two names were used here but never imported at module
        # level (the top-of-file import only brings the video helpers), which
        # raised NameError as soon as a background track was found.
        from moviepy.editor import concatenate_audioclips, CompositeAudioClip

        bg_music_path = find_mp3_files()
        if bg_music_path and os.path.exists(bg_music_path):
            print(f"Adding background music from: {bg_music_path}")
            bg_music = AudioFileClip(bg_music_path)
            if bg_music.duration < final_video.duration:
                # Loop the track enough times to cover the video.
                loops_needed = math.ceil(final_video.duration / bg_music.duration)
                bg_music = concatenate_audioclips([bg_music] * loops_needed)
            bg_music = bg_music.subclip(0, final_video.duration)
            bg_music = bg_music.volumex(bg_music_volume)
            mixed_audio = CompositeAudioClip([final_video.audio, bg_music])
            final_video = final_video.set_audio(mixed_audio)
            print("Background music added successfully")
        else:
            print("No MP3 files found, skipping background music")
        return final_video
    except Exception as e:
        print(f"Error adding background music: {e}")
        print("Continuing without background music")
        return final_video
646
-
647
def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, narration_text=None, segment_index=0):
    """Build one video segment: media + motion effect + subtitles + narration.

    Args:
        media_path: Downloaded image or video file.
        asset_type: 'video' or 'image'; anything else returns None.
        tts_path: WAV narration; the clip is sized to its duration + 0.2s.
        duration, effects: Currently unused; kept for interface compatibility.
        narration_text: Text rendered as timed 5-word subtitle chunks
            (skipped when CAPTION_COLOR is "transparent").
        segment_index: Used only in log output.

    Returns:
        A clip with audio attached, or None on any failure.
    """
    try:
        print(f"Creating clip #{segment_index} with asset_type: {asset_type}, media_path: {media_path}")
        if not os.path.exists(media_path) or not os.path.exists(tts_path):
            print("Missing media or TTS file")
            return None

        audio_clip = AudioFileClip(tts_path).audio_fadeout(0.2)
        audio_duration = audio_clip.duration
        target_duration = audio_duration + 0.2

        if asset_type == "video":
            clip = VideoFileClip(media_path)
            clip = resize_to_fill(clip, TARGET_RESOLUTION)
            if clip.duration < target_duration:
                clip = clip.loop(duration=target_duration)
            else:
                clip = clip.subclip(0, target_duration)
        elif asset_type == "image":
            img = Image.open(media_path)
            if img.mode != 'RGB':
                # Re-save non-RGB images so ImageClip can consume them.
                with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as temp:
                    img.convert('RGB').save(temp.name)
                    media_path = temp.name
            img.close()
            clip = ImageClip(media_path).set_duration(target_duration)
            clip = apply_kenburns_effect(clip, TARGET_RESOLUTION)
            clip = clip.fadein(0.3).fadeout(0.3)
        else:
            return None

        if narration_text and CAPTION_COLOR != "transparent":
            try:
                # Split the narration into 5-word subtitle chunks.
                words = narration_text.split()
                chunks = [' '.join(words[i:i + 5]) for i in range(0, len(words), 5)]

                # Fix: whitespace-only narration produced an empty chunk list
                # and a ZeroDivisionError below; now it just skips subtitles.
                if chunks:
                    chunk_duration = audio_duration / len(chunks)
                    subtitle_y_position = int(TARGET_RESOLUTION[1] * 0.70)
                    subtitle_clips = []
                    for i, chunk_text in enumerate(chunks):
                        txt_clip = TextClip(
                            chunk_text,
                            fontsize=45,
                            font='Arial-Bold',
                            color=CAPTION_COLOR,
                            bg_color='rgba(0, 0, 0, 0.25)',
                            method='caption',
                            align='center',
                            stroke_width=2,
                            stroke_color=CAPTION_COLOR,
                            size=(TARGET_RESOLUTION[0] * 0.8, None)
                        ).set_start(i * chunk_duration).set_end((i + 1) * chunk_duration)
                        txt_clip = txt_clip.set_position(('center', subtitle_y_position))
                        subtitle_clips.append(txt_clip)
                    clip = CompositeVideoClip([clip] + subtitle_clips)
            except Exception as sub_error:
                # Fallback: a single static caption when styled subtitles fail
                # (e.g. ImageMagick policy problems).
                print(f"Subtitle error: {sub_error}")
                txt_clip = TextClip(
                    narration_text,
                    fontsize=28,
                    color=CAPTION_COLOR,
                    align='center',
                    size=(TARGET_RESOLUTION[0] * 0.7, None)
                ).set_position(('center', int(TARGET_RESOLUTION[1] / 3))).set_duration(clip.duration)
                clip = CompositeVideoClip([clip, txt_clip])

        clip = clip.set_audio(audio_clip)
        print(f"Clip created: {clip.duration:.1f}s")
        return clip
    except Exception as e:
        print(f"Error in create_clip: {str(e)}")
        return None
732
-
733
def fix_imagemagick_policy():
    """Loosen ImageMagick's security policy so TextClip can render captions.

    Locates the first known policy.xml, backs it up, and rewrites the rules
    that forbid read/write access.  Requires passwordless sudo to succeed.

    Returns:
        True when a policy file was found and patched, False otherwise.
    """
    try:
        print("Attempting to fix ImageMagick security policies...")
        candidate_paths = [
            "/etc/ImageMagick-6/policy.xml",
            "/etc/ImageMagick-7/policy.xml",
            "/etc/ImageMagick/policy.xml",
            "/usr/local/etc/ImageMagick-7/policy.xml"
        ]
        found_policy = None
        for candidate in candidate_paths:
            if os.path.exists(candidate):
                found_policy = candidate
                break
        if not found_policy:
            print("No policy.xml found. Using alternative subtitle method.")
            return False
        print(f"Modifying policy file at {found_policy}")
        os.system(f"sudo cp {found_policy} {found_policy}.bak")
        os.system(f"sudo sed -i 's/rights=\"none\"/rights=\"read|write\"/g' {found_policy}")
        os.system(f"sudo sed -i 's/<policy domain=\"path\" pattern=\"@\*\"[^>]*>/<policy domain=\"path\" pattern=\"@*\" rights=\"read|write\"/g' {found_policy}")
        os.system(f"sudo sed -i 's/<policy domain=\"coder\" rights=\"none\" pattern=\"PDF\"[^>]*>/<!-- <policy domain=\"coder\" rights=\"none\" pattern=\"PDF\"> -->/g' {found_policy}")
        print("ImageMagick policies updated successfully.")
        return True
    except Exception as e:
        print(f"Error fixing policies: {e}")
        return False
757
-
758
- # ---------------- Main Function with Gradio Integration ---------------- #
759
def generate_video(user_input, resolution, caption_option):
    """Generate a complete documentary video from a user concept (Gradio fn).

    Pipeline: script generation -> parsing -> per-segment media + TTS ->
    clip assembly -> concatenation -> background music -> export.

    Args:
        user_input: Topic or full script from the UI.
        resolution: "Full" (1920x1080) or "Short" (1080x1920).
        caption_option: "Yes" enables white subtitles.

    Returns:
        Path of the exported MP4, or None on failure.
    """
    global TARGET_RESOLUTION, CAPTION_COLOR, TEMP_FOLDER
    import shutil

    if resolution == "Full":
        TARGET_RESOLUTION = (1920, 1080)
    elif resolution == "Short":
        TARGET_RESOLUTION = (1080, 1920)
    else:
        TARGET_RESOLUTION = (1920, 1080)  # default

    CAPTION_COLOR = "white" if caption_option == "Yes" else "transparent"
    TEMP_FOLDER = tempfile.mkdtemp()

    # Fix: the temp folder was only removed on the explicit failure paths, so
    # any unexpected exception leaked it.  try/finally guarantees cleanup.
    try:
        if not fix_imagemagick_policy():
            print("Will use alternative methods if needed")

        print("Generating script from API...")
        script = generate_script(user_input)
        if not script:
            print("Failed to generate script.")
            return None
        print("Generated Script:\n", script)

        elements = parse_script(script)
        if not elements:
            print("Failed to parse script into elements.")
            return None
        print(f"Parsed {len(elements)//2} script segments.")

        # Pair each media element with its narration element.
        paired_elements = []
        for i in range(0, len(elements), 2):
            if i + 1 < len(elements):
                paired_elements.append((elements[i], elements[i + 1]))

        if not paired_elements:
            print("No valid script segments found.")
            return None

        clips = []
        for idx, (media_elem, tts_elem) in enumerate(paired_elements):
            print(f"\nProcessing segment {idx+1}/{len(paired_elements)} with prompt: '{media_elem['prompt']}'")
            media_asset = generate_media(media_elem['prompt'], current_index=idx, total_segments=len(paired_elements))
            if not media_asset:
                print(f"Skipping segment {idx+1} due to missing media asset.")
                continue
            tts_path = generate_tts(tts_elem['text'], tts_elem['voice'])
            if not tts_path:
                print(f"Skipping segment {idx+1} due to TTS generation failure.")
                continue
            clip = create_clip(
                media_path=media_asset['path'],
                asset_type=media_asset['asset_type'],
                tts_path=tts_path,
                duration=tts_elem['duration'],
                effects=media_elem.get('effects', 'fade-in'),
                narration_text=tts_elem['text'],
                segment_index=idx
            )
            if clip:
                clips.append(clip)
            else:
                print(f"Clip creation failed for segment {idx+1}.")

        if not clips:
            print("No clips were successfully created.")
            return None

        print("\nConcatenating clips...")
        final_video = concatenate_videoclips(clips, method="compose")
        final_video = add_background_music(final_video, bg_music_volume=0.08)

        print(f"Exporting final video to {OUTPUT_VIDEO_FILENAME}...")
        final_video.write_videofile(OUTPUT_VIDEO_FILENAME, codec='libx264', fps=24, preset='veryfast')
        print(f"Final video saved as {OUTPUT_VIDEO_FILENAME}")
        return OUTPUT_VIDEO_FILENAME
    finally:
        print("Cleaning up temporary files...")
        shutil.rmtree(TEMP_FOLDER)
        print("Temporary files removed.")
851
 
852
- # ---------------- Gradio Interface ---------------- #
853
- iface = gr.Interface(
854
- fn=generate_video,
855
- inputs=[
856
- gr.Textbox(label="Video Concept", placeholder="Enter your video concept here..."),
857
- gr.Radio(["Full", "Short"], label="Resolution", value="Full"),
858
- gr.Radio(["Yes", "No"], label="Captions", value="Yes")
859
- ],
860
- outputs=gr.Video(label="Generated Video"),
861
- title="AI Documentary Video Generator",
862
- description="Create a funny documentary-style video based on your concept. Note: Generation may take several minutes on CPU."
863
- )
864
-
865
- # Launch the interface
866
- iface.launch(share=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Import necessary libraries
# Standard library
import hashlib
import math
import os
import random
import re
import shutil
import tempfile
from urllib.parse import quote

# Third-party
import cv2
import gradio as gr
import numpy as np
import requests
import soundfile as sf
from bs4 import BeautifulSoup
from gtts import gTTS
from kokoro import KPipeline
from moviepy.editor import (
    VideoFileClip, concatenate_videoclips, AudioFileClip, ImageClip,
    CompositeVideoClip, TextClip, concatenate_audioclips, CompositeAudioClip
)
import moviepy.video.fx.all as vfx
import moviepy.config as mpy_config
from PIL import Image
from pydub import AudioSegment
22
 
23
# Initialize Kokoro TTS pipeline ('a' selects the American English voice set)
pipeline = KPipeline(lang_code='a')
# Ensure ImageMagick binary is set (adjust path as needed) — required by
# MoviePy's TextClip for subtitle rendering.
mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})

# Global Configuration
# SECURITY NOTE(review): API keys are hard-coded in source; rotate them and
# load from environment variables before sharing or deploying this file.
PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'
OPENROUTER_API_KEY = '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'
OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"
OUTPUT_VIDEO_FILENAME = "final_video.mp4"  # rendered output path (current dir)
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
MAX_CLIPS = 10  # Maximum number of editable clips in the UI

# Temporary folder for downloaded media and TTS audio; removed after rendering
TEMP_FOLDER = tempfile.mkdtemp()
 
 
 
 
 
 
 
 
 
38
 
39
+ # Existing Helper Functions (unchanged)
40
def generate_script(user_input):
    """Generate a short humorous documentary script via the OpenRouter chat API.

    Args:
        user_input: topic string or full script pasted by the user.

    Returns:
        The script text (sections formatted as ``[Title]`` + narration),
        or None on API/network failure.
    """
    headers = {
        'Authorization': f'Bearer {OPENROUTER_API_KEY}',
        'HTTP-Referer': 'https://your-domain.com',
        'X-Title': 'AI Documentary Maker'
    }
    prompt = f"""Short Documentary Script GeneratorInstructions:

If I say "use this," just output the script exactly as I gave it.
If I only give topics, generate a script based on them.
If I provide a full script, rewrite it without any changes. Make everything short simple and humarous funny and act as serious but humarous. And don't say anything off topic. Also alway say a funny statement to subscribe based on the video topic at the end. Use normal conversational text like a normal person talking and avoid AI phase make the statements humanize and normal conversational
And always talk negative views don't really wants any positive statement and the full script will be one video about one topic so keep in mind
Formatting Rules:

Title in Square Brackets:

Each section starts with a one-word title inside [ ] (max two words if necessary).
This title will be used as a search term for Pexels footage.

Casual & Funny Narration:

Each section has 5-10 words of narration.
Keep it natural, funny, and unpredictable (not robotic, poetic, or rhythmic).

No Special Formatting:

No bold, italics, or special characters. You are a assistant AI your task is to create script. You aren't a chatbot. So, don't write extra text

Generalized Search Terms:

If a term is too specific, make it more general for Pexels search.

Scene-Specific Writing:

Each section describes only what should be shown in the video.

Output Only the Script, and also make it funny and humarous and helirous and also add to subscribe with a funny statement like subscribe now or .....

No extra text, just the script.

Example Output:
[North Korea]
Top 5 unknown facts about North Korea.
[Invisibility]
North Korea’s internet speed is so fast… it doesn’t exist.
[Leadership]
Kim Jong-un once won an election with 100% votes… against himself.
[Magic]
North Korea discovered time travel. That’s why their news is always from the past.
[Warning]
Subscribe now, or Kim Jong-un will send you a free one-way ticket… to North Korea.
[Freedom]
North Korean citizens can do anything… as long as it's government-approved.
Now here is the Topic/scrip: {user_input}
"""
    data = {
        'model': OPENROUTER_MODEL,
        'messages': [{'role': 'user', 'content': prompt}],
        'temperature': 0.4,
        'max_tokens': 5000
    }
    try:
        response = requests.post(
            'https://openrouter.ai/api/v1/chat/completions',
            headers=headers,  # BUGFIX: headers were built but never sent, so the API key was dropped
            json=data,
            timeout=30
        )
        if response.status_code == 200:
            return response.json()['choices'][0]['message']['content']
        print(f"API Error {response.status_code}: {response.text}")
        return None
    except Exception as e:
        print(f"Request failed: {str(e)}")
        return None
102
 
103
def parse_script(script_text):
    """Parse a ``[Title]`` / narration script into per-clip dicts.

    Args:
        script_text: script where each section starts with a bracketed
            title, optionally followed by narration on the same and/or
            subsequent lines. May be None/empty.

    Returns:
        List of ``{"title": ..., "narration": ...}`` dicts, in order.
        BUGFIX: sections are accumulated in a list instead of a dict, so
        duplicate titles no longer overwrite each other; continuation
        lines are joined with a space instead of being concatenated.
    """
    clips = []
    if not script_text:
        return clips
    current_title = None
    current_text = ""
    for raw_line in script_text.splitlines():
        line = raw_line.strip()
        if line.startswith("[") and "]" in line:
            # New section: flush the previous one first.
            if current_title is not None:
                clips.append({"title": current_title, "narration": current_text.strip()})
            bracket_end = line.find("]")
            current_title = line[1:bracket_end]
            current_text = line[bracket_end + 1:].strip()
        elif current_title is not None:
            current_text += " " + line
    if current_title is not None:
        clips.append({"title": current_title, "narration": current_text.strip()})
    return clips
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
 
124
def search_pexels_videos(query, pexels_api_key):
    """Search Pexels for an HD video matching *query*.

    Returns a random HD video-file URL, or None when nothing suitable is
    found or the request fails.
    """
    headers = {'Authorization': pexels_api_key}
    url = "https://api.pexels.com/videos/search"
    params = {"query": query, "per_page": 15}
    try:
        response = requests.get(url, headers=headers, params=params, timeout=10)
        if response.status_code == 200:
            videos = response.json().get("videos", [])
            # BUGFIX: scan every rendition of every video — the HD file is
            # rarely video_files[0], so the old check usually found nothing.
            hd_links = [
                vf["link"]
                for v in videos
                for vf in v.get("video_files", [])
                if vf.get("quality") == "hd"
            ]
            return random.choice(hd_links) if hd_links else None
        print(f"Pexels video search failed: {response.status_code}")
    except Exception as e:
        print(f"Video search error: {e}")
    return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
 
138
def search_pexels_images(query, pexels_api_key):
    """Return the URL of a random landscape photo for *query* from Pexels.

    Returns None when the search fails or yields no photos.
    """
    try:
        resp = requests.get(
            "https://api.pexels.com/v1/search",
            headers={'Authorization': pexels_api_key},
            params={"query": query, "per_page": 5, "orientation": "landscape"},
            timeout=10,
        )
        if resp.status_code == 200:
            photos = resp.json().get("photos", [])
            if not photos:
                return None
            chosen = random.choice(photos)
            return chosen["src"]["original"]
    except Exception as e:
        print(f"Image search error: {e}")
    return None
150
 
151
def search_google_images(query):
    """Scrape Google Images for *query* and return one thumbnail URL.

    Picks randomly among the first few non-gstatic results; returns None
    on any failure.
    """
    search_url = f"https://www.google.com/search?q={quote(query)}&tbm=isch"
    try:
        resp = requests.get(search_url, headers={"User-Agent": USER_AGENT}, timeout=10)
        soup = BeautifulSoup(resp.text, "html.parser")
        candidates = []
        for img in soup.find_all("img"):
            src = img.get("src", "")
            if src.startswith("http") and "gstatic" not in src:
                candidates.append(src)
        if not candidates:
            return None
        return random.choice(candidates[:5])
    except Exception as e:
        print(f"Google Images error: {e}")
        return None
162
 
163
def download_image(image_url, filename):
    """Download *image_url* to *filename* and normalize it to RGB.

    Returns the filename on success, or None on failure (removing any
    partially written file so later code never sees a corrupt asset).
    """
    try:
        response = requests.get(image_url, headers={"User-Agent": USER_AGENT}, stream=True, timeout=15)
        response.raise_for_status()
        with open(filename, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
        # Force RGB so ImageClip gets a predictable mode.
        # BUGFIX: use a context manager and load() so the PIL handle is not
        # left open while overwriting the same file (fails on Windows).
        with Image.open(filename) as img:
            img.load()
            if img.mode != 'RGB':
                img = img.convert('RGB')
            img.save(filename)
        return filename
    except Exception as e:
        print(f"Image download error: {e}")
        if os.path.exists(filename):
            os.remove(filename)  # don't leave a partial/corrupt file behind
        return None
178
 
179
def download_video(video_url, filename):
    """Stream *video_url* into *filename*; return the path or None on error."""
    try:
        resp = requests.get(video_url, stream=True, timeout=30)
        resp.raise_for_status()
        with open(filename, 'wb') as out_file:
            for block in resp.iter_content(chunk_size=8192):
                out_file.write(block)
    except Exception as exc:
        print(f"Video download error: {exc}")
        return None
    return filename
190
 
191
def generate_media(prompt, custom_media=None, video_prob=0.25):
    """Resolve a media asset (image or video) for one clip.

    Priority: user-supplied media > Google Images (for "news" prompts)
    > Pexels video (with probability *video_prob*) > Pexels image.

    Returns:
        ``{"path": ..., "asset_type": "image"|"video"}`` or None when no
        asset could be obtained.
    """
    if custom_media:
        # Gradio's File component may supply a path string or a file-like
        # object exposing .name — accept both.
        media_path = getattr(custom_media, "name", custom_media)
        # BUGFIX: compare extensions case-insensitively (.MP4, .MOV, ...).
        is_video = str(media_path).lower().endswith(('.mp4', '.avi', '.mov'))
        return {"path": media_path, "asset_type": "video" if is_video else "image"}
    safe_prompt = re.sub(r'[^\w\s-]', '', prompt).strip().replace(' ', '_')
    if "news" in prompt.lower():
        # News prompts get real photos from Google Images rather than stock.
        image_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}_news.jpg")
        image_url = search_google_images(prompt)
        if image_url and download_image(image_url, image_file):
            return {"path": image_file, "asset_type": "image"}
    if random.random() < video_prob:
        video_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}_video.mp4")
        video_url = search_pexels_videos(prompt, PEXELS_API_KEY)
        if video_url and download_video(video_url, video_file):
            return {"path": video_file, "asset_type": "video"}
    image_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}.jpg")
    image_url = search_pexels_images(prompt, PEXELS_API_KEY)
    if image_url and download_image(image_url, image_file):
        return {"path": image_file, "asset_type": "image"}
    return None
211
 
 
 
 
 
 
 
 
 
 
212
def generate_tts(text, voice):
    """Synthesize narration audio for *text*.

    Tries the Kokoro pipeline first, falling back to gTTS on any error.
    Results are cached in TEMP_FOLDER, keyed by a digest of the full text.
    (*voice* is currently unused; Kokoro always uses 'af_heart'.)

    Returns:
        Path to a 24 kHz WAV file, or None when both engines fail.
    """
    # BUGFIX: key the cache on a digest of the whole text — the old
    # text[:10] prefix made different narrations collide on one file.
    digest = hashlib.md5(text.encode('utf-8')).hexdigest()[:16]
    file_path = os.path.join(TEMP_FOLDER, f"tts_{digest}.wav")
    if os.path.exists(file_path):
        return file_path
    try:
        audio_segments = [audio for _, _, audio in pipeline(text, voice='af_heart', speed=0.9, split_pattern=r'\n+')]
        full_audio = np.concatenate(audio_segments) if len(audio_segments) > 1 else audio_segments[0]
        sf.write(file_path, full_audio, 24000)
        return file_path
    except Exception as e:
        print(f"Kokoro TTS failed ({e}); falling back to gTTS.")
        try:
            tts = gTTS(text=text, lang='en')
            mp3_path = os.path.join(TEMP_FOLDER, f"tts_{digest}.mp3")
            tts.save(mp3_path)
            AudioSegment.from_mp3(mp3_path).export(file_path, format="wav")
            os.remove(mp3_path)
            return file_path
        except Exception as e2:
            # BUGFIX: an unguarded gTTS failure used to propagate and crash
            # the whole render; return None so the caller can skip the clip.
            print(f"gTTS fallback failed: {e2}")
            return None
 
 
 
 
 
 
 
 
229
 
230
def apply_kenburns_effect(clip, target_resolution):
    """Give a still-image clip a slight zoomed framing ("Ken Burns" style).

    Scales the clip so it fully covers the target frame, zooms in 15%,
    then center-crops back to the exact target resolution.
    """
    target_w, target_h = target_resolution
    clip_aspect = clip.w / clip.h
    target_aspect = target_w / target_h
    # Scale so the image covers the target frame in both dimensions.
    if clip_aspect > target_aspect:
        clip = clip.resize(width=int(target_h * clip_aspect))
    else:
        clip = clip.resize(height=int(target_w / clip_aspect))
    # BUGFIX: moviepy's resize has no 'zoom' kwarg — pass the scale factor
    # directly (resize(zoom=1.15) raised TypeError at runtime).
    clip = clip.resize(1.15)
    # BUGFIX: crop the zoomed clip itself, centered on *its* dimensions;
    # the old code widened an unassigned copy but centered on stale clip.w.
    return vfx.crop(clip, width=target_w, height=target_h,
                    x_center=clip.w / 2, y_center=clip.h / 2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
 
243
def resize_to_fill(clip, target_resolution):
    """Scale *clip* to completely fill the target frame, center-cropping
    whichever dimension overflows."""
    target_w, target_h = target_resolution
    source_aspect = clip.w / clip.h
    frame_aspect = target_w / target_h
    if source_aspect > frame_aspect:
        # Source is wider than the frame: match heights, trim the sides.
        clip = clip.resize(height=target_h)
        excess = (clip.w - target_w) / 2
        clip = clip.crop(x1=excess, x2=clip.w - excess)
    else:
        # Source is taller than the frame: match widths, trim top/bottom.
        clip = clip.resize(width=target_w)
        excess = (clip.h - target_h) / 2
        clip = clip.crop(y1=excess, y2=clip.h - excess)
    return clip
256
 
257
def add_background_music(final_video, bgm_path=None, bgm_volume=0.15):
    """Mix looping background music under *final_video*'s narration.

    Returns the video unchanged when no (existing) music file is given.
    """
    if bgm_path and os.path.exists(bgm_path):
        bg_music = AudioFileClip(bgm_path)
        # Loop the track until it covers the whole video.
        if bg_music.duration < final_video.duration:
            loops_needed = math.ceil(final_video.duration / bg_music.duration)
            bg_music = concatenate_audioclips([bg_music] * loops_needed)
        bg_music = bg_music.subclip(0, final_video.duration).volumex(bgm_volume)
        # BUGFIX: a video with no narration track would crash
        # CompositeAudioClip([None, bg_music]).
        tracks = [final_video.audio, bg_music] if final_video.audio else [bg_music]
        return final_video.set_audio(CompositeAudioClip(tracks))
    return final_video
267
+
268
def create_clip(media_path, asset_type, tts_path, narration_text, target_resolution, subtitles_enabled, font, font_size, outline_width, font_color, outline_color, position, zoom_pan_effect):
    """Build one narrated video segment from a media asset plus a TTS track.

    The clip lasts as long as the narration (plus a 0.2 s tail), is sized
    to *target_resolution*, and optionally gets word-chunked subtitles
    overlaid at the requested position ("top"/"center"/"bottom").
    """
    audio_clip = AudioFileClip(tts_path).audio_fadeout(0.2)
    target_duration = audio_clip.duration + 0.2
    if asset_type == "video":
        clip = VideoFileClip(media_path)
        clip = resize_to_fill(clip, target_resolution)
        # Loop short footage; trim long footage to the narration length.
        clip = clip.loop(duration=target_duration) if clip.duration < target_duration else clip.subclip(0, target_duration)
    else:
        clip = ImageClip(media_path).set_duration(target_duration).fadein(0.3).fadeout(0.3)
        if zoom_pan_effect:
            clip = apply_kenburns_effect(clip, target_resolution)
        clip = resize_to_fill(clip, target_resolution)
    if subtitles_enabled and narration_text:
        words = narration_text.split()
        # Subtitles appear five words at a time, spread evenly over the audio.
        chunks = [' '.join(words[i:i + 5]) for i in range(0, len(words), 5)]
        # BUGFIX: whitespace-only narration produced zero chunks and a
        # ZeroDivisionError in chunk_duration below.
        if chunks:
            chunk_duration = audio_clip.duration / len(chunks)
            y_position = target_resolution[1] * (0.1 if position == "top" else 0.8 if position == "bottom" else 0.5)
            subtitle_clips = []
            for i, chunk in enumerate(chunks):
                txt_clip = TextClip(
                    chunk,
                    fontsize=font_size,
                    font=font,
                    color=font_color,
                    stroke_color=outline_color,
                    stroke_width=outline_width,
                    method='caption',
                    align='center',
                    size=(target_resolution[0] * 0.8, None)
                ).set_start(i * chunk_duration).set_end((i + 1) * chunk_duration).set_position(('center', y_position))
                subtitle_clips.append(txt_clip)
            clip = CompositeVideoClip([clip] + subtitle_clips)
    return clip.set_audio(audio_clip)
301
+
302
# Main Video Generation Function
def generate_video(resolution, render_speed, video_clip_percent, zoom_pan_effect, bgm_upload, bgm_volume, subtitles_enabled, font, font_size, outline_width, font_color, outline_color, position, *clip_data):
    """Render the final video from the UI settings plus flattened
    (prompt, narration, custom_media) triples in *clip_data*.

    Returns the output filename, or None when no clip could be built.
    """
    global TEMP_FOLDER
    # BUGFIX: the previous run deletes TEMP_FOLDER at the end; recreate it
    # so the Generate button works more than once per session.
    if not os.path.isdir(TEMP_FOLDER):
        TEMP_FOLDER = tempfile.mkdtemp()
    target_resolution = (1080, 1920) if resolution == "Short (1080x1920)" else (1920, 1080)
    clips = []
    for i in range(0, len(clip_data), 3):
        prompt, narration, custom_media = clip_data[i], clip_data[i + 1], clip_data[i + 2]
        # BUGFIX: Gradio can deliver None for untouched textboxes — guard
        # before calling .strip().
        if not ((prompt or "").strip() or (narration or "").strip()):
            continue
        media_asset = generate_media(prompt, custom_media, video_clip_percent / 100.0)
        if not media_asset:
            print(f"Skipping clip {i // 3 + 1}: no media asset found.")
            continue
        tts_path = generate_tts(narration, 'en')
        if not tts_path:
            # BUGFIX: a failed TTS used to crash create_clip with a None path.
            print(f"Skipping clip {i // 3 + 1}: TTS generation failed.")
            continue
        clips.append(create_clip(
            media_asset['path'], media_asset['asset_type'], tts_path, narration,
            target_resolution, subtitles_enabled, font, font_size, outline_width,
            font_color, outline_color, position, zoom_pan_effect
        ))
    if not clips:
        return None
    final_video = concatenate_videoclips(clips, method="compose")
    final_video = add_background_music(final_video, bgm_upload, bgm_volume)
    final_video.write_videofile(OUTPUT_VIDEO_FILENAME, codec='libx264', fps=24, preset=render_speed)
    shutil.rmtree(TEMP_FOLDER)
    return OUTPUT_VIDEO_FILENAME
325
 
326
# Load Clips Function
def load_clips(topic, script):
    """Populate the clip editor from a pasted script or a freshly generated one.

    Returns Gradio updates ordered to match the click handler's outputs:
    [script display] + all prompt boxes + all narration boxes + all media
    uploads + all row-visibility toggles.
    """
    raw_script = script.strip() if script.strip() else generate_script(topic)
    # BUGFIX: generate_script can return None on API failure; don't let
    # that crash parse_script.
    if not raw_script:
        raw_script = ""
    clips = parse_script(raw_script)[:MAX_CLIPS]
    # BUGFIX: the old version interleaved updates per clip
    # (prompt, narration, media, row, prompt, ...), but the event handler's
    # outputs list is grouped by component type — so every update landed on
    # the wrong widget. Build grouped lists instead.
    prompt_updates, narration_updates, media_updates, row_updates = [], [], [], []
    for i in range(MAX_CLIPS):
        if i < len(clips):
            prompt_updates.append(gr.update(value=clips[i]["title"]))
            narration_updates.append(gr.update(value=clips[i]["narration"]))
            media_updates.append(gr.update(value=None))
            row_updates.append(gr.update(visible=True))
        else:
            prompt_updates.append(gr.update(value=""))
            narration_updates.append(gr.update(value=""))
            media_updates.append(gr.update(value=None))
            row_updates.append(gr.update(visible=False))
    return [gr.update(value=raw_script)] + prompt_updates + narration_updates + media_updates + row_updates
342
+
343
# Gradio Interface — three-column layout: input, per-clip editor, settings/output.
with gr.Blocks(title="🚀 Orbit Video Engine") as app:
    with gr.Row():
        # Column 1: Content Input & Script Generation
        with gr.Column():
            gr.Markdown("### 1. Content Input")
            topic_input = gr.Textbox(label="Video Topic", placeholder="e.g., Funny Cat Facts")
            script_input = gr.Textbox(label="Or Paste Full Script", lines=10, placeholder="[Title]\nNarration...")
            generate_script_btn = gr.Button("📝 Generate Script & Load Clips")
            generated_script_display = gr.Textbox(label="Generated Script", interactive=False)

        # Column 2: Clip Editor — MAX_CLIPS pre-built rows, shown/hidden by load_clips
        with gr.Column():
            gr.Markdown("### 2. Edit Clips")
            gr.Markdown("Modify prompts, narration, and upload custom media for each clip.")
            clip_rows, prompts, narrations, custom_medias = [], [], [], []
            for i in range(MAX_CLIPS):
                with gr.Row(visible=False) as row:
                    prompt = gr.Textbox(label="Visual Prompt")
                    narration = gr.Textbox(label="Narration", lines=3)
                    custom_media = gr.File(label="Upload Custom Media (Image/Video)", file_types=["image", "video"])
                clip_rows.append(row)
                prompts.append(prompt)
                narrations.append(narration)
                custom_medias.append(custom_media)

        # Column 3: Settings & Output
        with gr.Column():
            gr.Markdown("### 3. Video Settings")
            resolution = gr.Radio(["Short (1080x1920)", "Full HD (1920x1080)"], label="Resolution", value="Full HD (1920x1080)")
            render_speed = gr.Dropdown(["ultrafast", "veryfast", "fast", "medium", "slow", "veryslow"], label="Render Speed", value="fast")
            video_clip_percent = gr.Slider(0, 100, value=25, label="Video Clip Percentage")
            zoom_pan_effect = gr.Checkbox(label="Add Zoom/Pan Effect (Images)", value=True)
            with gr.Accordion("Background Music", open=False):
                bgm_upload = gr.Audio(label="Upload Background Music", type="filepath")
                bgm_volume = gr.Slider(0.0, 1.0, value=0.15, label="BGM Volume")
            with gr.Accordion("Subtitle Settings", open=True):
                subtitles_enabled = gr.Checkbox(label="Enable Subtitles", value=True)
                font = gr.Dropdown(["Impact", "Arial", "Times New Roman"], label="Font", value="Arial")
                font_size = gr.Number(label="Font Size", value=45)
                outline_width = gr.Number(label="Outline Width", value=2)
                font_color = gr.ColorPicker(label="Font Color", value="#FFFFFF")
                outline_color = gr.ColorPicker(label="Outline Color", value="#000000")
                position = gr.Radio(["top", "center", "bottom"], label="Position", value="bottom")
            generate_video_btn = gr.Button("🎬 Generate Video")
            gr.Markdown("### 4. Output")
            video_output = gr.Video(label="Generated Video")

    # Event Handlers
    # NOTE: outputs are grouped by component type (all prompts, then all
    # narrations, then media uploads, then row toggles); load_clips must
    # return its updates in exactly this order.
    generate_script_btn.click(
        load_clips,
        inputs=[topic_input, script_input],
        outputs=[generated_script_display] + prompts + narrations + custom_medias + clip_rows
    )
    generate_video_btn.click(
        generate_video,
        inputs=[resolution, render_speed, video_clip_percent, zoom_pan_effect, bgm_upload, bgm_volume, subtitles_enabled, font, font_size, outline_width, font_color, outline_color, position] + prompts + narrations + custom_medias,
        outputs=video_output
    )

app.launch(share=True)