testdeep123 commited on
Commit
2f63054
·
verified ·
1 Parent(s): 973ed6e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +841 -606
app.py CHANGED
@@ -1,632 +1,867 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- """
4
- AI Documentary Video Generator
5
- Version: 2.0
6
- Author: AI Assistant
7
- Description: Creates documentary-style videos from text prompts using TTS, media APIs, and video processing
8
- """
9
 
10
- import os
11
- import sys
12
- import re
13
- import time
14
- import random
15
- import tempfile
16
- import shutil
17
- import traceback
18
- import math
19
- from typing import Optional, Tuple, Dict, List, Union
20
- from dataclasses import dataclass
21
- from concurrent.futures import ThreadPoolExecutor
22
- from urllib.parse import quote
23
 
24
- # Third-party imports
25
- import numpy as np
26
- import cv2
27
- import requests
28
- from PIL import Image
29
  import soundfile as sf
30
  import torch
31
- from bs4 import BeautifulSoup
32
- import gradio as gr
33
 
34
- # MoviePy imports
 
 
 
 
 
 
 
 
35
  from moviepy.editor import (
36
- VideoFileClip, AudioFileClip, ImageClip,
37
- concatenate_videoclips, CompositeVideoClip,
38
- CompositeAudioClip, TextClip
39
  )
 
40
  import moviepy.video.fx.all as vfx
41
  import moviepy.config as mpy_config
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
- # Custom imports
44
- try:
45
- from kokoro import KPipeline
46
- except ImportError:
47
- print("Warning: Kokoro TTS not available. TTS features will be disabled.")
48
-
49
- # ======================
50
- # CONSTANTS & CONFIG
51
- # ======================
52
- DEFAULT_CONFIG = {
53
- 'PEXELS_API_KEY': os.getenv('PEXELS_API_KEY', 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'),
54
- 'OPENROUTER_API_KEY': os.getenv('OPENROUTER_API_KEY', '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'),
55
- 'OUTPUT_VIDEO': "documentary_output.mp4",
56
- 'USER_AGENT': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
57
- 'MAX_RETRIES': 3,
58
- 'RETRY_DELAY': 2,
59
- 'MAX_VIDEO_DURATION': 600, # 10 minutes
60
- 'TTS_SAMPLE_RATE': 24000,
61
- 'DEFAULT_VOICE': 'en',
62
- 'CAPTION_FONT': 'Arial-Bold',
63
- 'CAPTION_FONT_SIZES': {
64
- '1080p': 40,
65
- '720p': 32,
66
- '480p': 24
67
- },
68
- 'BACKGROUND_MUSIC_VOLUME': 0.08,
69
- 'DEFAULT_FPS': 30,
70
- 'VIDEO_PROBABILITY': 0.45,
71
- 'RESOLUTIONS': {
72
- '1080p': (1920, 1080),
73
- '720p': (1280, 720),
74
- '480p': (854, 480),
75
- 'vertical_1080p': (1080, 1920),
76
- 'vertical_720p': (720, 1280)
77
  }
78
- }
79
-
80
- # ======================
81
- # CORE CLASSES
82
- # ======================
83
-
84
- @dataclass
85
- class VideoSegment:
86
- media_path: str
87
- tts_path: str
88
- narration: str
89
- duration: float
90
- media_type: str # 'image' or 'video'
91
- effects: Dict
92
- caption_style: Dict
93
-
94
- class DocumentaryGenerator:
95
- def __init__(self, config: Optional[Dict] = None):
96
- self.config = config or DEFAULT_CONFIG
97
- self.tts_pipeline = None
98
- self.temp_dir = None
99
- self.current_resolution = None
100
- self.caption_color = None
101
-
102
- # Initialize subsystems
103
- self._initialize_tts()
104
- self._configure_imagemagick()
105
-
106
- def _initialize_tts(self):
107
- """Initialize the TTS pipeline"""
108
- try:
109
- if 'KPipeline' in globals():
110
- print("Initializing Kokoro TTS pipeline...")
111
- self.tts_pipeline = KPipeline(lang_code='a')
112
- print("TTS pipeline ready")
113
- except Exception as e:
114
- print(f"Could not initialize TTS: {str(e)}")
115
- self.tts_pipeline = None
116
-
117
- def _configure_imagemagick(self):
118
- """Configure ImageMagick paths"""
119
- try:
120
- common_paths = [
121
- "/usr/bin/convert",
122
- "/usr/local/bin/convert",
123
- "/opt/homebrew/bin/convert"
124
- ]
125
- for path in common_paths:
126
- if os.path.exists(path):
127
- mpy_config.change_settings({"IMAGEMAGICK_BINARY": path})
128
- print(f"ImageMagick configured: {path}")
129
- break
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  else:
131
- print("ImageMagick not found - text rendering may be limited")
132
- except Exception as e:
133
- print(f"ImageMagick config error: {str(e)}")
134
-
135
- def _create_temp_dir(self):
136
- """Create a temporary working directory"""
137
- if self.temp_dir and os.path.exists(self.temp_dir):
138
- shutil.rmtree(self.temp_dir)
139
- self.temp_dir = tempfile.mkdtemp(prefix="docgen_")
140
- print(f"Created temp directory: {self.temp_dir}")
141
- return self.temp_dir
142
-
143
- def _cleanup(self):
144
- """Clean up temporary resources"""
145
- if self.temp_dir and os.path.exists(self.temp_dir):
146
- try:
147
- shutil.rmtree(self.temp_dir)
148
- print("Cleaned up temporary files")
149
- except Exception as e:
150
- print(f"Cleanup error: {str(e)}")
151
-
152
- def generate_script(self, topic: str) -> str:
153
- """Generate a documentary script using OpenRouter API"""
154
- if not self.config['OPENROUTER_API_KEY']:
155
- return "Error: OpenRouter API key not configured"
156
-
157
- prompt = f"""Create a funny, engaging documentary script about {topic}.
158
- Format each section with [TITLE] followed by narration text.
159
- Keep narration concise (1-2 sentences per section).
160
- Include at least 5 sections.
161
- End with a humorous call-to-action."""
162
-
163
- headers = {
164
- 'Authorization': f'Bearer {self.config['OPENROUTER_API_KEY']}',
165
- 'Content-Type': 'application/json'
166
- }
167
-
168
- data = {
169
- "model": "mistralai/mistral-small-3.1-24b-instruct:free",
170
- "messages": [{"role": "user", "content": prompt}],
171
- "temperature": 0.7,
172
- "max_tokens": 1024
173
- }
174
-
175
- try:
176
- response = requests.post(
177
- 'https://openrouter.ai/api/v1/chat/completions',
178
- headers=headers,
179
- json=data,
180
- timeout=30
181
- )
182
- response.raise_for_status()
183
- return response.json()['choices'][0]['message']['content']
184
- except Exception as e:
185
- return f"Error generating script: {str(e)}"
186
-
187
- def _download_media(self, url: str, filename: str) -> Optional[str]:
188
- """Download media file from URL"""
189
- local_path = os.path.join(self.temp_dir, filename)
190
-
191
- for attempt in range(self.config['MAX_RETRIES']):
192
- try:
193
- with requests.get(url, stream=True, timeout=15) as r:
194
- r.raise_for_status()
195
- with open(local_path, 'wb') as f:
196
- for chunk in r.iter_content(chunk_size=8192):
197
- f.write(chunk)
198
-
199
- # Validate downloaded file
200
- if os.path.getsize(local_path) > 1024:
201
- return local_path
202
-
203
- except Exception as e:
204
- print(f"Download attempt {attempt + 1} failed: {str(e)}")
205
- time.sleep(self.config['RETRY_DELAY'] * (attempt + 1))
206
-
207
- return None
208
-
209
- def _search_pexels_video(self, query: str) -> Optional[str]:
210
- """Search for videos on Pexels"""
211
- if not self.config['PEXELS_API_KEY']:
212
  return None
213
-
214
- headers = {'Authorization': self.config['PEXELS_API_KEY']}
215
- params = {
216
- 'query': query,
217
- 'per_page': 15,
218
- 'orientation': 'landscape'
219
- }
220
-
221
- try:
222
- response = requests.get(
223
- 'https://api.pexels.com/videos/search',
224
- headers=headers,
225
- params=params,
226
- timeout=10
227
- )
228
- response.raise_for_status()
229
-
230
- videos = response.json().get('videos', [])
231
- if videos:
232
- video_files = videos[0].get('video_files', [])
233
- for file in video_files:
234
- if file.get('quality') == 'hd':
235
- return file.get('link')
236
- except Exception as e:
237
- print(f"Pexels search error: {str(e)}")
238
-
239
  return None
240
-
241
- def _generate_tts(self, text: str) -> Optional[str]:
242
- """Generate TTS audio for narration"""
243
- if not self.tts_pipeline:
244
- return None
245
-
246
- safe_name = re.sub(r'[^\w\-_]', '', text[:20]) + '.wav'
247
- output_path = os.path.join(self.temp_dir, safe_name)
248
-
249
- try:
250
- audio_segments = []
251
- for chunk in self.tts_pipeline(text, speed=1.0):
252
- if isinstance(chunk, tuple):
253
- chunk = chunk[-1] # Get audio data from tuple
254
- audio_segments.append(chunk)
255
-
256
- full_audio = np.concatenate(audio_segments)
257
- sf.write(output_path, full_audio, self.config['TTS_SAMPLE_RATE'])
258
- return output_path
259
- except Exception as e:
260
- print(f"TTS generation error: {str(e)}")
261
- return None
262
-
263
- def _create_video_segment(self, segment: VideoSegment) -> Optional[VideoClip]:
264
- """Create a single video segment with media, audio, and effects"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
  try:
266
- # Load media
267
- if segment.media_type == 'video':
268
- media_clip = VideoFileClip(segment.media_path)
269
- else:
270
- media_clip = ImageClip(segment.media_path).set_duration(segment.duration)
271
-
272
- # Apply effects
273
- if segment.media_type == 'image':
274
- media_clip = self._apply_kenburns(media_clip)
 
 
 
 
 
 
 
 
275
  else:
276
- media_clip = self._resize_clip(media_clip)
277
-
278
- # Add audio
279
- audio_clip = AudioFileClip(segment.tts_path)
280
- media_clip = media_clip.set_audio(audio_clip)
281
-
282
- # Add captions if enabled
283
- if segment.caption_style.get('enabled', False):
284
- media_clip = self._add_captions(media_clip, segment.narration, segment.caption_style)
285
-
286
- return media_clip
287
- except Exception as e:
288
- print(f"Segment creation error: {str(e)}")
289
- traceback.print_exc()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
  return None
291
-
292
- def _apply_kenburns(self, clip: ImageClip) -> VideoClip:
293
- """Apply Ken Burns effect to an image clip"""
294
- try:
295
- target_w, target_h = self.current_resolution
296
- base_scale = 1.2
297
-
298
- # Choose random effect
299
- effects = {
300
- 'zoom_in': {'start_scale': 1.0, 'end_scale': base_scale},
301
- 'zoom_out': {'start_scale': base_scale, 'end_scale': 1.0},
302
- 'pan_left': {'start_pos': (0.7, 0.5), 'end_pos': (0.3, 0.5)},
303
- 'pan_right': {'start_pos': (0.3, 0.5), 'end_pos': (0.7, 0.5)}
304
- }
305
- effect = random.choice(list(effects.values()))
306
-
307
- def transform(get_frame, t):
308
- ratio = t / clip.duration
309
- scale = effect['start_scale'] + (effect['end_scale'] - effect['start_scale']) * ratio
310
- pos_x = effect['start_pos'][0] + (effect['end_pos'][0] - effect['start_pos'][0]) * ratio
311
- pos_y = effect['start_pos'][1] + (effect['end_pos'][1] - effect['start_pos'][1]) * ratio
312
-
313
- frame = get_frame(t)
314
- h, w = frame.shape[:2]
315
- crop_size = (int(w/scale), int(h/scale))
316
-
317
- center_x = int(pos_x * w)
318
- center_y = int(pos_y * h)
319
-
320
- x = max(0, min(center_x - crop_size[0]//2, w - crop_size[0]))
321
- y = max(0, min(center_y - crop_size[1]//2, h - crop_size[1]))
322
-
323
- cropped = frame[y:y+crop_size[1], x:x+crop_size[0]]
324
- return cv2.resize(cropped, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)
325
-
326
- return clip.fl(transform)
327
- except Exception as e:
328
- print(f"Ken Burns error: {str(e)}")
329
- return self._resize_clip(clip)
330
-
331
- def _resize_clip(self, clip: Union[VideoClip, ImageClip]) -> VideoClip:
332
- """Resize clip to target resolution"""
333
  try:
334
- target_w, target_h = self.current_resolution
335
- clip_aspect = clip.w / clip.h
336
- target_aspect = target_w / target_h
337
-
338
- if abs(clip_aspect - target_aspect) < 0.01:
339
- return clip.resize((target_w, target_h))
340
-
341
- if clip_aspect > target_aspect:
342
- # Wider than target
343
- new_height = target_h
344
- new_width = int(clip.w * (new_height / clip.h))
345
- resized = clip.resize(height=new_height)
346
- crop = (resized.w - target_w) / 2
347
- return resized.crop(x1=crop, y1=0, x2=crop+target_w, y2=target_h)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
348
  else:
349
- # Taller than target
350
- new_width = target_w
351
- new_height = int(clip.h * (new_width / clip.w))
352
- resized = clip.resize(width=new_width)
353
- crop = (resized.h - target_h) / 2
354
- return resized.crop(x1=0, y1=crop, x2=target_w, y2=crop+target_h)
355
- except Exception as e:
356
- print(f"Resize error: {str(e)}")
357
- return clip
358
-
359
- def _add_captions(self, clip: VideoClip, text: str, style: Dict) -> VideoClip:
360
- """Add captions to a video clip"""
361
- try:
362
- words = text.split()
363
- chunks = []
364
- current_chunk = []
365
- char_count = 0
366
-
367
- # Split text into manageable chunks
368
- for word in words:
369
- if char_count + len(word) > 30 and current_chunk:
370
- chunks.append(' '.join(current_chunk))
371
- current_chunk = [word]
372
- char_count = len(word)
373
- else:
374
- current_chunk.append(word)
375
- char_count += len(word) + 1
376
-
377
- if current_chunk:
378
- chunks.append(' '.join(current_chunk))
379
-
380
- # Create text clips
381
- text_clips = []
382
- duration_per_chunk = clip.duration / len(chunks)
383
-
384
- for i, chunk in enumerate(chunks):
385
- txt_clip = TextClip(
386
- chunk,
387
- fontsize=style['font_size'],
388
- font=style['font'],
389
- color=style['color'],
390
- bg_color=style['bg_color'],
391
- stroke_color=style['stroke_color'],
392
- stroke_width=style['stroke_width'],
393
- method='caption',
394
- size=(style['max_width'], None)
395
- ).set_start(i * duration_per_chunk).set_duration(duration_per_chunk)
396
-
397
- txt_clip = txt_clip.set_position(('center', style['y_position']))
398
- text_clips.append(txt_clip)
399
-
400
- return CompositeVideoClip([clip] + text_clips)
401
- except Exception as e:
402
- print(f"Caption error: {str(e)}")
403
- return clip
404
-
405
- def generate_video(self, script: str, resolution: str, captions: bool,
406
- music_path: Optional[str] = None) -> Optional[str]:
407
- """Main video generation pipeline"""
408
- start_time = time.time()
409
-
410
  try:
411
- # Setup
412
- self._create_temp_dir()
413
- self.current_resolution = self.config['RESOLUTIONS'].get(resolution, (1920, 1080))
414
- self.caption_color = 'white' if captions else None
415
-
416
- # Parse script into segments
417
- segments = self._parse_script(script)
418
- if not segments:
419
- print("Error: No valid segments found in script")
420
- return None
421
-
422
- # Process segments in parallel
423
- with ThreadPoolExecutor() as executor:
424
- video_segments = list(executor.map(self._process_segment, segments))
425
-
426
- # Combine segments
427
- final_clip = concatenate_videoclips(
428
- [s for s in video_segments if s is not None],
429
- method="compose"
430
- )
431
-
432
- # Add background music if provided
433
- if music_path and os.path.exists(music_path):
434
- music_clip = AudioFileClip(music_path).volumex(self.config['BACKGROUND_MUSIC_VOLUME'])
435
- if music_clip.duration < final_clip.duration:
436
- music_clip = music_clip.loop(duration=final_clip.duration)
437
- final_clip = final_clip.set_audio(
438
- CompositeAudioClip([final_clip.audio, music_clip])
439
- )
440
-
441
- # Export final video
442
- output_path = self.config['OUTPUT_VIDEO']
443
- final_clip.write_videofile(
444
- output_path,
445
- codec='libx264',
446
- audio_codec='aac',
447
- fps=self.config['DEFAULT_FPS'],
448
- threads=os.cpu_count() or 4
449
- )
450
-
451
- print(f"Video generated in {time.time() - start_time:.2f} seconds")
452
- return output_path
453
-
454
- except Exception as e:
455
- print(f"Video generation failed: {str(e)}")
456
- traceback.print_exc()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
457
  return None
458
- finally:
459
- self._cleanup()
460
-
461
- def _parse_script(self, script: str) -> List[Dict]:
462
- """Parse script into media and narration segments"""
463
- segments = []
464
- current_title = None
465
- current_text = ""
466
-
467
- for line in script.split('\n'):
468
- line = line.strip()
469
- if not line:
470
- continue
471
-
472
- # Check for section title
473
- title_match = re.match(r'^\s*\[([^\]]+)\]\s*(.*)', line)
474
- if title_match:
475
- if current_title and current_text:
476
- segments.append({
477
- 'title': current_title,
478
- 'text': current_text.strip()
479
- })
480
- current_title = title_match.group(1).strip()
481
- current_text = title_match.group(2).strip() + " "
482
- elif current_title:
483
- current_text += line + " "
484
-
485
- # Add the last segment
486
- if current_title and current_text:
487
- segments.append({
488
- 'title': current_title,
489
- 'text': current_text.strip()
490
- })
491
-
492
- return segments
493
-
494
- def _process_segment(self, segment: Dict) -> Optional[VideoSegment]:
495
- """Process a single script segment into a video segment"""
496
- try:
497
- # Get media
498
- use_video = random.random() < self.config['VIDEO_PROBABILITY']
499
- if use_video:
500
- media_url = self._search_pexels_video(segment['title'])
501
- media_type = 'video'
502
  else:
503
- media_url = self._search_pexels_image(segment['title'])
504
- media_type = 'image'
505
-
506
- if not media_url:
507
- print(f"No media found for: {segment['title']}")
508
- return None
509
-
510
- # Download media
511
- media_ext = '.mp4' if media_type == 'video' else '.jpg'
512
- media_filename = f"media_{len(segment['title'])}_media_ext"
513
- media_path = self._download_media(media_url, media_filename)
514
-
515
- if not media_path:
516
- print(f"Failed to download media for: {segment['title']}")
517
- return None
518
-
519
- # Generate TTS
520
- tts_path = self._generate_tts(segment['text'])
521
- if not tts_path:
522
- print(f"Failed to generate TTS for: {segment['title']}")
523
- return None
524
-
525
- # Calculate duration based on TTS
526
- tts_duration = AudioFileClip(tts_path).duration
527
- duration = max(3.0, min(tts_duration * 1.1, 10.0)) # 3-10 seconds
528
-
529
- # Prepare caption style
530
- caption_style = {
531
- 'enabled': self.caption_color is not None,
532
- 'font_size': self._get_font_size(),
533
- 'font': self.config['CAPTION_FONT'],
534
- 'color': self.caption_color or 'white',
535
- 'bg_color': 'rgba(0,0,0,0.5)',
536
- 'stroke_color': 'black',
537
- 'stroke_width': 1.5,
538
- 'max_width': int(self.current_resolution[0] * 0.8),
539
- 'y_position': int(self.current_resolution[1] * 0.8)
540
- }
541
-
542
- return VideoSegment(
543
- media_path=media_path,
544
- tts_path=tts_path,
545
- narration=segment['text'],
546
- duration=duration,
547
- media_type=media_type,
548
- effects={'type': 'random'},
549
- caption_style=caption_style
550
- )
551
-
552
- except Exception as e:
553
- print(f"Segment processing error: {str(e)}")
554
- return None
555
-
556
- def _get_font_size(self) -> int:
557
- """Get appropriate font size for current resolution"""
558
- if self.current_resolution[1] >= 1080:
559
- return self.config['CAPTION_FONT_SIZES']['1080p']
560
- elif self.current_resolution[1] >= 720:
561
- return self.config['CAPTION_FONT_SIZES']['720p']
562
  else:
563
- return self.config['CAPTION_FONT_SIZES']['480p']
564
-
565
- # ======================
566
- # GRADIO INTERFACE
567
- # ======================
568
-
569
- def create_gradio_interface():
570
- """Create the Gradio web interface"""
571
- generator = DocumentaryGenerator()
572
-
573
- with gr.Blocks(title="AI Documentary Maker", theme="soft") as app:
574
- gr.Markdown("# AI Documentary Video Generator")
575
-
576
- with gr.Row():
577
- with gr.Column():
578
- topic_input = gr.Textbox(label="Documentary Topic", placeholder="Enter your topic...")
579
- generate_script_btn = gr.Button("Generate Script")
580
- script_output = gr.Textbox(label="Generated Script", lines=10, interactive=True)
581
-
582
- with gr.Accordion("Advanced Options", open=False):
583
- resolution = gr.Dropdown(
584
- list(generator.config['RESOLUTIONS'].keys()),
585
- value="1080p",
586
- label="Resolution"
587
- )
588
- captions = gr.Checkbox(
589
- value=True,
590
- label="Enable Captions"
591
- )
592
- music_input = gr.Audio(
593
- label="Background Music",
594
- type="filepath",
595
- optional=True
596
- )
597
- video_prob = gr.Slider(
598
- minimum=0.1,
599
- maximum=1.0,
600
- value=generator.config['VIDEO_PROBABILITY'],
601
- label="Video Clip Probability"
602
- )
603
-
604
- generate_video_btn = gr.Button("Generate Video", variant="primary")
605
-
606
- with gr.Column():
607
- video_output = gr.Video(label="Generated Documentary")
608
- status_output = gr.Textbox(label="Status", interactive=False)
609
-
610
- # Event handlers
611
- generate_script_btn.click(
612
- fn=generator.generate_script,
613
- inputs=[topic_input],
614
- outputs=[script_output]
615
- )
616
-
617
- generate_video_btn.click(
618
- fn=generator.generate_video,
619
- inputs=[script_output, resolution, captions, music_input],
620
- outputs=[video_output],
621
- api_name="generate"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
622
  )
623
-
624
- return app
 
 
625
 
626
- # ======================
627
- # MAIN EXECUTION
628
- # ======================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
629
 
630
- if __name__ == "__main__":
631
- app = create_gradio_interface()
632
- app.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
 
 
 
1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
+ # Import necessary libraries
4
+ from kokoro import KPipeline
5
+
 
 
6
  import soundfile as sf
7
  import torch
 
 
8
 
9
+ import soundfile as sf
10
+ import os
11
+ from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip
12
+ from PIL import Image
13
+ import tempfile
14
+ import random
15
+ import cv2
16
+ import math
17
+ import os, requests, io, time, re, random
18
  from moviepy.editor import (
19
+ VideoFileClip, concatenate_videoclips, AudioFileClip, ImageClip,
20
+ CompositeVideoClip, TextClip, CompositeAudioClip
 
21
  )
22
+
23
  import moviepy.video.fx.all as vfx
24
  import moviepy.config as mpy_config
25
+ from pydub import AudioSegment
26
+ from pydub.generators import Sine
27
+
28
+ from PIL import Image, ImageDraw, ImageFont
29
+ import numpy as np
30
+ from bs4 import BeautifulSoup
31
+ import base64
32
+ from urllib.parse import quote
33
+ import pysrt
34
+ from gtts import gTTS
35
+ import gradio as gr # Import Gradio
36
+
37
+ # Initialize Kokoro TTS pipeline (using American English)
38
+ pipeline = KPipeline(lang_code='a') # Use voice 'af_heart' for American English
39
+ # Ensure ImageMagick binary is set
40
+ mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})
41
+
42
+ # ---------------- Global Configuration ---------------- #
43
+ PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'
44
+ OPENROUTER_API_KEY = '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'
45
+ OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"
46
+ OUTPUT_VIDEO_FILENAME = "final_video.mp4"
47
+ USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
48
+
49
+ # ---------------- Helper Functions ---------------- #
50
+ # (Your existing helper functions remain unchanged: generate_script, parse_script,
51
+ # search_pexels_videos, search_pexels_images, search_google_images, download_image,
52
+ # download_video, generate_media, generate_tts, apply_kenburns_effect,
53
+ # resize_to_fill, find_mp3_files, add_background_music, create_clip,
54
+ # fix_imagemagick_policy)
55
+
56
+ # Define these globally as they were in your original code but will be set per run
57
+ TARGET_RESOLUTION = None
58
+ CAPTION_COLOR = None
59
+ TEMP_FOLDER = None
60
 
61
+ def generate_script(user_input):
62
+ """Generate documentary script with proper OpenRouter handling."""
63
+ headers = {
64
+ 'Authorization': f'Bearer {OPENROUTER_API_KEY}',
65
+ 'HTTP-Referer': 'https://your-domain.com',
66
+ 'X-Title': 'AI Documentary Maker'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  }
68
+
69
+ prompt = f"""Short Documentary Script GeneratorInstructions:
70
+
71
+ If I say "use this," just output the script exactly as I gave it.
72
+ If I only give topics, generate a script based on them.
73
+ If I provide a full script, rewrite it without any changes. Make everything short simple and humarous funny and act as serious but humarous. And don't say anything off topic. Also alway say a funny statement to subscribe based on the video topic at the end. Use normal conversational text like a normal person talking and avoid AI phase make the statements humanize and normal conversational
74
+ And always talk negative views don't really wants any positive statement and the full script will be one video about one topic so keep in mind
75
+ Formatting Rules:
76
+
77
+
78
+ Title in Square Brackets:
79
+
80
+
81
+ Each section starts with a one-word title inside [ ] (max two words if necessary).
82
+ This title will be used as a search term for Pexels footage.
83
+
84
+
85
+
86
+ Casual & Funny Narration:
87
+
88
+
89
+ Each section has 5-10 words of narration.
90
+ Keep it natural, funny, and unpredictable (not robotic, poetic, or rhythmic).
91
+
92
+
93
+
94
+ No Special Formatting:
95
+
96
+
97
+ No bold, italics, or special characters. You are a assistant AI your task is to create script. You aren't a chatbot. So, don't write extra text
98
+
99
+
100
+
101
+ Generalized Search Terms:
102
+
103
+
104
+ If a term is too specific, make it more general for Pexels search.
105
+
106
+
107
+
108
+ Scene-Specific Writing:
109
+
110
+
111
+ Each section describes only what should be shown in the video.
112
+
113
+
114
+
115
+ Output Only the Script, and also make it funny and humarous and helirous and also add to subscribe with a funny statement like subscribe now or .....
116
+
117
+
118
+ No extra text, just the script.
119
+
120
+
121
+
122
+ Example Output:
123
+ [North Korea]
124
+
125
+ Top 5 unknown facts about North Korea.
126
+
127
+ [Invisibility]
128
+
129
+ North Korea’s internet speed is so fast… it doesn’t exist.
130
+
131
+ [Leadership]
132
+
133
+ Kim Jong-un once won an election with 100% votes… against himself.
134
+
135
+ [Magic]
136
+
137
+ North Korea discovered time travel. That’s why their news is always from the past.
138
+
139
+ [Warning]
140
+
141
+ Subscribe now, or Kim Jong-un will send you a free one-way ticket… to North Korea.
142
+
143
+ [Freedom]
144
+
145
+ North Korean citizens can do anything… as long as it's government-approved.
146
+ Now here is the Topic/scrip: {user_input}
147
+ """
148
+
149
+ data = {
150
+ 'model': OPENROUTER_MODEL,
151
+ 'messages': [{'role': 'user', 'content': prompt}],
152
+ 'temperature': 0.4,
153
+ 'max_tokens': 5000
154
+ }
155
+
156
+ try:
157
+ response = requests.post(
158
+ 'https://openrouter.ai/api/v1/chat/completions',
159
+ headers=headers,
160
+ json=data,
161
+ timeout=30
162
+ )
163
+
164
+ if response.status_code == 200:
165
+ response_data = response.json()
166
+ if 'choices' in response_data and len(response_data['choices']) > 0:
167
+ return response_data['choices'][0]['message']['content']
168
  else:
169
+ print("Unexpected response format:", response_data)
170
+ return None
171
+ else:
172
+ print(f"API Error {response.status_code}: {response.text}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  return None
174
+
175
+ except Exception as e:
176
+ print(f"Request failed: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  return None
178
+
179
+ def parse_script(script_text):
180
+ """
181
+ Parse the generated script into a list of elements.
182
+ For each section, create two elements:
183
+ - A 'media' element using the section title as the visual prompt.
184
+ - A 'tts' element with the narration text, voice info, and computed duration.
185
+ """
186
+ sections = {}
187
+ current_title = None
188
+ current_text = ""
189
+
190
+ try:
191
+ for line in script_text.splitlines():
192
+ line = line.strip()
193
+ if line.startswith("[") and "]" in line:
194
+ bracket_start = line.find("[")
195
+ bracket_end = line.find("]", bracket_start)
196
+ if bracket_start != -1 and bracket_end != -1:
197
+ if current_title is not None:
198
+ sections[current_title] = current_text.strip()
199
+ current_title = line[bracket_start+1:bracket_end]
200
+ current_text = line[bracket_end+1:].strip()
201
+ elif current_title:
202
+ current_text += line + " "
203
+
204
+ if current_title:
205
+ sections[current_title] = current_text.strip()
206
+
207
+ elements = []
208
+ for title, narration in sections.items():
209
+ if not title or not narration:
210
+ continue
211
+
212
+ media_element = {"type": "media", "prompt": title, "effects": "fade-in"}
213
+ words = narration.split()
214
+ duration = max(3, len(words) * 0.5)
215
+ tts_element = {"type": "tts", "text": narration, "voice": "en", "duration": duration}
216
+ elements.append(media_element)
217
+ elements.append(tts_element)
218
+
219
+ return elements
220
+ except Exception as e:
221
+ print(f"Error parsing script: {e}")
222
+ return []
223
+
224
+ def search_pexels_videos(query, pexels_api_key):
225
+ """Search for a video on Pexels by query and return a random HD video."""
226
+ headers = {'Authorization': pexels_api_key}
227
+ base_url = "https://api.pexels.com/videos/search"
228
+ num_pages = 3
229
+ videos_per_page = 15
230
+
231
+ max_retries = 3
232
+ retry_delay = 1
233
+
234
+ search_query = query
235
+ all_videos = []
236
+
237
+ for page in range(1, num_pages + 1):
238
+ for attempt in range(max_retries):
239
+ try:
240
+ params = {"query": search_query, "per_page": videos_per_page, "page": page}
241
+ response = requests.get(base_url, headers=headers, params=params, timeout=10)
242
+
243
+ if response.status_code == 200:
244
+ data = response.json()
245
+ videos = data.get("videos", [])
246
+
247
+ if not videos:
248
+ print(f"No videos found on page {page}.")
249
+ break
250
+
251
+ for video in videos:
252
+ video_files = video.get("video_files", [])
253
+ for file in video_files:
254
+ if file.get("quality") == "hd":
255
+ all_videos.append(file.get("link"))
256
+ break
257
+
258
+ break
259
+
260
+ elif response.status_code == 429:
261
+ print(f"Rate limit hit (attempt {attempt+1}/{max_retries}). Retrying in {retry_delay} seconds...")
262
+ time.sleep(retry_delay)
263
+ retry_delay *= 2
264
+ else:
265
+ print(f"Error fetching videos: {response.status_code} {response.text}")
266
+ if attempt < max_retries - 1:
267
+ print(f"Retrying in {retry_delay} seconds...")
268
+ time.sleep(retry_delay)
269
+ retry_delay *= 2
270
+ else:
271
+ break
272
+
273
+ except requests.exceptions.RequestException as e:
274
+ print(f"Request exception: {e}")
275
+ if attempt < max_retries - 1:
276
+ print(f"Retrying in {retry_delay} seconds...")
277
+ time.sleep(retry_delay)
278
+ retry_delay *= 2
279
+ else:
280
+ break
281
+
282
+ if all_videos:
283
+ random_video = random.choice(all_videos)
284
+ print(f"Selected random video from {len(all_videos)} HD videos")
285
+ return random_video
286
+ else:
287
+ print("No suitable videos found after searching all pages.")
288
+ return None
289
+
290
def search_pexels_images(query, pexels_api_key):
    """Search for an image on Pexels by query.

    Queries the Pexels photo-search API (landscape orientation, up to five
    results) and returns the "original"-size URL of one randomly chosen
    photo. Retries up to three times with exponential backoff on rate
    limits (HTTP 429), transient errors, and request exceptions. Returns
    None when nothing is found or every attempt fails.
    """
    headers = {'Authorization': pexels_api_key}
    url = "https://api.pexels.com/v1/search"
    params = {"query": query, "per_page": 5, "orientation": "landscape"}

    max_retries = 3
    retry_delay = 1

    for attempt in range(max_retries):
        try:
            response = requests.get(url, headers=headers, params=params, timeout=10)
        except requests.exceptions.RequestException as e:
            print(f"Request exception: {e}")
            if attempt < max_retries - 1:
                print(f"Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
                retry_delay *= 2
            continue

        if response.status_code == 200:
            photos = response.json().get("photos", [])
            if not photos:
                # An empty result set is definitive; no point retrying.
                print(f"No images found for query: {query}")
                return None
            chosen = random.choice(photos[:min(5, len(photos))])
            return chosen.get("src", {}).get("original")

        if response.status_code == 429:
            print(f"Rate limit hit (attempt {attempt+1}/{max_retries}). Retrying in {retry_delay} seconds...")
            time.sleep(retry_delay)
            retry_delay *= 2
        else:
            print(f"Error fetching images: {response.status_code} {response.text}")
            if attempt < max_retries - 1:
                print(f"Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
                retry_delay *= 2

    print(f"No Pexels images found for query: {query} after all attempts")
    return None
334
+
335
def search_google_images(query):
    """Search Google Images for a photo URL matching *query*.

    Used for news-related prompts where stock-photo libraries are a poor
    fit. Scrapes the image-search results page and returns one candidate
    URL, or None when nothing usable is found or the request fails.
    """
    try:
        search_url = f"https://www.google.com/search?q={quote(query)}&tbm=isch"
        headers = {"User-Agent": USER_AGENT}
        response = requests.get(search_url, headers=headers, timeout=10)
        soup = BeautifulSoup(response.text, "html.parser")

        # Keep only real external image URLs; "gstatic" srcs are
        # Google-hosted thumbnails, not the source images.
        image_urls = []
        for img in soup.find_all("img"):
            src = img.get("src", "")
            if src.startswith("http") and "gstatic" not in src:
                image_urls.append(src)

        if image_urls:
            # Bug fix: the old code randomized only when >= 5 results were
            # available and otherwise always returned the first hit.
            # random.choice over the first-five slice handles any
            # non-empty list uniformly.
            return random.choice(image_urls[:5])
        print(f"No Google Images found for query: {query}")
        return None
    except Exception as e:
        print(f"Error in Google Images search: {e}")
        return None
358
+
359
def download_image(image_url, filename):
    """Download an image from a URL to a local file with enhanced error handling.

    Streams the response to *filename*, then opens it with PIL to verify it
    is a real image and converts it to RGB when needed (JPEG cannot store
    palette/alpha modes). Returns *filename* on success; on any failure the
    partial file is removed and None is returned.
    """
    try:
        headers = {"User-Agent": USER_AGENT}
        # Bug fix: these log lines previously printed a literal "(unknown)"
        # placeholder instead of the destination path.
        print(f"Downloading image from: {image_url} to {filename}")
        response = requests.get(image_url, headers=headers, stream=True, timeout=15)
        response.raise_for_status()

        with open(filename, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)

        print(f"Image downloaded successfully to: {filename}")

        try:
            # Image.verify() invalidates the object, so reopen afterwards
            # before converting/saving.
            img = Image.open(filename)
            img.verify()
            img = Image.open(filename)
            if img.mode != 'RGB':
                img = img.convert('RGB')
                img.save(filename)
            print(f"Image validated and processed: {filename}")
            return filename
        except Exception as e_validate:
            print(f"Downloaded file is not a valid image: {e_validate}")
            if os.path.exists(filename):
                os.remove(filename)
            return None

    except requests.exceptions.RequestException as e_download:
        print(f"Image download error: {e_download}")
        if os.path.exists(filename):
            os.remove(filename)
        return None
    except Exception as e_general:
        print(f"General error during image processing: {e_general}")
        if os.path.exists(filename):
            os.remove(filename)
        return None
398
+
399
def download_video(video_url, filename):
    """Download a video from a URL to a local file.

    Streams the response to *filename* in 8 KiB chunks. Returns the path on
    success, or None after removing any partially written file on failure.
    """
    try:
        response = requests.get(video_url, stream=True, timeout=30)
        response.raise_for_status()
        with open(filename, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
        # Bug fix: this log line previously printed a literal "(unknown)"
        # placeholder instead of the destination path.
        print(f"Video downloaded successfully to: {filename}")
        return filename
    except Exception as e:
        print(f"Video download error: {e}")
        if os.path.exists(filename):
            os.remove(filename)
        return None
414
+
415
def generate_media(prompt, user_image=None, current_index=0, total_segments=1):
    """
    Generate a visual asset by first searching for a video or using a specific search strategy.
    For news-related queries, use Google Images.
    Returns a dict: {'path': <file_path>, 'asset_type': 'video' or 'image'}.

    Fallback order: Google Images (news prompts) -> Pexels video (~25% of
    segments) -> Pexels image for the prompt -> generic Pexels fallback
    terms. Returns None only when every strategy fails.

    NOTE(review): user_image, current_index and total_segments are accepted
    but not referenced in this body — presumably reserved for future use;
    confirm before removing.
    """
    # Sanitize the prompt so it is safe to use as a filename component.
    safe_prompt = re.sub(r'[^\w\s-]', '', prompt).strip().replace(' ', '_')

    # News prompts: stock libraries rarely match current events, so scrape
    # Google Images instead.
    if "news" in prompt.lower():
        print(f"News-related query detected: {prompt}. Using Google Images...")
        image_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}_news.jpg")
        image_url = search_google_images(prompt)
        if image_url:
            downloaded_image = download_image(image_url, image_file)
            if downloaded_image:
                print(f"News image saved to {downloaded_image}")
                return {"path": downloaded_image, "asset_type": "image"}
            else:
                print(f"Google Images search failed for prompt: {prompt}")

    # Roughly one in four segments tries a Pexels video for visual variety.
    if random.random() < 0.25:
        video_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}_video.mp4")
        video_url = search_pexels_videos(prompt, PEXELS_API_KEY)
        if video_url:
            downloaded_video = download_video(video_url, video_file)
            if downloaded_video:
                print(f"Video asset saved to {downloaded_video}")
                return {"path": downloaded_video, "asset_type": "video"}
            else:
                print(f"Pexels video search failed for prompt: {prompt}")

    # Primary path: a Pexels still image matching the exact prompt.
    image_file = os.path.join(TEMP_FOLDER, f"{safe_prompt}.jpg")
    image_url = search_pexels_images(prompt, PEXELS_API_KEY)
    if image_url:
        downloaded_image = download_image(image_url, image_file)
        if downloaded_image:
            print(f"Image asset saved to {downloaded_image}")
            return {"path": downloaded_image, "asset_type": "image"}
        else:
            print(f"Pexels image download failed for prompt: {prompt}")

    # Last resort: generic search terms so the segment still gets a visual.
    fallback_terms = ["nature", "people", "landscape", "technology", "business"]
    for term in fallback_terms:
        print(f"Trying fallback image search with term: {term}")
        fallback_file = os.path.join(TEMP_FOLDER, f"fallback_{term}.jpg")
        fallback_url = search_pexels_images(term, PEXELS_API_KEY)
        if fallback_url:
            downloaded_fallback = download_image(fallback_url, fallback_file)
            if downloaded_fallback:
                print(f"Fallback image saved to {downloaded_fallback}")
                return {"path": downloaded_fallback, "asset_type": "image"}
            else:
                print(f"Fallback image download failed for term: {term}")
        else:
            print(f"Fallback image search failed for term: {term}")

    print(f"Failed to generate visual asset for prompt: {prompt}")
    return None
473
+
474
def generate_silent_audio(duration, sample_rate=24000):
    """Generate a silent WAV audio file lasting 'duration' seconds.

    Writes float32 zeros at *sample_rate* Hz into the module TEMP_FOLDER
    and returns the path to the new file.
    """
    sample_count = int(duration * sample_rate)
    samples = np.zeros(sample_count, dtype=np.float32)
    # Timestamp in the name keeps successive files from colliding.
    out_path = os.path.join(TEMP_FOLDER, f"silent_{int(time.time())}.wav")
    sf.write(out_path, samples, sample_rate)
    print(f"Silent audio generated: {out_path}")
    return out_path
482
+
483
def generate_tts(text, voice):
    """
    Generate TTS audio using Kokoro, falling back to gTTS or silent audio if needed.

    Results are cached in TEMP_FOLDER keyed on the first 10 characters of
    *text*, so repeated narration snippets reuse one WAV file. Returns the
    path to a WAV file in all cases (silence as the last resort).
    """
    # Cache key: sanitized first-10-chars of the text.
    safe_text = re.sub(r'[^\w\s-]', '', text[:10]).strip().replace(' ', '_')
    file_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}.wav")

    if os.path.exists(file_path):
        print(f"Using cached TTS for text '{text[:10]}...'")
        return file_path

    try:
        # Kokoro uses its own voice ids; map the generic 'en' to a default.
        kokoro_voice = 'af_heart' if voice == 'en' else voice
        generator = pipeline(text, voice=kokoro_voice, speed=0.9, split_pattern=r'\n+')
        audio_segments = []
        for i, (gs, ps, audio) in enumerate(generator):
            audio_segments.append(audio)
        # Kokoro yields one chunk per split; stitch them back together.
        full_audio = np.concatenate(audio_segments) if len(audio_segments) > 1 else audio_segments[0]
        sf.write(file_path, full_audio, 24000)
        print(f"TTS audio saved to {file_path} (Kokoro)")
        return file_path
    except Exception as e:
        print(f"Error with Kokoro TTS: {e}")
        try:
            # Fallback 1: gTTS produces MP3, so convert to WAV via pydub.
            print("Falling back to gTTS...")
            tts = gTTS(text=text, lang='en')
            mp3_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}.mp3")
            tts.save(mp3_path)
            audio = AudioSegment.from_mp3(mp3_path)
            audio.export(file_path, format="wav")
            os.remove(mp3_path)
            print(f"Fallback TTS saved to {file_path} (gTTS)")
            return file_path
        except Exception as fallback_error:
            # Fallback 2: silence sized by a rough 0.5 s/word estimate, so
            # the video segment still has an audio track of sane length.
            print(f"Both TTS methods failed: {fallback_error}")
            return generate_silent_audio(duration=max(3, len(text.split()) * 0.5))
519
+
520
def apply_kenburns_effect(clip, target_resolution, effect_type=None):
    """Apply a smooth Ken Burns effect with a single movement pattern.

    The clip is first scaled to cover the target frame, then oversized by
    15% so pans/zooms have headroom without exposing borders. Each frame is
    cropped around a moving center (eased with a cosine curve) and resized
    back to the target resolution. effect_type may be one of
    "zoom-in", "zoom-out", "pan-left", "pan-right", "up-left", or
    None/"random" to pick one at random.
    """
    target_w, target_h = target_resolution
    clip_aspect = clip.w / clip.h
    target_aspect = target_w / target_h

    # Scale so the clip covers the target frame in both dimensions.
    if clip_aspect > target_aspect:
        new_height = target_h
        new_width = int(new_height * clip_aspect)
    else:
        new_width = target_w
        new_height = int(new_width / clip_aspect)

    clip = clip.resize(newsize=(new_width, new_height))
    # 15% oversize: the margin the pan/zoom can travel through.
    base_scale = 1.15
    new_width = int(new_width * base_scale)
    new_height = int(new_height * base_scale)
    clip = clip.resize(newsize=(new_width, new_height))

    # Maximum travel before the crop window would leave the image.
    max_offset_x = new_width - target_w
    max_offset_y = new_height - target_h

    available_effects = ["zoom-in", "zoom-out", "pan-left", "pan-right", "up-left"]
    if effect_type is None or effect_type == "random":
        effect_type = random.choice(available_effects)

    # Each effect is a (start_zoom, end_zoom, start_center, end_center)
    # tuple interpolated over the clip's duration.
    if effect_type == "zoom-in":
        start_zoom = 0.9
        end_zoom = 1.1
        start_center = (new_width / 2, new_height / 2)
        end_center = start_center
    elif effect_type == "zoom-out":
        start_zoom = 1.1
        end_zoom = 0.9
        start_center = (new_width / 2, new_height / 2)
        end_center = start_center
    elif effect_type == "pan-left":
        start_zoom = 1.0
        end_zoom = 1.0
        start_center = (max_offset_x + target_w / 2, (max_offset_y // 2) + target_h / 2)
        end_center = (target_w / 2, (max_offset_y // 2) + target_h / 2)
    elif effect_type == "pan-right":
        start_zoom = 1.0
        end_zoom = 1.0
        start_center = (target_w / 2, (max_offset_y // 2) + target_h / 2)
        end_center = (max_offset_x + target_w / 2, (max_offset_y // 2) + target_h / 2)
    elif effect_type == "up-left":
        start_zoom = 1.0
        end_zoom = 1.0
        start_center = (max_offset_x + target_w / 2, max_offset_y + target_h / 2)
        end_center = (target_w / 2, target_h / 2)
    else:
        raise ValueError(f"Unsupported effect_type: {effect_type}")

    def transform_frame(get_frame, t):
        # Per-frame crop-and-resize implementing the interpolated movement.
        frame = get_frame(t)
        ratio = t / clip.duration if clip.duration > 0 else 0
        # Cosine ease-in/ease-out: starts and ends the motion gently.
        ratio = 0.5 - 0.5 * math.cos(math.pi * ratio)
        current_zoom = start_zoom + (end_zoom - start_zoom) * ratio
        crop_w = int(target_w / current_zoom)
        crop_h = int(target_h / current_zoom)
        current_center_x = start_center[0] + (end_center[0] - start_center[0]) * ratio
        current_center_y = start_center[1] + (end_center[1] - start_center[1]) * ratio
        # Clamp the center so the crop window always stays inside the image.
        min_center_x = crop_w / 2
        max_center_x = new_width - crop_w / 2
        min_center_y = crop_h / 2
        max_center_y = new_height - crop_h / 2
        current_center_x = max(min_center_x, min(current_center_x, max_center_x))
        current_center_y = max(min_center_y, min(current_center_y, max_center_y))
        # getRectSubPix performs a sub-pixel-accurate crop around the center.
        cropped_frame = cv2.getRectSubPix(frame, (crop_w, crop_h), (current_center_x, current_center_y))
        resized_frame = cv2.resize(cropped_frame, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)
        return resized_frame

    return clip.fl(transform_frame)
594
+
595
def resize_to_fill(clip, target_resolution):
    """Resize and crop a clip to fill the target resolution while maintaining aspect ratio."""
    target_w, target_h = target_resolution
    source_ratio = clip.w / clip.h
    frame_ratio = target_w / target_h

    if source_ratio > frame_ratio:
        # Wider than the frame: match height, trim equal slices off the sides.
        clip = clip.resize(height=target_h)
        excess = (clip.w - target_w) / 2
        return clip.crop(x1=excess, x2=clip.w - excess, y1=0, y2=clip.h)

    # Taller than (or matching) the frame: match width, trim top and bottom.
    clip = clip.resize(width=target_w)
    excess = (clip.h - target_h) / 2
    return clip.crop(x1=0, x2=clip.w, y1=excess, y2=clip.h - excess)
611
+
612
def find_mp3_files():
    """Search for any MP3 files in the current directory and subdirectories.

    Returns the first MP3 path found (walk order), or None when there are
    none. Every discovery is logged.
    """
    discovered = []
    for root, dirs, files in os.walk('.'):
        for name in files:
            if name.endswith('.mp3'):
                full_path = os.path.join(root, name)
                discovered.append(full_path)
                print(f"Found MP3 file: {full_path}")
    return discovered[0] if discovered else None
622
+
623
def add_background_music(final_video, bg_music_volume=0.10):
    """Add background music to the final video using any MP3 file found.

    Prefers "music.mp3" in the working directory and otherwise falls back
    to the first MP3 located by find_mp3_files(). The track is looped to
    cover the whole video, attenuated to *bg_music_volume*, and mixed under
    the existing narration audio. On any error the video is returned
    unchanged so music problems never abort a render.
    """
    try:
        # Local import: concatenate_audioclips is not among the module-level
        # moviepy imports, which previously made looping raise NameError.
        from moviepy.editor import concatenate_audioclips

        bg_music_path = "music.mp3"
        if not os.path.exists(bg_music_path):
            # Bug fix: honour the documented "any MP3 file found" behaviour
            # instead of only the hard-coded "music.mp3".
            bg_music_path = find_mp3_files()
        if bg_music_path and os.path.exists(bg_music_path):
            print(f"Adding background music from: {bg_music_path}")
            bg_music = AudioFileClip(bg_music_path)
            if bg_music.duration < final_video.duration:
                # Loop whole copies, then trim to the exact video length.
                loops_needed = math.ceil(final_video.duration / bg_music.duration)
                bg_segments = [bg_music] * loops_needed
                bg_music = concatenate_audioclips(bg_segments)
            bg_music = bg_music.subclip(0, final_video.duration)
            bg_music = bg_music.volumex(bg_music_volume)
            video_audio = final_video.audio
            mixed_audio = CompositeAudioClip([video_audio, bg_music])
            final_video = final_video.set_audio(mixed_audio)
            print("Background music added successfully")
        else:
            print("No MP3 files found, skipping background music")
        return final_video
    except Exception as e:
        print(f"Error adding background music: {e}")
        print("Continuing without background music")
        return final_video
647
+
648
def create_clip(media_path, asset_type, tts_path, duration=None, effects=None, narration_text=None, segment_index=0):
    """Create a video clip with synchronized subtitles and narration.

    The clip length is driven by the TTS audio (plus a 0.2 s tail). Video
    assets are cropped to fill TARGET_RESOLUTION and looped or trimmed;
    image assets get a Ken Burns pan/zoom with fades. When captions are
    enabled, the narration is split into ~5-word chunks spread evenly over
    the audio. Returns the composed clip, or None on any failure.

    NOTE(review): `duration` and `effects` are accepted but not referenced
    in this body — the effective duration always comes from the TTS audio;
    confirm before removing.
    """
    try:
        print(f"Creating clip #{segment_index} with asset_type: {asset_type}, media_path: {media_path}")
        if not os.path.exists(media_path) or not os.path.exists(tts_path):
            print("Missing media or TTS file")
            return None

        # The narration dictates clip length; small fade avoids a click.
        audio_clip = AudioFileClip(tts_path).audio_fadeout(0.2)
        audio_duration = audio_clip.duration
        target_duration = audio_duration + 0.2

        if asset_type == "video":
            clip = VideoFileClip(media_path)
            clip = resize_to_fill(clip, TARGET_RESOLUTION)
            # Loop short footage; trim long footage to the narration length.
            if clip.duration < target_duration:
                clip = clip.loop(duration=target_duration)
            else:
                clip = clip.subclip(0, target_duration)
        elif asset_type == "image":
            img = Image.open(media_path)
            if img.mode != 'RGB':
                # Re-save non-RGB images (e.g. palette/alpha) as JPEG first.
                with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as temp:
                    img.convert('RGB').save(temp.name)
                    media_path = temp.name
                img.close()
            clip = ImageClip(media_path).set_duration(target_duration)
            clip = apply_kenburns_effect(clip, TARGET_RESOLUTION)
            clip = clip.fadein(0.3).fadeout(0.3)
        else:
            return None

        # CAPTION_COLOR == "transparent" means captions were disabled.
        if narration_text and CAPTION_COLOR != "transparent":
            try:
                # Break narration into chunks of at most 5 words each.
                words = narration_text.split()
                chunks = []
                current_chunk = []
                for word in words:
                    current_chunk.append(word)
                    if len(current_chunk) >= 5:
                        chunks.append(' '.join(current_chunk))
                        current_chunk = []
                if current_chunk:
                    chunks.append(' '.join(current_chunk))

                # Spread the chunks evenly across the narration audio.
                chunk_duration = audio_duration / len(chunks)
                subtitle_clips = []
                subtitle_y_position = int(TARGET_RESOLUTION[1] * 0.70)

                for i, chunk_text in enumerate(chunks):
                    start_time = i * chunk_duration
                    end_time = (i + 1) * chunk_duration
                    txt_clip = TextClip(
                        chunk_text,
                        fontsize=45,
                        font='Arial-Bold',
                        color=CAPTION_COLOR,
                        bg_color='rgba(0, 0, 0, 0.25)',
                        method='caption',
                        align='center',
                        stroke_width=2,
                        stroke_color=CAPTION_COLOR,
                        size=(TARGET_RESOLUTION[0] * 0.8, None)
                    ).set_start(start_time).set_end(end_time)
                    txt_clip = txt_clip.set_position(('center', subtitle_y_position))
                    subtitle_clips.append(txt_clip)

                clip = CompositeVideoClip([clip] + subtitle_clips)
            except Exception as sub_error:
                # TextClip needs ImageMagick; fall back to one static
                # caption when the chunked rendering fails.
                print(f"Subtitle error: {sub_error}")
                txt_clip = TextClip(
                    narration_text,
                    fontsize=28,
                    color=CAPTION_COLOR,
                    align='center',
                    size=(TARGET_RESOLUTION[0] * 0.7, None)
                ).set_position(('center', int(TARGET_RESOLUTION[1] / 3))).set_duration(clip.duration)
                clip = CompositeVideoClip([clip, txt_clip])

        clip = clip.set_audio(audio_clip)
        print(f"Clip created: {clip.duration:.1f}s")
        return clip
    except Exception as e:
        print(f"Error in create_clip: {str(e)}")
        return None
733
+
734
def fix_imagemagick_policy():
    """Fix ImageMagick security policies.

    MoviePy's TextClip shells out to ImageMagick, whose default policy.xml
    often denies the read/write rights it needs. This locates the policy
    file, backs it up, and loosens the relevant rules via sudo sed edits.
    Returns True when a policy file was found and patched, False otherwise
    (callers then rely on the fallback subtitle path).
    """
    try:
        print("Attempting to fix ImageMagick security policies...")
        # Common policy.xml locations across ImageMagick 6/7 installs.
        policy_paths = [
            "/etc/ImageMagick-6/policy.xml",
            "/etc/ImageMagick-7/policy.xml",
            "/etc/ImageMagick/policy.xml",
            "/usr/local/etc/ImageMagick-7/policy.xml"
        ]
        found_policy = next((path for path in policy_paths if os.path.exists(path)), None)
        if not found_policy:
            print("No policy.xml found. Using alternative subtitle method.")
            return False
        print(f"Modifying policy file at {found_policy}")
        # Keep a backup, then relax the rights in place.
        os.system(f"sudo cp {found_policy} {found_policy}.bak")
        os.system(f"sudo sed -i 's/rights=\"none\"/rights=\"read|write\"/g' {found_policy}")
        os.system(f"sudo sed -i 's/<policy domain=\"path\" pattern=\"@\*\"[^>]*>/<policy domain=\"path\" pattern=\"@*\" rights=\"read|write\"/g' {found_policy}")
        os.system(f"sudo sed -i 's/<policy domain=\"coder\" rights=\"none\" pattern=\"PDF\"[^>]*>/<!-- <policy domain=\"coder\" rights=\"none\" pattern=\"PDF\"> -->/g' {found_policy}")
        print("ImageMagick policies updated successfully.")
        return True
    except Exception as e:
        print(f"Error fixing policies: {e}")
        return False
758
+
759
# ---------------- Main Function with Gradio Integration ---------------- #
def generate_video(user_input, resolution, caption_option):
    """Generate a video based on user input via Gradio.

    Pipeline: configure globals from the UI choices -> generate and parse a
    script -> for each (media prompt, narration) pair fetch media, render
    TTS, and build a clip -> concatenate, add background music, and export.
    Returns the output video path, or None when any required stage fails
    (the temp folder is cleaned up on every exit path).
    """
    # These module globals are read by generate_media/generate_tts/create_clip.
    global TARGET_RESOLUTION, CAPTION_COLOR, TEMP_FOLDER
    import shutil

    # Set resolution: "Full" is landscape 1080p, "Short" is vertical 9:16.
    if resolution == "Full":
        TARGET_RESOLUTION = (1920, 1080)
    elif resolution == "Short":
        TARGET_RESOLUTION = (1080, 1920)
    else:
        TARGET_RESOLUTION = (1920, 1080)  # Default

    # Set caption color; "transparent" disables captions downstream.
    CAPTION_COLOR = "white" if caption_option == "Yes" else "transparent"

    # Create a unique temporary folder for this render.
    TEMP_FOLDER = tempfile.mkdtemp()

    # Fix ImageMagick policy so TextClip subtitles can render.
    fix_success = fix_imagemagick_policy()
    if not fix_success:
        print("Will use alternative methods if needed")

    print("Generating script from API...")
    script = generate_script(user_input)
    if not script:
        print("Failed to generate script.")
        shutil.rmtree(TEMP_FOLDER)
        return None
    print("Generated Script:\n", script)
    elements = parse_script(script)
    if not elements:
        print("Failed to parse script into elements.")
        shutil.rmtree(TEMP_FOLDER)
        return None
    print(f"Parsed {len(elements)//2} script segments.")

    # parse_script emits alternating media/TTS elements; pair them up.
    paired_elements = []
    for i in range(0, len(elements), 2):
        if i + 1 < len(elements):
            paired_elements.append((elements[i], elements[i + 1]))

    if not paired_elements:
        print("No valid script segments found.")
        shutil.rmtree(TEMP_FOLDER)
        return None

    # Build one clip per segment; failed segments are skipped, not fatal.
    clips = []
    for idx, (media_elem, tts_elem) in enumerate(paired_elements):
        print(f"\nProcessing segment {idx+1}/{len(paired_elements)} with prompt: '{media_elem['prompt']}'")
        media_asset = generate_media(media_elem['prompt'], current_index=idx, total_segments=len(paired_elements))
        if not media_asset:
            print(f"Skipping segment {idx+1} due to missing media asset.")
            continue
        tts_path = generate_tts(tts_elem['text'], tts_elem['voice'])
        if not tts_path:
            print(f"Skipping segment {idx+1} due to TTS generation failure.")
            continue
        clip = create_clip(
            media_path=media_asset['path'],
            asset_type=media_asset['asset_type'],
            tts_path=tts_path,
            duration=tts_elem['duration'],
            effects=media_elem.get('effects', 'fade-in'),
            narration_text=tts_elem['text'],
            segment_index=idx
        )
        if clip:
            clips.append(clip)
        else:
            print(f"Clip creation failed for segment {idx+1}.")

    if not clips:
        print("No clips were successfully created.")
        shutil.rmtree(TEMP_FOLDER)
        return None

    print("\nConcatenating clips...")
    final_video = concatenate_videoclips(clips, method="compose")
    final_video = add_background_music(final_video, bg_music_volume=0.08)

    print(f"Exporting final video to {OUTPUT_VIDEO_FILENAME}...")
    final_video.write_videofile(OUTPUT_VIDEO_FILENAME, codec='libx264', fps=30, preset='veryfast')
    print(f"Final video saved as {OUTPUT_VIDEO_FILENAME}")

    # Clean up
    print("Cleaning up temporary files...")
    shutil.rmtree(TEMP_FOLDER)
    print("Temporary files removed.")

    return OUTPUT_VIDEO_FILENAME
852
+
853
# ---------------- Gradio Interface ---------------- #
# Wires generate_video to a simple three-input web form: the concept text,
# a landscape/vertical resolution choice, and a captions on/off toggle.
iface = gr.Interface(
    fn=generate_video,
    inputs=[
        gr.Textbox(label="Video Concept", placeholder="Enter your video concept here..."),
        gr.Radio(["Full", "Short"], label="Resolution", value="Full"),
        gr.Radio(["Yes", "No"], label="Captions", value="Yes")
    ],
    outputs=gr.Video(label="Generated Video"),
    title="AI Documentary Video Generator",
    description="Create a funny documentary-style video based on your concept. Note: Generation may take several minutes on CPU."
)

# Launch the interface
# share=True publishes a temporary public Gradio link in addition to the
# local server.
iface.launch(share=True)