testdeep123 commited on
Commit
fb20f92
·
verified ·
1 Parent(s): 3ea8b5d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +153 -372
app.py CHANGED
@@ -1,117 +1,44 @@
1
- # Import necessary libraries
2
- from kokoro import KPipeline
3
- import soundfile as sf
4
  import os
5
- from moviepy.editor import (
6
- VideoFileClip, concatenate_videoclips, AudioFileClip, CompositeVideoClip, TextClip, CompositeAudioClip
7
- )
8
  import tempfile
9
  import random
10
  import shutil
11
- import moviepy.config as mpy_config
12
- from pydub import AudioSegment
13
  from gtts import gTTS
 
 
 
14
  import gradio as gr
15
- import requests
16
- import re
17
-
18
- # Initialize Kokoro TTS pipeline (using American English)
19
- pipeline = KPipeline(lang_code='a') # Use voice 'af_heart' for American English
20
-
21
- # Ensure ImageMagick binary is set
22
- mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})
23
 
24
  # Global Configuration
25
- OPENROUTER_API_KEY = os.environ.get('OPENROUTER_API_KEY', '')  # SECURITY: a live API key was committed here in plaintext — revoke the leaked key and load it from the environment instead
26
- OPENROUTER_MODEL = "google/gemini-2.0-flash-exp:free"
27
  OUTPUT_VIDEO_FILENAME = "final_video.mp4"
28
- USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
29
- TARGET_RESOLUTION = (1080, 1920) # Fixed vertical resolution
30
- CAPTION_COLOR = None
31
  TEMP_FOLDER = None
32
- selected_voice = 'af_heart' # Default voice
33
- voice_speed = 0.9 # Default voice speed
34
- font_size = 45 # Default font size
35
- bg_music_volume = 0.08 # Default background music volume
36
- fps = 30 # Default FPS
37
- preset = "veryfast" # Default preset
38
-
39
- # Helper Functions
40
- def generate_script(user_input):
41
- """Generate a documentary script based on user input."""
42
- headers = {
43
- 'Authorization': f'Bearer {OPENROUTER_API_KEY}',
44
- 'HTTP-Referer': 'https://your-domain.com',
45
- 'X-Title': 'AI Documentary Maker'
46
- }
47
- prompt = f"""You're a professional documentary narrator. Your job is to write a serious, natural, and informative video script based on one topic.
48
-
49
- The script should sound like a real human voiceover from a TV show or documentary — clear, factual, and engaging, like something you'd hear on National Geographic or a news report.
50
-
51
- Structure:
52
- - Break the script into scenes using [Tags]. Each tag is a short title (1–2 words) that describes the visual or idea.
53
- - Under each tag, write one sentence (max 12 words) that fits the tag and continues the topic.
54
- - The full script should make sense as one connected narration — no randomness.
55
- - Use natural, formal English. No slang, no fake AI language, and no robotic tone.
56
- - Do not use humor, sarcasm, or casual language. This is a serious narration.
57
- - No emotion-sound words like “aww,” “eww,” “whoa,” etc.
58
- - Do not use numbers like 1, 2, 3 — write them out as one, two, three.
59
- - At the end, add a [Subscribe] tag with a formal or respectful reason to follow or subscribe.
60
-
61
- Only output the script. No extra comments or text.
62
-
63
- Example:
64
-
65
- [Ocean]
66
-
67
- The ocean covers over seventy percent of the Earth's surface.
68
-
69
- [Currents]
70
-
71
- Ocean currents distribute heat and regulate global climate patterns.
72
-
73
- [Coral Reefs]
74
-
75
- These ecosystems support over one million species of marine life.
76
-
77
- [Pollution]
78
-
79
- Plastic waste threatens marine biodiversity and food chains.
80
-
81
- [Climate Impact]
82
-
83
- Rising temperatures are causing coral bleaching and habitat loss.
84
-
85
- [Subscribe]
86
-
87
- Follow to explore more about the changing planet we live on.
88
-
89
- Topic: {user_input}
90
- """
91
- data = {
92
- 'model': OPENROUTER_MODEL,
93
- 'messages': [{'role': 'user', 'content': prompt}],
94
- 'temperature': 0.4,
95
- 'max_tokens': 5000
96
- }
97
- try:
98
- response = requests.post(
99
- 'https://openrouter.ai/api/v1/chat/completions',
100
- headers=headers,
101
- json=data,
102
- timeout=30
103
- )
104
- if response.status_code == 200:
105
- return response.json()['choices'][0]['message']['content']
106
- else:
107
- print(f"API Error {response.status_code}: {response.text}")
108
- return None
109
- except Exception as e:
110
- print(f"Request failed: {str(e)}")
111
- return None
112
 
113
  def parse_script(script_text):
114
- """Parse the script into narration elements."""
115
  sections = {}
116
  current_title = None
117
  current_text = ""
@@ -129,301 +56,155 @@ def parse_script(script_text):
129
  current_text += line + " "
130
  if current_title:
131
  sections[current_title] = current_text.strip()
132
- elements = []
133
- for title, narration in sections.items():
134
- if not narration:
135
- continue
136
- words = narration.split()
137
- duration = max(3, len(words) * 0.5) # Initial estimate, actual duration from TTS
138
- tts_element = {"type": "tts", "text": narration, "voice": "en", "duration": duration}
139
- elements.append(tts_element)
140
- return elements
141
 
142
- def generate_tts(text, voice):
143
- """Generate TTS audio using Kokoro or gTTS as fallback."""
144
- safe_text = re.sub(r'[^\w\s-]', '', text[:10]).strip().replace(' ', '_')
145
  file_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}.wav")
146
- if os.path.exists(file_path):
147
- print(f"Using cached TTS for text '{text[:10]}...'")
148
- return file_path
149
  try:
150
- kokoro_voice = selected_voice if voice == 'en' else voice
151
- generator = pipeline(text, voice=kokoro_voice, speed=voice_speed, split_pattern=r'\n+')
152
- audio_segments = [audio for _, _, audio in generator]
153
- full_audio = np.concatenate(audio_segments) if len(audio_segments) > 1 else audio_segments[0]
154
- sf.write(file_path, full_audio, 24000)
155
- print(f"TTS audio saved to {file_path} (Kokoro)")
 
 
156
  return file_path
157
  except Exception as e:
158
- print(f"Error with Kokoro TTS: {e}")
159
- try:
160
- print("Falling back to gTTS...")
161
- tts = gTTS(text=text, lang='en')
162
- mp3_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}.mp3")
163
- tts.save(mp3_path)
164
- audio = AudioSegment.from_mp3(mp3_path)
165
- audio.export(file_path, format="wav")
166
- os.remove(mp3_path)
167
- print(f"Fallback TTS saved to {file_path} (gTTS)")
168
- return file_path
169
- except Exception as fallback_error:
170
- print(f"Both TTS methods failed: {fallback_error}")
171
- return None
172
-
173
- def resize_to_fill(clip, target_resolution):
174
- """Resize and crop clip to fill the target resolution."""
175
- target_w, target_h = target_resolution
176
- clip_aspect = clip.w / clip.h
177
- target_aspect = target_w / target_h
178
- if clip_aspect > target_aspect:
179
- clip = clip.resize(height=target_h)
180
- crop_amount = (clip.w - target_w) / 2
181
- clip = clip.crop(x1=crop_amount, x2=clip.w - crop_amount, y1=0, y2=clip.h)
182
- else:
183
- clip = clip.resize(width=target_w)
184
- crop_amount = (clip.h - target_h) / 2
185
- clip = clip.crop(x1=0, x2=clip.w, y1=crop_amount, y2=clip.h - crop_amount)
186
- return clip
187
-
188
- def add_background_music(final_video, bg_music_volume=0.08):
189
- """Add background music to the final video."""
190
- try:
191
- bg_music_path = "music.mp3"
192
- if os.path.exists(bg_music_path):
193
- print(f"Adding background music from: {bg_music_path}")
194
- bg_music = AudioFileClip(bg_music_path)
195
- if bg_music.duration < final_video.duration:
196
- loops_needed = math.ceil(final_video.duration / bg_music.duration)
197
- bg_segments = [bg_music] * loops_needed
198
- bg_music = concatenate_audioclips(bg_segments)
199
- bg_music = bg_music.subclip(0, final_video.duration)
200
- bg_music = bg_music.volumex(bg_music_volume)
201
- video_audio = final_video.audio
202
- mixed_audio = CompositeAudioClip([video_audio, bg_music])
203
- final_video = final_video.set_audio(mixed_audio)
204
- print("Background music added successfully")
205
- else:
206
- print("No music.mp3 found, skipping background music")
207
- return final_video
208
- except Exception as e:
209
- print(f"Error adding background music: {e}")
210
- return final_video
211
-
212
- def create_clip(video_path, start_time, duration, tts_path, narration_text, segment_index):
213
- """Create a video clip with synchronized captions."""
214
- try:
215
- print(f"Creating clip #{segment_index} from {start_time:.2f} to {start_time + duration:.2f}")
216
- video_clip = VideoFileClip(video_path).subclip(start_time, start_time + duration)
217
- video_clip = resize_to_fill(video_clip, TARGET_RESOLUTION)
218
- audio_clip = AudioFileClip(tts_path).audio_fadeout(0.2)
219
- video_clip = video_clip.set_audio(audio_clip)
220
-
221
- if CAPTION_COLOR != "transparent" and narration_text:
222
- words = narration_text.split()
223
- chunks = [words[i:i+5] for i in range(0, len(words), 5)]
224
- chunk_duration = duration / len(chunks) if len(chunks) > 0 else duration
225
- subtitle_clips = []
226
- for i, chunk in enumerate(chunks):
227
- chunk_text = ' '.join(chunk)
228
- start_time = i * chunk_duration
229
- end_time = (i + 1) * chunk_duration if i < len(chunks) - 1 else duration
230
- txt_clip = TextClip(
231
- chunk_text,
232
- fontsize=font_size,
233
- font='Arial-Bold',
234
- color=CAPTION_COLOR,
235
- bg_color='rgba(0, 0, 0, 0.25)',
236
- method='caption',
237
- align='center',
238
- size=(TARGET_RESOLUTION[0] * 0.8, None)
239
- ).set_start(start_time).set_end(end_time).set_position(('center', int(TARGET_RESOLUTION[1] * 0.85)))
240
- subtitle_clips.append(txt_clip)
241
- video_clip = CompositeVideoClip([video_clip] + subtitle_clips)
242
-
243
- print(f"Clip created: {video_clip.duration:.1f}s")
244
- return video_clip
245
- except Exception as e:
246
- print(f"Error in create_clip: {str(e)}")
247
  return None
248
 
249
- def fix_imagemagick_policy():
250
- """Fix ImageMagick security policies for text rendering."""
 
 
 
 
 
 
 
 
 
 
 
251
  try:
252
- print("Attempting to fix ImageMagick security policies...")
253
- policy_paths = [
254
- "/etc/ImageMagick-6/policy.xml",
255
- "/etc/ImageMagick-7/policy.xml",
256
- "/etc/ImageMagick/policy.xml",
257
- "/usr/local/etc/ImageMagick-7/policy.xml"
258
- ]
259
- found_policy = next((path for path in policy_paths if os.path.exists(path)), None)
260
- if not found_policy:
261
- print("No policy.xml found. Text rendering may fail.")
262
- return False
263
- print(f"Modifying policy file at {found_policy}")
264
- os.system(f"sudo cp {found_policy} {found_policy}.bak")
265
- os.system(f"sudo sed -i 's/rights=\"none\"/rights=\"read|write\"/g' {found_policy}")
266
- os.system(f"sudo sed -i 's/<policy domain=\"path\" pattern=\"@\*\"[^>]*>/<policy domain=\"path\" pattern=\"@*\" rights=\"read|write\"/g' {found_policy}")
267
- print("ImageMagick policies updated successfully.")
268
- return True
269
- except Exception as e:
270
- print(f"Error fixing policies: {e}")
271
- return False
272
-
273
- def generate_video(user_input, resolution, caption_option):
274
- """Generate a video using video.mp4 with synchronized voice and captions."""
275
- global TARGET_RESOLUTION, CAPTION_COLOR, TEMP_FOLDER
276
- TARGET_RESOLUTION = (1080, 1920) # Fixed as per requirement
277
- CAPTION_COLOR = "white" if caption_option == "Yes" else "transparent"
 
 
 
 
 
 
 
 
278
  TEMP_FOLDER = tempfile.mkdtemp()
279
-
280
- fix_success = fix_imagemagick_policy()
281
- if not fix_success:
282
- print("Proceeding without ImageMagick policy fix.")
283
-
284
- print("Generating script from API...")
285
- script = generate_script(user_input)
286
- if not script:
287
- print("Failed to generate script.")
288
- shutil.rmtree(TEMP_FOLDER)
289
- return None
290
- print("Generated Script:\n", script)
291
-
292
- elements = parse_script(script)
293
- if not elements:
294
- print("Failed to parse script into elements.")
295
  shutil.rmtree(TEMP_FOLDER)
296
  return None
297
- print(f"Parsed {len(elements)} script segments.")
298
-
 
 
 
 
299
  video_path = "video.mp4"
300
  if not os.path.exists(video_path):
301
- print("video.mp4 not found in the current directory.")
302
  shutil.rmtree(TEMP_FOLDER)
303
  return None
304
-
305
- source_video = VideoFileClip(video_path)
306
- total_duration = source_video.duration
307
- source_video.close()
308
- print(f"Source video duration: {total_duration:.2f} seconds")
309
-
310
- clips = []
311
- for idx, tts_elem in enumerate(elements):
312
- print(f"\nProcessing segment {idx+1}/{len(elements)}")
313
- tts_path = generate_tts(tts_elem['text'], tts_elem['voice'])
314
- if not tts_path:
315
- print(f"Skipping segment {idx+1} due to TTS failure.")
316
- continue
317
-
318
- audio_clip = AudioFileClip(tts_path)
319
- segment_duration = audio_clip.duration
320
- audio_clip.close()
321
-
322
- max_start = total_duration - segment_duration
323
- if max_start <= 0:
324
- print(f"Segment duration {segment_duration:.2f}s exceeds video duration {total_duration:.2f}s.")
325
- continue
326
-
327
- start_time = random.uniform(0, max_start)
328
- clip = create_clip(
329
- video_path=video_path,
330
- start_time=start_time,
331
- duration=segment_duration,
332
- tts_path=tts_path,
333
- narration_text=tts_elem['text'],
334
- segment_index=idx
335
- )
336
- if clip:
337
- clips.append(clip)
338
- else:
339
- print(f"Clip creation failed for segment {idx+1}.")
340
-
341
- if not clips:
342
- print("No clips were successfully created.")
343
  shutil.rmtree(TEMP_FOLDER)
344
  return None
345
-
346
- print("\nConcatenating clips...")
347
- final_video = concatenate_videoclips(clips, method="compose")
348
- final_video = add_background_music(final_video, bg_music_volume=bg_music_volume)
349
-
350
- print(f"Exporting final video to {OUTPUT_VIDEO_FILENAME}...")
351
- final_video.write_videofile(OUTPUT_VIDEO_FILENAME, codec='libx264', fps=fps, preset=preset)
352
- print(f"Final video saved as {OUTPUT_VIDEO_FILENAME}")
353
-
354
- print("Cleaning up temporary files...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
355
  shutil.rmtree(TEMP_FOLDER)
356
- print("Temporary files removed.")
357
-
358
  return OUTPUT_VIDEO_FILENAME
359
 
360
  # Gradio Interface
361
- VOICE_CHOICES = {
362
- 'Emma (Female)': 'af_heart',
363
- 'Bella (Female)': 'af_bella',
364
- 'Nicole (Female)': 'af_nicole',
365
- 'Aoede (Female)': 'af_aoede',
366
- 'Kore (Female)': 'af_kore',
367
- 'Sarah (Female)': 'af_sarah',
368
- 'Nova (Female)': 'af_nova',
369
- 'Sky (Female)': 'af_sky',
370
- 'Alloy (Female)': 'af_alloy',
371
- 'Jessica (Female)': 'af_jessica',
372
- 'River (Female)': 'af_river',
373
- 'Michael (Male)': 'am_michael',
374
- 'Fenrir (Male)': 'am_fenrir',
375
- 'Puck (Male)': 'am_puck',
376
- 'Echo (Male)': 'am_echo',
377
- 'Eric (Male)': 'am_eric',
378
- 'Liam (Male)': 'am_liam',
379
- 'Onyx (Male)': 'am_onyx',
380
- 'Santa (Male)': 'am_santa',
381
- 'Adam (Male)': 'am_adam',
382
- 'Emma 🇬🇧 (Female)': 'bf_emma',
383
- 'Isabella 🇬🇧 (Female)': 'bf_isabella',
384
- 'Alice 🇬🇧 (Female)': 'bf_alice',
385
- 'Lily 🇬🇧 (Female)': 'bf_lily',
386
- 'George 🇬🇧 (Male)': 'bm_george',
387
- 'Fable 🇬🇧 (Male)': 'bm_fable',
388
- 'Lewis 🇬🇧 (Male)': 'bm_lewis',
389
- 'Daniel 🇬🇧 (Male)': 'bm_daniel'
390
- }
391
-
392
- def generate_video_with_options(user_input, resolution, caption_option, music_file, voice, vclip_prob, bg_vol, video_fps, video_preset, v_speed, caption_size):
393
- """Wrapper function for Gradio interface to set global options."""
394
- global selected_voice, voice_speed, font_size, bg_music_volume, fps, preset
395
- selected_voice = VOICE_CHOICES[voice]
396
- voice_speed = v_speed
397
- font_size = caption_size
398
- bg_music_volume = bg_vol
399
- fps = video_fps
400
- preset = video_preset
401
- if music_file is not None:
402
- target_path = "music.mp3"
403
- shutil.copy(music_file.name, target_path)
404
- print(f"Uploaded music saved as: {target_path}")
405
- return generate_video(user_input, resolution, caption_option)
406
-
407
  iface = gr.Interface(
408
- fn=generate_video_with_options,
409
  inputs=[
410
- gr.Textbox(label="Video Concept", placeholder="Enter your video concept here..."),
411
- gr.Radio(["Full", "Short"], label="Resolution", value="Short", visible=False), # Hidden, fixed to Short
412
- gr.Radio(["Yes", "No"], label="Include Captions", value="No"),
413
- gr.File(label="Upload Background Music (MP3)", file_types=[".mp3"]),
414
- gr.Dropdown(choices=list(VOICE_CHOICES.keys()), label="Choose Voice", value="Emma (Female)"),
415
- gr.Slider(0, 100, value=25, step=1, label="Video Clip Usage Probability (%)", visible=False), # Unused
416
- gr.Slider(0.0, 1.0, value=0.08, step=0.01, label="Background Music Volume"),
417
- gr.Slider(10, 60, value=30, step=1, label="Video FPS"),
418
- gr.Dropdown(choices=["ultrafast", "superfast", "veryfast", "faster", "fast", "medium", "slow"],
419
- value="veryfast", label="Export Preset"),
420
- gr.Slider(0.5, 1.5, value=0.9, step=0.05, label="Voice Speed"),
421
- gr.Slider(20, 100, value=45, step=1, label="Caption Font Size")
422
  ],
423
  outputs=gr.Video(label="Generated Video"),
424
- title="AI Documentary Video Generator",
425
- description="Create short documentary videos using video.mp4 with AI narration and synced captions."
426
  )
427
 
428
  if __name__ == "__main__":
429
- iface.launch(share=True)
 
 
 
 
1
  import os
2
+ from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip, CompositeVideoClip
 
 
3
  import tempfile
4
  import random
5
  import shutil
 
 
6
  from gtts import gTTS
7
+ from PIL import Image, ImageDraw, ImageFont
8
+ import numpy as np
9
+ import textwrap
10
  import gradio as gr
 
 
 
 
 
 
 
 
11
 
12
# Global Configuration
OUTPUT_VIDEO_FILENAME = "final_video.mp4"  # name of the exported video, written to the working directory
TARGET_RESOLUTION = (1080, 1920)  # Vertical video resolution
TEMP_FOLDER = None  # per-run scratch directory; assigned in generate_video()
font_size = 45  # caption font size in pixels
fps = 30  # frame rate used when exporting the final video
preset = "veryfast"  # libx264 encoding preset (speed vs. file-size trade-off)
bg_music_volume = 0.08  # gain multiplier applied to the background music track
20
+
21
def generate_dummy_script():
    """Return a fixed, tagged narration script (roughly 64 seconds when spoken).

    The script uses the same ``[Tag]`` / sentence layout that parse_script()
    expects: each bracketed title is followed by one line of narration.
    """
    sections = (
        ("Intro", "The world is full of natural wonders."),
        ("Forests", "Forests cover vast regions of the planet."),
        ("Rivers", "Rivers flow through landscapes shaping the earth."),
        ("Mountains", "Mountains stand tall against the sky above."),
        ("Oceans", "Oceans hold mysteries beneath their waves."),
        ("Wildlife", "Wildlife thrives in diverse habitats worldwide."),
        ("Conclusion", "Nature continues to inspire us all."),
    )
    body = "\n".join(f"[{title}]\n{sentence}" for title, sentence in sections)
    # Leading/trailing newlines match the original triple-quoted literal.
    return "\n" + body + "\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
  def parse_script(script_text):
41
+ """Parse the script to extract full narration text."""
42
  sections = {}
43
  current_title = None
44
  current_text = ""
 
56
  current_text += line + " "
57
  if current_title:
58
  sections[current_title] = current_text.strip()
59
+ full_narration = " ".join(sections.values())
60
+ return full_narration
 
 
 
 
 
 
 
61
 
62
def generate_tts(text):
    """Synthesize *text* to speech and return the path of a WAV file in TEMP_FOLDER.

    gTTS only produces MP3, so the result is converted to WAV (via pydub)
    for moviepy. Returns None when synthesis or conversion fails.
    """
    safe_text = "narration"
    wav_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}.wav")
    mp3_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}.mp3")
    try:
        gTTS(text=text, lang='en').save(mp3_path)
        # Convert the intermediate MP3 to WAV, then discard it.
        from pydub import AudioSegment
        AudioSegment.from_mp3(mp3_path).export(wav_path, format="wav")
        os.remove(mp3_path)
        print(f"TTS audio saved to {wav_path}")
        return wav_path
    except Exception as e:
        print(f"TTS generation failed: {e}")
        return None
79
 
80
def get_audio_duration(audio_path):
    """Return the duration, in seconds, of the audio file at *audio_path*.

    The AudioFileClip is always closed — even if reading ``.duration``
    raises — so a bad file cannot leak the underlying ffmpeg reader.
    """
    audio = AudioFileClip(audio_path)
    try:
        return audio.duration
    finally:
        audio.close()
86
+
87
def generate_subtitle_image(text, font_path="arial.ttf", font_size=45, text_color=(255, 255, 255, 255), bg_color=(0, 0, 0, 64), size=(1080, 200)):
    """Render *text* as a word-wrapped, centered subtitle image.

    Args:
        text: Caption text to draw.
        font_path: TrueType font file; falls back to Pillow's default font.
        font_size: Point size for the TrueType font.
        text_color: RGBA fill for the glyphs.
        bg_color: RGBA fill for the caption background band (None/falsy to skip).
        size: (width, height) of the produced image in pixels.

    Returns:
        An RGBA numpy array suitable for moviepy's ImageClip.
    """
    img = Image.new('RGBA', size, (0, 0, 0, 0))  # Transparent background
    draw = ImageDraw.Draw(img)
    if bg_color:
        draw.rectangle([(0, 0), size], fill=bg_color)
    try:
        font = ImageFont.truetype(font_path, font_size)
    except IOError:
        font = ImageFont.load_default()
    lines = textwrap.wrap(text, width=40)

    def _measure(s):
        # font.getsize()/draw.textsize() were removed in Pillow 10;
        # textbbox() is the supported way to measure rendered text.
        left, top, right, bottom = draw.textbbox((0, 0), s, font=font)
        return right - left, bottom - top

    line_height = _measure('hg')[1]  # 'hg' spans ascender + descender
    total_height = line_height * len(lines)
    y_start = (size[1] - total_height) / 2
    for i, line in enumerate(lines):
        text_width, _ = _measure(line)
        x = (size[0] - text_width) / 2  # center each line horizontally
        y = y_start + i * line_height
        draw.text((x, y), line, font=font, fill=text_color)
    return np.array(img)
107
+
108
def add_background_music(video_clip):
    """Mix looping background music from ``music.mp3`` into *video_clip*.

    The track is looped to cover the full video length, trimmed, and
    attenuated by the global ``bg_music_volume``. If ``music.mp3`` is
    missing or cannot be processed, the clip is returned unchanged so a
    bad music file never aborts video generation.
    """
    bg_music_path = "music.mp3"
    if not os.path.exists(bg_music_path):
        return video_clip
    try:
        bg_music = AudioFileClip(bg_music_path)
        if bg_music.duration < video_clip.duration:
            from moviepy.audio.AudioClip import concatenate_audioclips
            # Loop the track enough times to cover the whole video.
            loops_needed = int(video_clip.duration / bg_music.duration) + 1
            bg_music = concatenate_audioclips([bg_music] * loops_needed)
        bg_music = bg_music.subclip(0, video_clip.duration)
        bg_music = bg_music.volumex(bg_music_volume)
        from moviepy.audio.AudioClip import CompositeAudioClip
        mixed_audio = CompositeAudioClip([video_clip.audio, bg_music])
        video_clip = video_clip.set_audio(mixed_audio)
    except Exception as e:
        # Best-effort feature: log and keep the original audio.
        print(f"Error adding background music: {e}")
    return video_clip
124
+
125
def generate_video(user_input, include_captions):
    """Generate a vertical video with a TTS voiceover and optional captions.

    Pipeline: build the (dummy) script, narrate it with TTS, cut a random
    segment of matching length from ``video.mp4``, overlay word-chunk
    captions if requested, mix background music, and export.

    Args:
        user_input: Concept text from the UI (currently unused by the
            dummy-script flow).
        include_captions: "Yes" to burn in captions, anything else to skip.

    Returns:
        Path of the exported video, or None on failure.
    """
    global TEMP_FOLDER
    TEMP_FOLDER = tempfile.mkdtemp()

    # Generate and parse script
    script = generate_dummy_script()
    full_narration = parse_script(script)
    print("Full Narration:", full_narration)

    # Generate voiceover
    tts_path = generate_tts(full_narration)
    if not tts_path:
        shutil.rmtree(TEMP_FOLDER)
        return None

    # Use the measured narration length instead of a hard-coded 64 seconds,
    # so the clip length and caption timings stay in sync with the audio.
    audio_duration = get_audio_duration(tts_path)
    video_duration = audio_duration + 0.5  # short tail after narration ends

    # Cut a clip from the long source video
    video_path = "video.mp4"
    if not os.path.exists(video_path):
        print("video.mp4 not found.")
        shutil.rmtree(TEMP_FOLDER)
        return None

    long_video = VideoFileClip(video_path)
    total_duration = long_video.duration
    if total_duration < video_duration:
        print("Video is too short.")
        long_video.close()
        shutil.rmtree(TEMP_FOLDER)
        return None

    start_time = random.uniform(0, total_duration - video_duration)
    video_clip = long_video.subclip(start_time, start_time + video_duration)
    long_video.close()

    # Set voiceover audio
    video_clip = video_clip.set_audio(AudioFileClip(tts_path))

    # Add captions if requested
    if include_captions == "Yes":
        words = full_narration.split()
        num_words = len(words)
        if num_words:  # guard against an empty narration
            word_duration = audio_duration / num_words
            chunks = [words[i:i + 5] for i in range(0, num_words, 5)]
            subtitle_clips = []
            for i, chunk in enumerate(chunks):
                chunk_text = ' '.join(chunk)
                start_idx = i * 5
                end_idx = start_idx + len(chunk) - 1
                start_time_chunk = start_idx * word_duration
                end_time_chunk = min((end_idx + 1) * word_duration, audio_duration)
                subtitle_img = generate_subtitle_image(chunk_text, font_size=font_size)
                txt_clip = ImageClip(subtitle_img).set_start(start_time_chunk).set_duration(end_time_chunk - start_time_chunk)
                txt_clip = txt_clip.set_position(('center', TARGET_RESOLUTION[1] - 200))
                subtitle_clips.append(txt_clip)
            video_clip = CompositeVideoClip([video_clip] + subtitle_clips)

    # Add background music
    video_clip = add_background_music(video_clip)

    # Export video
    video_clip.write_videofile(OUTPUT_VIDEO_FILENAME, codec='libx264', fps=fps, preset=preset)
    print(f"Video saved as {OUTPUT_VIDEO_FILENAME}")

    # Cleanup
    shutil.rmtree(TEMP_FOLDER)
    return OUTPUT_VIDEO_FILENAME
196
 
197
  # Gradio Interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
# Build the UI components up front, then wire them into the interface.
concept_box = gr.Textbox(
    label="Video Concept",
    placeholder="Enter concept (ignored for this example)",
)
captions_choice = gr.Radio(["Yes", "No"], label="Include Captions", value="No")

iface = gr.Interface(
    fn=generate_video,
    inputs=[concept_box, captions_choice],
    outputs=gr.Video(label="Generated Video"),
    title="Video Generator",
    description="Generates a 64.5s video clip with a 64s voiceover from a 13min video.",
)

if __name__ == "__main__":
    iface.launch()