Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,117 +1,44 @@
|
|
1 |
-
# Import necessary libraries
|
2 |
-
from kokoro import KPipeline
|
3 |
-
import soundfile as sf
|
4 |
import os
|
5 |
-
from moviepy.editor import
|
6 |
-
VideoFileClip, concatenate_videoclips, AudioFileClip, CompositeVideoClip, TextClip, CompositeAudioClip
|
7 |
-
)
|
8 |
import tempfile
|
9 |
import random
|
10 |
import shutil
|
11 |
-
import moviepy.config as mpy_config
|
12 |
-
from pydub import AudioSegment
|
13 |
from gtts import gTTS
|
|
|
|
|
|
|
14 |
import gradio as gr
|
15 |
-
import requests
|
16 |
-
import re
|
17 |
-
|
18 |
-
# Initialize Kokoro TTS pipeline (using American English)
|
19 |
-
pipeline = KPipeline(lang_code='a') # Use voice 'af_heart' for American English
|
20 |
-
|
21 |
-
# Ensure ImageMagick binary is set
|
22 |
-
mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})
|
23 |
|
24 |
# Global Configuration
|
25 |
-
OPENROUTER_API_KEY = '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'
|
26 |
-
OPENROUTER_MODEL = "google/gemini-2.0-flash-exp:free"
|
27 |
OUTPUT_VIDEO_FILENAME = "final_video.mp4"
|
28 |
-
|
29 |
-
TARGET_RESOLUTION = (1080, 1920) # Fixed vertical resolution
|
30 |
-
CAPTION_COLOR = None
|
31 |
TEMP_FOLDER = None
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
bg_music_volume = 0.08
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
- Use natural, formal English. No slang, no fake AI language, and no robotic tone.
|
56 |
-
- Do not use humor, sarcasm, or casual language. This is a serious narration.
|
57 |
-
- No emotion-sound words like “aww,” “eww,” “whoa,” etc.
|
58 |
-
- Do not use numbers like 1, 2, 3 — write them out as one, two, three.
|
59 |
-
- At the end, add a [Subscribe] tag with a formal or respectful reason to follow or subscribe.
|
60 |
-
|
61 |
-
Only output the script. No extra comments or text.
|
62 |
-
|
63 |
-
Example:
|
64 |
-
|
65 |
-
[Ocean]
|
66 |
-
|
67 |
-
The ocean covers over seventy percent of the Earth's surface.
|
68 |
-
|
69 |
-
[Currents]
|
70 |
-
|
71 |
-
Ocean currents distribute heat and regulate global climate patterns.
|
72 |
-
|
73 |
-
[Coral Reefs]
|
74 |
-
|
75 |
-
These ecosystems support over one million species of marine life.
|
76 |
-
|
77 |
-
[Pollution]
|
78 |
-
|
79 |
-
Plastic waste threatens marine biodiversity and food chains.
|
80 |
-
|
81 |
-
[Climate Impact]
|
82 |
-
|
83 |
-
Rising temperatures are causing coral bleaching and habitat loss.
|
84 |
-
|
85 |
-
[Subscribe]
|
86 |
-
|
87 |
-
Follow to explore more about the changing planet we live on.
|
88 |
-
|
89 |
-
Topic: {user_input}
|
90 |
-
"""
|
91 |
-
data = {
|
92 |
-
'model': OPENROUTER_MODEL,
|
93 |
-
'messages': [{'role': 'user', 'content': prompt}],
|
94 |
-
'temperature': 0.4,
|
95 |
-
'max_tokens': 5000
|
96 |
-
}
|
97 |
-
try:
|
98 |
-
response = requests.post(
|
99 |
-
'https://openrouter.ai/api/v1/chat/completions',
|
100 |
-
headers=headers,
|
101 |
-
json=data,
|
102 |
-
timeout=30
|
103 |
-
)
|
104 |
-
if response.status_code == 200:
|
105 |
-
return response.json()['choices'][0]['message']['content']
|
106 |
-
else:
|
107 |
-
print(f"API Error {response.status_code}: {response.text}")
|
108 |
-
return None
|
109 |
-
except Exception as e:
|
110 |
-
print(f"Request failed: {str(e)}")
|
111 |
-
return None
|
112 |
|
113 |
def parse_script(script_text):
|
114 |
-
"""Parse the script
|
115 |
sections = {}
|
116 |
current_title = None
|
117 |
current_text = ""
|
@@ -129,301 +56,155 @@ def parse_script(script_text):
|
|
129 |
current_text += line + " "
|
130 |
if current_title:
|
131 |
sections[current_title] = current_text.strip()
|
132 |
-
|
133 |
-
|
134 |
-
if not narration:
|
135 |
-
continue
|
136 |
-
words = narration.split()
|
137 |
-
duration = max(3, len(words) * 0.5) # Initial estimate, actual duration from TTS
|
138 |
-
tts_element = {"type": "tts", "text": narration, "voice": "en", "duration": duration}
|
139 |
-
elements.append(tts_element)
|
140 |
-
return elements
|
141 |
|
142 |
-
def generate_tts(text
|
143 |
-
"""Generate TTS audio
|
144 |
-
safe_text =
|
145 |
file_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}.wav")
|
146 |
-
if os.path.exists(file_path):
|
147 |
-
print(f"Using cached TTS for text '{text[:10]}...'")
|
148 |
-
return file_path
|
149 |
try:
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
|
|
|
|
156 |
return file_path
|
157 |
except Exception as e:
|
158 |
-
print(f"
|
159 |
-
try:
|
160 |
-
print("Falling back to gTTS...")
|
161 |
-
tts = gTTS(text=text, lang='en')
|
162 |
-
mp3_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}.mp3")
|
163 |
-
tts.save(mp3_path)
|
164 |
-
audio = AudioSegment.from_mp3(mp3_path)
|
165 |
-
audio.export(file_path, format="wav")
|
166 |
-
os.remove(mp3_path)
|
167 |
-
print(f"Fallback TTS saved to {file_path} (gTTS)")
|
168 |
-
return file_path
|
169 |
-
except Exception as fallback_error:
|
170 |
-
print(f"Both TTS methods failed: {fallback_error}")
|
171 |
-
return None
|
172 |
-
|
173 |
-
def resize_to_fill(clip, target_resolution):
|
174 |
-
"""Resize and crop clip to fill the target resolution."""
|
175 |
-
target_w, target_h = target_resolution
|
176 |
-
clip_aspect = clip.w / clip.h
|
177 |
-
target_aspect = target_w / target_h
|
178 |
-
if clip_aspect > target_aspect:
|
179 |
-
clip = clip.resize(height=target_h)
|
180 |
-
crop_amount = (clip.w - target_w) / 2
|
181 |
-
clip = clip.crop(x1=crop_amount, x2=clip.w - crop_amount, y1=0, y2=clip.h)
|
182 |
-
else:
|
183 |
-
clip = clip.resize(width=target_w)
|
184 |
-
crop_amount = (clip.h - target_h) / 2
|
185 |
-
clip = clip.crop(x1=0, x2=clip.w, y1=crop_amount, y2=clip.h - crop_amount)
|
186 |
-
return clip
|
187 |
-
|
188 |
-
def add_background_music(final_video, bg_music_volume=0.08):
|
189 |
-
"""Add background music to the final video."""
|
190 |
-
try:
|
191 |
-
bg_music_path = "music.mp3"
|
192 |
-
if os.path.exists(bg_music_path):
|
193 |
-
print(f"Adding background music from: {bg_music_path}")
|
194 |
-
bg_music = AudioFileClip(bg_music_path)
|
195 |
-
if bg_music.duration < final_video.duration:
|
196 |
-
loops_needed = math.ceil(final_video.duration / bg_music.duration)
|
197 |
-
bg_segments = [bg_music] * loops_needed
|
198 |
-
bg_music = concatenate_audioclips(bg_segments)
|
199 |
-
bg_music = bg_music.subclip(0, final_video.duration)
|
200 |
-
bg_music = bg_music.volumex(bg_music_volume)
|
201 |
-
video_audio = final_video.audio
|
202 |
-
mixed_audio = CompositeAudioClip([video_audio, bg_music])
|
203 |
-
final_video = final_video.set_audio(mixed_audio)
|
204 |
-
print("Background music added successfully")
|
205 |
-
else:
|
206 |
-
print("No music.mp3 found, skipping background music")
|
207 |
-
return final_video
|
208 |
-
except Exception as e:
|
209 |
-
print(f"Error adding background music: {e}")
|
210 |
-
return final_video
|
211 |
-
|
212 |
-
def create_clip(video_path, start_time, duration, tts_path, narration_text, segment_index):
|
213 |
-
"""Create a video clip with synchronized captions."""
|
214 |
-
try:
|
215 |
-
print(f"Creating clip #{segment_index} from {start_time:.2f} to {start_time + duration:.2f}")
|
216 |
-
video_clip = VideoFileClip(video_path).subclip(start_time, start_time + duration)
|
217 |
-
video_clip = resize_to_fill(video_clip, TARGET_RESOLUTION)
|
218 |
-
audio_clip = AudioFileClip(tts_path).audio_fadeout(0.2)
|
219 |
-
video_clip = video_clip.set_audio(audio_clip)
|
220 |
-
|
221 |
-
if CAPTION_COLOR != "transparent" and narration_text:
|
222 |
-
words = narration_text.split()
|
223 |
-
chunks = [words[i:i+5] for i in range(0, len(words), 5)]
|
224 |
-
chunk_duration = duration / len(chunks) if len(chunks) > 0 else duration
|
225 |
-
subtitle_clips = []
|
226 |
-
for i, chunk in enumerate(chunks):
|
227 |
-
chunk_text = ' '.join(chunk)
|
228 |
-
start_time = i * chunk_duration
|
229 |
-
end_time = (i + 1) * chunk_duration if i < len(chunks) - 1 else duration
|
230 |
-
txt_clip = TextClip(
|
231 |
-
chunk_text,
|
232 |
-
fontsize=font_size,
|
233 |
-
font='Arial-Bold',
|
234 |
-
color=CAPTION_COLOR,
|
235 |
-
bg_color='rgba(0, 0, 0, 0.25)',
|
236 |
-
method='caption',
|
237 |
-
align='center',
|
238 |
-
size=(TARGET_RESOLUTION[0] * 0.8, None)
|
239 |
-
).set_start(start_time).set_end(end_time).set_position(('center', int(TARGET_RESOLUTION[1] * 0.85)))
|
240 |
-
subtitle_clips.append(txt_clip)
|
241 |
-
video_clip = CompositeVideoClip([video_clip] + subtitle_clips)
|
242 |
-
|
243 |
-
print(f"Clip created: {video_clip.duration:.1f}s")
|
244 |
-
return video_clip
|
245 |
-
except Exception as e:
|
246 |
-
print(f"Error in create_clip: {str(e)}")
|
247 |
return None
|
248 |
|
249 |
-
def
|
250 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
251 |
try:
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
278 |
TEMP_FOLDER = tempfile.mkdtemp()
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
shutil.rmtree(TEMP_FOLDER)
|
289 |
-
return None
|
290 |
-
print("Generated Script:\n", script)
|
291 |
-
|
292 |
-
elements = parse_script(script)
|
293 |
-
if not elements:
|
294 |
-
print("Failed to parse script into elements.")
|
295 |
shutil.rmtree(TEMP_FOLDER)
|
296 |
return None
|
297 |
-
|
298 |
-
|
|
|
|
|
|
|
|
|
299 |
video_path = "video.mp4"
|
300 |
if not os.path.exists(video_path):
|
301 |
-
print("video.mp4 not found
|
302 |
shutil.rmtree(TEMP_FOLDER)
|
303 |
return None
|
304 |
-
|
305 |
-
|
306 |
-
total_duration =
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
clips = []
|
311 |
-
for idx, tts_elem in enumerate(elements):
|
312 |
-
print(f"\nProcessing segment {idx+1}/{len(elements)}")
|
313 |
-
tts_path = generate_tts(tts_elem['text'], tts_elem['voice'])
|
314 |
-
if not tts_path:
|
315 |
-
print(f"Skipping segment {idx+1} due to TTS failure.")
|
316 |
-
continue
|
317 |
-
|
318 |
-
audio_clip = AudioFileClip(tts_path)
|
319 |
-
segment_duration = audio_clip.duration
|
320 |
-
audio_clip.close()
|
321 |
-
|
322 |
-
max_start = total_duration - segment_duration
|
323 |
-
if max_start <= 0:
|
324 |
-
print(f"Segment duration {segment_duration:.2f}s exceeds video duration {total_duration:.2f}s.")
|
325 |
-
continue
|
326 |
-
|
327 |
-
start_time = random.uniform(0, max_start)
|
328 |
-
clip = create_clip(
|
329 |
-
video_path=video_path,
|
330 |
-
start_time=start_time,
|
331 |
-
duration=segment_duration,
|
332 |
-
tts_path=tts_path,
|
333 |
-
narration_text=tts_elem['text'],
|
334 |
-
segment_index=idx
|
335 |
-
)
|
336 |
-
if clip:
|
337 |
-
clips.append(clip)
|
338 |
-
else:
|
339 |
-
print(f"Clip creation failed for segment {idx+1}.")
|
340 |
-
|
341 |
-
if not clips:
|
342 |
-
print("No clips were successfully created.")
|
343 |
shutil.rmtree(TEMP_FOLDER)
|
344 |
return None
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
355 |
shutil.rmtree(TEMP_FOLDER)
|
356 |
-
print("Temporary files removed.")
|
357 |
-
|
358 |
return OUTPUT_VIDEO_FILENAME
|
359 |
|
360 |
# Gradio Interface
|
361 |
-
VOICE_CHOICES = {
|
362 |
-
'Emma (Female)': 'af_heart',
|
363 |
-
'Bella (Female)': 'af_bella',
|
364 |
-
'Nicole (Female)': 'af_nicole',
|
365 |
-
'Aoede (Female)': 'af_aoede',
|
366 |
-
'Kore (Female)': 'af_kore',
|
367 |
-
'Sarah (Female)': 'af_sarah',
|
368 |
-
'Nova (Female)': 'af_nova',
|
369 |
-
'Sky (Female)': 'af_sky',
|
370 |
-
'Alloy (Female)': 'af_alloy',
|
371 |
-
'Jessica (Female)': 'af_jessica',
|
372 |
-
'River (Female)': 'af_river',
|
373 |
-
'Michael (Male)': 'am_michael',
|
374 |
-
'Fenrir (Male)': 'am_fenrir',
|
375 |
-
'Puck (Male)': 'am_puck',
|
376 |
-
'Echo (Male)': 'am_echo',
|
377 |
-
'Eric (Male)': 'am_eric',
|
378 |
-
'Liam (Male)': 'am_liam',
|
379 |
-
'Onyx (Male)': 'am_onyx',
|
380 |
-
'Santa (Male)': 'am_santa',
|
381 |
-
'Adam (Male)': 'am_adam',
|
382 |
-
'Emma 🇬🇧 (Female)': 'bf_emma',
|
383 |
-
'Isabella 🇬🇧 (Female)': 'bf_isabella',
|
384 |
-
'Alice 🇬🇧 (Female)': 'bf_alice',
|
385 |
-
'Lily 🇬🇧 (Female)': 'bf_lily',
|
386 |
-
'George 🇬🇧 (Male)': 'bm_george',
|
387 |
-
'Fable 🇬🇧 (Male)': 'bm_fable',
|
388 |
-
'Lewis 🇬🇧 (Male)': 'bm_lewis',
|
389 |
-
'Daniel 🇬🇧 (Male)': 'bm_daniel'
|
390 |
-
}
|
391 |
-
|
392 |
-
def generate_video_with_options(user_input, resolution, caption_option, music_file, voice, vclip_prob, bg_vol, video_fps, video_preset, v_speed, caption_size):
|
393 |
-
"""Wrapper function for Gradio interface to set global options."""
|
394 |
-
global selected_voice, voice_speed, font_size, bg_music_volume, fps, preset
|
395 |
-
selected_voice = VOICE_CHOICES[voice]
|
396 |
-
voice_speed = v_speed
|
397 |
-
font_size = caption_size
|
398 |
-
bg_music_volume = bg_vol
|
399 |
-
fps = video_fps
|
400 |
-
preset = video_preset
|
401 |
-
if music_file is not None:
|
402 |
-
target_path = "music.mp3"
|
403 |
-
shutil.copy(music_file.name, target_path)
|
404 |
-
print(f"Uploaded music saved as: {target_path}")
|
405 |
-
return generate_video(user_input, resolution, caption_option)
|
406 |
-
|
407 |
iface = gr.Interface(
|
408 |
-
fn=
|
409 |
inputs=[
|
410 |
-
gr.Textbox(label="Video Concept", placeholder="Enter
|
411 |
-
gr.Radio(["
|
412 |
-
gr.Radio(["Yes", "No"], label="Include Captions", value="No"),
|
413 |
-
gr.File(label="Upload Background Music (MP3)", file_types=[".mp3"]),
|
414 |
-
gr.Dropdown(choices=list(VOICE_CHOICES.keys()), label="Choose Voice", value="Emma (Female)"),
|
415 |
-
gr.Slider(0, 100, value=25, step=1, label="Video Clip Usage Probability (%)", visible=False), # Unused
|
416 |
-
gr.Slider(0.0, 1.0, value=0.08, step=0.01, label="Background Music Volume"),
|
417 |
-
gr.Slider(10, 60, value=30, step=1, label="Video FPS"),
|
418 |
-
gr.Dropdown(choices=["ultrafast", "superfast", "veryfast", "faster", "fast", "medium", "slow"],
|
419 |
-
value="veryfast", label="Export Preset"),
|
420 |
-
gr.Slider(0.5, 1.5, value=0.9, step=0.05, label="Voice Speed"),
|
421 |
-
gr.Slider(20, 100, value=45, step=1, label="Caption Font Size")
|
422 |
],
|
423 |
outputs=gr.Video(label="Generated Video"),
|
424 |
-
title="
|
425 |
-
description="
|
426 |
)
|
427 |
|
428 |
if __name__ == "__main__":
|
429 |
-
iface.launch(
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
+
from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip, CompositeVideoClip
|
|
|
|
|
3 |
import tempfile
|
4 |
import random
|
5 |
import shutil
|
|
|
|
|
6 |
from gtts import gTTS
|
7 |
+
from PIL import Image, ImageDraw, ImageFont
|
8 |
+
import numpy as np
|
9 |
+
import textwrap
|
10 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
# Global Configuration
OUTPUT_VIDEO_FILENAME = "final_video.mp4"  # path of the exported video
TARGET_RESOLUTION = (1080, 1920)  # Vertical video resolution
TEMP_FOLDER = None  # set to a fresh tempfile.mkdtemp() per run by generate_video()
font_size = 45  # caption font size (pixels)
fps = 30  # export frame rate passed to write_videofile
preset = "veryfast"  # x264 encoding preset passed to write_videofile
bg_music_volume = 0.08  # background-music gain relative to the voiceover
|
20 |
+
|
21 |
+
def generate_dummy_script():
    """Generate a dummy script that results in approximately 64 seconds of narration."""
    # The [Title] / narration-line format below is the shape parse_script()
    # consumes. NOTE(review): the actual spoken length depends on the TTS
    # engine, so "approximately 64 seconds" is an assumption — TODO confirm.
    return """
[Intro]
The world is full of natural wonders.
[Forests]
Forests cover vast regions of the planet.
[Rivers]
Rivers flow through landscapes shaping the earth.
[Mountains]
Mountains stand tall against the sky above.
[Oceans]
Oceans hold mysteries beneath their waves.
[Wildlife]
Wildlife thrives in diverse habitats worldwide.
[Conclusion]
Nature continues to inspire us all.
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
|
40 |
def parse_script(script_text):
|
41 |
+
"""Parse the script to extract full narration text."""
|
42 |
sections = {}
|
43 |
current_title = None
|
44 |
current_text = ""
|
|
|
56 |
current_text += line + " "
|
57 |
if current_title:
|
58 |
sections[current_title] = current_text.strip()
|
59 |
+
full_narration = " ".join(sections.values())
|
60 |
+
return full_narration
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
+
def generate_tts(text):
    """Generate TTS audio for the full narration.

    Synthesizes *text* with gTTS, transcodes the intermediate MP3 to WAV
    via pydub, and returns the WAV path inside TEMP_FOLDER, or None if
    synthesis or conversion fails.
    """
    base_name = "narration"
    wav_path = os.path.join(TEMP_FOLDER, f"tts_{base_name}.wav")
    mp3_path = os.path.join(TEMP_FOLDER, f"tts_{base_name}.mp3")
    try:
        # gTTS only emits MP3, so synthesize first and transcode afterwards.
        gTTS(text=text, lang='en').save(mp3_path)
        from pydub import AudioSegment
        AudioSegment.from_mp3(mp3_path).export(wav_path, format="wav")
        os.remove(mp3_path)  # drop the intermediate file
        print(f"TTS audio saved to {wav_path}")
        return wav_path
    except Exception as e:
        print(f"TTS generation failed: {e}")
        return None
|
79 |
|
80 |
+
def get_audio_duration(audio_path):
    """Return the duration (in seconds) of the audio file at *audio_path*.

    Opens the file with moviepy's AudioFileClip and always releases the
    underlying reader, even when reading the duration raises.
    """
    audio = AudioFileClip(audio_path)
    try:
        return audio.duration
    finally:
        # Fix: the original skipped close() if .duration raised, leaking
        # the ffmpeg reader process/file handle.
        audio.close()
|
86 |
+
|
87 |
+
def generate_subtitle_image(text, font_path="arial.ttf", font_size=45, text_color=(255, 255, 255, 255), bg_color=(0, 0, 0, 64), size=(1080, 200)):
    """Generate a subtitle image with wrapped text.

    Renders *text*, wrapped to ~40 characters per line and centered, onto
    an RGBA canvas of *size* (optionally over a translucent *bg_color*
    rectangle) and returns it as a numpy array usable by moviepy ImageClip.
    """
    img = Image.new('RGBA', size, (0, 0, 0, 0))  # Transparent background
    draw = ImageDraw.Draw(img)
    if bg_color:
        draw.rectangle([(0, 0), size], fill=bg_color)
    try:
        font = ImageFont.truetype(font_path, font_size)
    except IOError:
        # Fall back to PIL's built-in bitmap font when arial.ttf is absent.
        font = ImageFont.load_default()
    lines = textwrap.wrap(text, width=40)
    # Fix: font.getsize() and draw.textsize() were removed in Pillow 10.0;
    # use getbbox()/textbbox() (available since Pillow 8) instead.
    hg_bbox = font.getbbox('hg')
    line_height = hg_bbox[3] - hg_bbox[1]
    total_height = line_height * len(lines)
    y_start = (size[1] - total_height) / 2
    for i, line in enumerate(lines):
        left, _, right, _ = draw.textbbox((0, 0), line, font=font)
        text_width = right - left
        x = (size[0] - text_width) / 2  # center each line horizontally
        y = y_start + i * line_height
        draw.text((x, y), line, font=font, fill=text_color)
    return np.array(img)
|
107 |
+
|
108 |
+
def add_background_music(video_clip):
    """Mix looping background music from ./music.mp3 into *video_clip*.

    Best-effort: if music.mp3 is absent, or unreadable/corrupt, the clip
    is returned unchanged instead of aborting the whole export (the
    original raised on any decoding error).
    """
    bg_music_path = "music.mp3"
    if not os.path.exists(bg_music_path):
        return video_clip
    try:
        bg_music = AudioFileClip(bg_music_path)
        if bg_music.duration < video_clip.duration:
            from moviepy.audio.AudioClip import concatenate_audioclips
            # Loop the track enough times to cover the video, then trim.
            loops_needed = int(video_clip.duration / bg_music.duration) + 1
            bg_music = concatenate_audioclips([bg_music] * loops_needed)
        bg_music = bg_music.subclip(0, video_clip.duration)
        bg_music = bg_music.volumex(bg_music_volume)
        from moviepy.audio.AudioClip import CompositeAudioClip
        mixed_audio = CompositeAudioClip([video_clip.audio, bg_music])
        video_clip = video_clip.set_audio(mixed_audio)
    except Exception as e:
        # A bad music file must not kill the video export.
        print(f"Error adding background music: {e}")
    return video_clip
|
124 |
+
|
125 |
+
def generate_video(user_input, include_captions):
    """Generate a vertical video with a voiceover and optional captions.

    Args:
        user_input: Concept text from the UI (currently unused — narration
            comes from generate_dummy_script()).
        include_captions: "Yes" to burn subtitle chunks into the video.

    Returns:
        Path of the exported video file, or None on failure.
    """
    global TEMP_FOLDER
    TEMP_FOLDER = tempfile.mkdtemp()

    # Generate and parse script
    script = generate_dummy_script()  # dummy script stands in for a real generator
    full_narration = parse_script(script)
    print("Full Narration:", full_narration)

    # Generate voiceover
    tts_path = generate_tts(full_narration)
    if not tts_path:
        shutil.rmtree(TEMP_FOLDER)
        return None

    # Fix: measure the real voiceover length instead of hardcoding 64 s.
    # A hardcoded duration desynchronizes caption timing and leaves trailing
    # silence (or truncates audio) whenever TTS output differs from the guess.
    audio_duration = get_audio_duration(tts_path)
    video_duration = audio_duration + 0.5  # short visual tail after narration

    # Cut a clip of that length from the long source video
    video_path = "video.mp4"
    if not os.path.exists(video_path):
        print("video.mp4 not found.")
        shutil.rmtree(TEMP_FOLDER)
        return None

    long_video = VideoFileClip(video_path)
    total_duration = long_video.duration
    if total_duration < video_duration:
        print("Video is too short.")
        long_video.close()
        shutil.rmtree(TEMP_FOLDER)
        return None

    # Pick a random window inside the source video.
    start_time = random.uniform(0, total_duration - video_duration)
    video_clip = long_video.subclip(start_time, start_time + video_duration)
    long_video.close()

    # Set voiceover audio
    video_clip = video_clip.set_audio(AudioFileClip(tts_path))

    # Add captions if requested
    words = full_narration.split()
    if include_captions == "Yes" and words:  # guard: empty narration would divide by zero
        num_words = len(words)
        word_duration = audio_duration / num_words  # uniform per-word timing
        chunks = [words[i:i+5] for i in range(0, num_words, 5)]
        subtitle_clips = []
        for i, chunk in enumerate(chunks):
            chunk_text = ' '.join(chunk)
            start_idx = i * 5
            end_idx = start_idx + len(chunk) - 1
            start_time_chunk = start_idx * word_duration
            end_time_chunk = min((end_idx + 1) * word_duration, audio_duration)
            subtitle_img = generate_subtitle_image(chunk_text, font_size=font_size)
            txt_clip = ImageClip(subtitle_img).set_start(start_time_chunk).set_duration(end_time_chunk - start_time_chunk)
            # NOTE(review): this position assumes the source clip matches
            # TARGET_RESOLUTION (1080x1920); the clip is never resized here
            # — confirm the source video's resolution.
            txt_clip = txt_clip.set_position(('center', TARGET_RESOLUTION[1] - 200))
            subtitle_clips.append(txt_clip)
        video_clip = CompositeVideoClip([video_clip] + subtitle_clips)

    # Add background music
    video_clip = add_background_music(video_clip)

    # Export video
    video_clip.write_videofile(OUTPUT_VIDEO_FILENAME, codec='libx264', fps=fps, preset=preset)
    print(f"Video saved as {OUTPUT_VIDEO_FILENAME}")

    # Cleanup
    shutil.rmtree(TEMP_FOLDER)
    return OUTPUT_VIDEO_FILENAME
|
196 |
|
197 |
# Gradio Interface
# Two inputs map positionally onto generate_video(user_input, include_captions).
iface = gr.Interface(
    fn=generate_video,
    inputs=[
        gr.Textbox(label="Video Concept", placeholder="Enter concept (ignored for this example)"),
        gr.Radio(["Yes", "No"], label="Include Captions", value="No")
    ],
    outputs=gr.Video(label="Generated Video"),
    title="Video Generator",
    description="Generates a 64.5s video clip with a 64s voiceover from a 13min video."
)
|
208 |
|
209 |
if __name__ == "__main__":
    # Launch the Gradio app when executed as a script.
    iface.launch()
|