# Import necessary libraries
import gradio as gr
import os
import shutil
import tempfile
import random
import requests
import soundfile as sf
from moviepy.editor import (
    VideoFileClip, concatenate_videoclips, AudioFileClip, ImageClip,
    CompositeVideoClip, CompositeAudioClip, concatenate_audioclips, TextClip
)
import moviepy.video.fx.all as vfx
from kokoro import KPipeline
from gtts import gTTS
from pydub import AudioSegment
import math
import re
from PIL import Image
# Initialize Kokoro TTS pipeline (lang_code='a' selects American English)
pipeline = KPipeline(lang_code='a')

# Global Configuration
PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'
OPENROUTER_API_KEY = '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'
OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"
OUTPUT_VIDEO_FILENAME = "final_video.mp4"
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
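# The keys above are hard-coded; on a deployed Space they would normally come from
# secrets / environment variables instead, for example (hypothetical variable names):
#   PEXELS_API_KEY = os.environ.get("PEXELS_API_KEY", "")
#   OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")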
# Helper Functions
def generate_script(user_input):
    """Generate a documentary script using the OpenRouter API."""
    headers = {
        'Authorization': f'Bearer {OPENROUTER_API_KEY}',
        'HTTP-Referer': 'https://your-domain.com',
        'X-Title': 'AI Documentary Maker'
    }
    prompt = f"""Short Documentary Script Generator

Instructions:
If I say "use this," output the script exactly as given.
If I give topics, generate a script based on them.
If I provide a full script, output it unchanged. Keep it short, simple, humorous, and serious but funny. Use normal conversational text.

Formatting Rules:
- Title in square brackets: [Title]
- Each section starts with a one-word title in [ ] (max two words).
- Narration: 5-10 words, casual, funny, unpredictable.
- No special formatting, just script text.
- Generalized search terms for Pexels.
- End with a funny subscribe statement.

Example:
[North Korea]
Top 5 unknown facts about North Korea.
[Invisibility]
North Korea’s internet speed doesn’t exist.
[Leadership]
Kim Jong-un won 100% votes… against himself.
[Subscribe]
Subscribe, or Kim sends you a ticket to nowhere.

Topic: {user_input}
"""
    data = {
        'model': OPENROUTER_MODEL,
        'messages': [{'role': 'user', 'content': prompt}],
        'temperature': 0.4,
        'max_tokens': 5000
    }
    try:
        response = requests.post('https://openrouter.ai/api/v1/chat/completions',
                                 headers=headers, json=data, timeout=30)
        response.raise_for_status()
        return response.json()['choices'][0]['message']['content']
    except Exception as e:
        print(f"Script generation failed: {e}")
        return None
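# Example (illustrative): generate_script("unknown facts about the ocean") returns the
# raw script text in the bracketed format described by the prompt above, or None if the
# OpenRouter request fails.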
def parse_script(script_text):
    """Parse a script into alternating media prompts and TTS narration elements."""
    elements = []
    # Drop blank lines so that [Title] / narration pairs stay adjacent even if the
    # model inserts empty lines between sections.
    lines = [line.strip() for line in script_text.splitlines() if line.strip()]
    for i in range(0, len(lines), 2):
        if i + 1 < len(lines) and lines[i].startswith('[') and lines[i].endswith(']'):
            title = lines[i][1:-1].strip()
            text = lines[i + 1].strip()
            if title and text:
                elements.append({'type': 'media', 'prompt': title})
                elements.append({'type': 'tts', 'text': text, 'voice': 'en'})
    return elements
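# Example (illustrative): parse_script("[Ocean]\nThe ocean is mostly unexplored.") returns
#   [{'type': 'media', 'prompt': 'Ocean'},
#    {'type': 'tts', 'text': 'The ocean is mostly unexplored.', 'voice': 'en'}]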
def search_pexels_videos(query, api_key):
    """Search Pexels for a random HD video."""
    headers = {'Authorization': api_key}
    params = {"query": query, "per_page": 15}
    try:
        response = requests.get("https://api.pexels.com/videos/search",
                                headers=headers, params=params, timeout=10)
        response.raise_for_status()
        videos = response.json().get("videos", [])
        hd_videos = [v["video_files"][0]["link"]
                     for v in videos
                     if v["video_files"] and v["video_files"][0]["quality"] == "hd"]
        return random.choice(hd_videos) if hd_videos else None
    except Exception as e:
        print(f"Pexels video search failed: {e}")
        return None
def search_pexels_images(query, api_key):
    """Search Pexels for a random landscape image."""
    headers = {'Authorization': api_key}
    params = {"query": query, "per_page": 5, "orientation": "landscape"}
    try:
        response = requests.get("https://api.pexels.com/v1/search",
                                headers=headers, params=params, timeout=10)
        response.raise_for_status()
        photos = response.json().get("photos", [])
        return random.choice(photos)["src"]["original"] if photos else None
    except Exception as e:
        print(f"Pexels image search failed: {e}")
        return None
def download_file(url, filename):
    """Download a file from a URL to the given path."""
    try:
        response = requests.get(url, stream=True, timeout=15)
        response.raise_for_status()
        with open(filename, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
        return filename
    except Exception as e:
        print(f"Download failed: {e}")
        return None
def generate_media(prompt, video_percentage, temp_folder):
    """Fetch a Pexels video or image for the prompt, using video_percentage as the video probability."""
    safe_prompt = re.sub(r'[^\w\s-]', '', prompt).strip().replace(' ', '_')
    # Try a video first with probability video_percentage/100, then fall back to an image.
    if random.random() < video_percentage / 100:
        video_file = os.path.join(temp_folder, f"{safe_prompt}_video.mp4")
        video_url = search_pexels_videos(prompt, PEXELS_API_KEY)
        if video_url and download_file(video_url, video_file):
            return {"path": video_file, "asset_type": "video"}
    image_file = os.path.join(temp_folder, f"{safe_prompt}.jpg")
    image_url = search_pexels_images(prompt, PEXELS_API_KEY)
    if image_url and download_file(image_url, image_file):
        return {"path": image_file, "asset_type": "image"}
    return None
def generate_tts(text, voice, temp_folder):
    """Generate TTS audio with Kokoro, falling back to gTTS on failure."""
    safe_text = re.sub(r'[^\w\s-]', '', text[:10]).strip().replace(' ', '_')
    file_path = os.path.join(temp_folder, f"tts_{safe_text}.wav")
    try:
        generator = pipeline(text, voice='af_heart', speed=0.9)
        # KPipeline yields (graphemes, phonemes, audio) tuples; take the audio of the first chunk.
        audio = next(generator)[2]
        sf.write(file_path, audio, 24000)  # Kokoro outputs 24 kHz audio
        return file_path
    except Exception:
        try:
            # Fallback: synthesize with gTTS, then convert the MP3 to WAV via pydub.
            tts = gTTS(text=text, lang='en')
            mp3_path = os.path.join(temp_folder, f"tts_{safe_text}.mp3")
            tts.save(mp3_path)
            audio = AudioSegment.from_mp3(mp3_path)
            audio.export(file_path, format="wav")
            os.remove(mp3_path)
            return file_path
        except Exception as e:
            print(f"TTS generation failed: {e}")
            return None
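# Example (illustrative): generate_tts("Hello there", 'en', "/tmp/clips") writes a WAV file
# into the temp folder and returns its path, or None if both engines fail. Only the first
# ~10 characters of the text are used to build the filename.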
def resize_to_fill(clip, target_resolution):
    """Resize and center-crop a clip so it fills the target resolution."""
    target_w, target_h = target_resolution
    clip_aspect = clip.w / clip.h
    target_aspect = target_w / target_h
    if clip_aspect > target_aspect:
        # Clip is wider than the target: match heights, then crop the sides.
        clip = clip.resize(height=target_h)
        crop_amount = (clip.w - target_w) / 2
        clip = clip.crop(x1=crop_amount, x2=clip.w - crop_amount)
    else:
        # Clip is taller than the target: match widths, then crop top and bottom.
        clip = clip.resize(width=target_w)
        crop_amount = (clip.h - target_h) / 2
        clip = clip.crop(y1=crop_amount, y2=clip.h - crop_amount)
    return clip
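# Worked example (illustrative): filling a 1080x1920 portrait target with a 1920x1080
# landscape clip: 1.78 > 0.56, so the clip is resized by height to roughly 3413x1920 and
# about 1167 px are cropped from each side, leaving 1080x1920.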
def create_clip(media_path, asset_type, tts_path, duration, narration_text, text_color, text_size, caption_bg, target_resolution):
    """Create a video clip with media, TTS narration, and optional subtitles."""
    try:
        audio_clip = AudioFileClip(tts_path).audio_fadeout(0.2)
        target_duration = audio_clip.duration + 0.2
        if asset_type == "video":
            clip = VideoFileClip(media_path)
            clip = resize_to_fill(clip, target_resolution)
            # Loop short footage or trim long footage to match the narration length.
            clip = clip.loop(duration=target_duration) if clip.duration < target_duration else clip.subclip(0, target_duration)
        else:  # image
            clip = ImageClip(media_path).set_duration(target_duration).resize(target_resolution).fadein(0.3).fadeout(0.3)
        if narration_text and caption_bg != "transparent":
            # Split the narration into 5-word chunks and spread them evenly over the audio.
            words = narration_text.split()
            chunks = [' '.join(words[i:i+5]) for i in range(0, len(words), 5)]
            chunk_duration = audio_clip.duration / len(chunks)
            subtitle_clips = [
                TextClip(
                    chunk,
                    fontsize=text_size,
                    color=text_color,
                    bg_color=caption_bg,
                    size=(int(target_resolution[0] * 0.8), None),
                    method='caption',
                    align='center'
                ).set_position(('center', target_resolution[1] * 0.7)).set_start(i * chunk_duration).set_end((i + 1) * chunk_duration)
                for i, chunk in enumerate(chunks)
            ]
            clip = CompositeVideoClip([clip] + subtitle_clips)
        clip = clip.set_audio(audio_clip)
        return clip
    except Exception as e:
        print(f"Clip creation failed: {e}")
        return None
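# Example (illustrative): a 12-word narration becomes three caption chunks of 5/5/2 words;
# with 6 seconds of TTS audio, each chunk is shown for 2 seconds, positioned at 70% of the
# frame height.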
def add_background_music(final_video, custom_music_path, music_volume):
    """Add background music to the video."""
    try:
        if custom_music_path and os.path.exists(custom_music_path):
            bg_music = AudioFileClip(custom_music_path)
        else:
            bg_music = AudioFileClip("default_music.mp3")  # Assume a default music file exists
        if bg_music.duration < final_video.duration:
            bg_music = concatenate_audioclips([bg_music] * math.ceil(final_video.duration / bg_music.duration))
        bg_music = bg_music.subclip(0, final_video.duration).volumex(music_volume)
        final_video = final_video.set_audio(CompositeAudioClip([final_video.audio, bg_music]))
        return final_video
    except Exception as e:
        print(f"Background music failed: {e}")
        return final_video
# Gradio Interface
with gr.Blocks(title="AI Documentary Video Generator") as app:
    ### Initial Inputs
    with gr.Column():
        concept = gr.Textbox(label="Video Concept", placeholder="Enter your video concept...")
        resolution = gr.Radio(["Full", "Short"], label="Resolution", value="Full")
        captions = gr.Radio(["Yes", "No"], label="Captions", value="Yes")
        video_percentage = gr.Slider(0, 100, label="Video Percentage", value=50)
        text_color = gr.ColorPicker(label="Text Color", value="#FFFFFF")
        text_size = gr.Slider(20, 60, label="Text Size", value=28)
        caption_bg = gr.ColorPicker(label="Caption Background Color", value="transparent")
        music_volume = gr.Slider(0, 1, label="Music Volume", value=0.08)
        # type="filepath" so the uploaded file arrives as a path string, which is what
        # add_background_music and generate_video_fn expect.
        custom_music = gr.File(label="Upload Custom Background Music", type="filepath")
        generate_script_btn = gr.Button("Generate Script")

    ### States
    num_clips = gr.State(value=0)
    titles_state = gr.State(value=[])
    initial_texts_state = gr.State(value=[])

    ### Clip Editing Section
    with gr.Column(visible=False) as clip_section:
        clip_textboxes = []
        clip_files = []
        for i in range(10):  # Max 10 clips
            with gr.Row():
                text_box = gr.Textbox(label=f"Clip {i+1} Text", visible=False)
                file_upload = gr.File(label=f"Upload Media for Clip {i+1}", type="filepath", visible=False)
            clip_textboxes.append(text_box)
            clip_files.append(file_upload)
        generate_video_btn = gr.Button("Generate Video", visible=False)

    ### Output
    video_output = gr.Video(label="Generated Video")
    ### Script Generation Logic
    def generate_script_fn(concept):
        script = generate_script(concept)
        if not script:
            return 0, [], []
        elements = parse_script(script)
        titles = [e['prompt'] for e in elements if e['type'] == 'media']
        texts = [e['text'] for e in elements if e['type'] == 'tts']
        return len(titles), titles, texts

    def update_textboxes(texts):
        return [gr.update(value=texts[i] if i < len(texts) else "", visible=i < len(texts)) for i in range(10)]

    def update_files(n):
        return [gr.update(visible=i < n) for i in range(10)]

    generate_script_btn.click(
        fn=generate_script_fn,
        inputs=[concept],
        outputs=[num_clips, titles_state, initial_texts_state]
    ).then(
        fn=update_textboxes,
        inputs=[initial_texts_state],
        outputs=clip_textboxes
    ).then(
        fn=update_files,
        inputs=[num_clips],
        outputs=clip_files
    ).then(
        fn=lambda: gr.update(visible=True),
        outputs=[clip_section]
    ).then(
        fn=lambda: gr.update(visible=True),
        outputs=[generate_video_btn]
    )
    ### Video Generation Logic
    def generate_video_fn(resolution, captions, video_percentage, text_color, text_size, caption_bg, music_volume, custom_music, num_clips, titles, *clip_data):
        texts = clip_data[:10]
        files = clip_data[10:]
        temp_folder = tempfile.mkdtemp()
        target_resolution = (1920, 1080) if resolution == "Full" else (1080, 1920)
        clips = []
        # The UI exposes at most 10 clip slots, so cap the loop accordingly.
        for i in range(min(num_clips, 10)):
            text = texts[i]
            media_file = files[i]
            title = titles[i]
            if media_file:
                # A manually uploaded file takes priority over the Pexels search.
                ext = os.path.splitext(media_file)[1].lower()
                media_path = os.path.join(temp_folder, f"clip_{i}{ext}")
                shutil.copy(media_file, media_path)
                asset_type = "video" if ext in ['.mp4', '.avi', '.mov'] else "image"
            else:
                media_asset = generate_media(title, video_percentage, temp_folder)
                if not media_asset:
                    continue
                media_path = media_asset['path']
                asset_type = media_asset['asset_type']
            tts_path = generate_tts(text, 'en', temp_folder)
            if not tts_path:
                continue
            duration = max(3, len(text.split()) * 0.5)
            clip = create_clip(
                media_path, asset_type, tts_path, duration, text,
                text_color, text_size, caption_bg if captions == "Yes" else "transparent", target_resolution
            )
            if clip:
                clips.append(clip)
        if not clips:
            shutil.rmtree(temp_folder)
            return None
        final_video = concatenate_videoclips(clips, method="compose")
        final_video = add_background_music(final_video, custom_music, music_volume)
        final_video.write_videofile(OUTPUT_VIDEO_FILENAME, codec='libx264', fps=24)
        shutil.rmtree(temp_folder)
        return OUTPUT_VIDEO_FILENAME
    generate_video_btn.click(
        fn=generate_video_fn,
        inputs=[resolution, captions, video_percentage, text_color, text_size, caption_bg, music_volume, custom_music, num_clips, titles_state] + clip_textboxes + clip_files,
        outputs=[video_output]
    )

app.launch(share=True)