# VeoFlux — app.py (Hugging Face Space): AI documentary video generator
# (script via OpenRouter, stock media via Pexels, TTS via Kokoro/gTTS, assembly via MoviePy)
# Import necessary libraries
import gradio as gr
import os
import shutil
import tempfile
import random
import requests
import soundfile as sf
from moviepy.editor import (
    AudioFileClip, CompositeAudioClip, CompositeVideoClip, ImageClip,
    TextClip, VideoFileClip, concatenate_audioclips, concatenate_videoclips
)
import moviepy.video.fx.all as vfx
from kokoro import KPipeline
from gtts import gTTS
from pydub import AudioSegment
import math
import re
from PIL import Image
# Initialize Kokoro TTS pipeline (lang_code 'a' — American English per kokoro usage here)
pipeline = KPipeline(lang_code='a')
# Global Configuration
# SECURITY NOTE(review): live API keys are hardcoded and committed in source.
# Move them to environment variables / Space secrets and rotate the exposed keys.
PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'
OPENROUTER_API_KEY = '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'
OPENROUTER_MODEL = "mistralai/mistral-small-3.1-24b-instruct:free"
# Path the rendered video is written to (and returned to the Gradio Video output)
OUTPUT_VIDEO_FILENAME = "final_video.mp4"
# Browser User-Agent string for outbound HTTP requests
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
# Helper Functions
def generate_script(user_input):
    """Generate a short documentary script for *user_input* via the OpenRouter
    chat-completions API. Returns the script text, or None on any failure."""
    request_headers = {
        'Authorization': f'Bearer {OPENROUTER_API_KEY}',
        'HTTP-Referer': 'https://your-domain.com',
        'X-Title': 'AI Documentary Maker'
    }
    prompt = f"""Short Documentary Script GeneratorInstructions:
If I say "use this," output the script exactly as given.
If I give topics, generate a script based on them.
If I provide a full script, rewrite it unchanged. Keep it short, simple, humorous, and serious but funny. Use normal conversational text.
Formatting Rules:
- Title in square brackets: [Title]
- Each section starts with a one-word title in [ ] (max two words).
- Narration: 5-10 words, casual, funny, unpredictable.
- No special formatting, just script text.
- Generalized search terms for Pexels.
- End with a funny subscribe statement.
Example:
[North Korea]
Top 5 unknown facts about North Korea.
[Invisibility]
North Korea’s internet speed doesn’t exist.
[Leadership]
Kim Jong-un won 100% votes… against himself.
[Subscribe]
Subscribe, or Kim sends you a ticket to nowhere.
Topic: {user_input}
"""
    payload = {
        'model': OPENROUTER_MODEL,
        'messages': [{'role': 'user', 'content': prompt}],
        'temperature': 0.4,
        'max_tokens': 5000
    }
    try:
        resp = requests.post(
            'https://openrouter.ai/api/v1/chat/completions',
            headers=request_headers,
            json=payload,
            timeout=30,
        )
        resp.raise_for_status()
        return resp.json()['choices'][0]['message']['content']
    except Exception as exc:
        # Any network / HTTP / JSON-shape failure degrades to "no script"
        print(f"Script generation failed: {exc}")
        return None
def parse_script(script_text):
    """Parse a generated script into alternating media/TTS elements.

    Each section is a bracketed ``[Title]`` line followed by a narration line.
    Returns a flat list where every ``{'type': 'media', 'prompt': title}``
    entry is immediately followed by its
    ``{'type': 'tts', 'text': narration, 'voice': 'en'}`` entry.

    Unlike the previous strict two-line pairing, blank lines between
    sections are tolerated, and a ``[Title]`` directly followed by another
    ``[Title]`` is skipped rather than treated as narration.
    """
    elements = []
    lines = [line.strip() for line in script_text.splitlines()]
    i = 0
    while i < len(lines):
        line = lines[i]
        if line.startswith('[') and line.endswith(']'):
            title = line[1:-1].strip()
            # Narration is the next non-empty line after the title.
            j = i + 1
            while j < len(lines) and not lines[j]:
                j += 1
            if (title and j < len(lines)
                    and not (lines[j].startswith('[') and lines[j].endswith(']'))):
                elements.append({'type': 'media', 'prompt': title})
                elements.append({'type': 'tts', 'text': lines[j], 'voice': 'en'})
                i = j + 1
                continue
        i += 1
    return elements
def search_pexels_videos(query, api_key):
    """Search Pexels for a random HD video matching *query*.

    Returns a direct video-file URL, or None when nothing HD is found or
    the request fails. The previous version only inspected the first entry
    of each video's ``video_files`` list, discarding HD renditions listed
    later; this scans every rendition of every result.
    """
    headers = {'Authorization': api_key}
    params = {"query": query, "per_page": 15}
    try:
        response = requests.get("https://api.pexels.com/videos/search",
                                headers=headers, params=params, timeout=10)
        response.raise_for_status()
        videos = response.json().get("videos", [])
        # Collect every HD rendition across all results.
        hd_links = [
            f.get("link")
            for v in videos
            for f in v.get("video_files", [])
            if f.get("quality") == "hd" and f.get("link")
        ]
        return random.choice(hd_links) if hd_links else None
    except Exception as e:
        print(f"Pexels video search failed: {e}")
        return None
def search_pexels_images(query, api_key):
    """Return the URL of a random landscape Pexels photo for *query*, or None."""
    try:
        resp = requests.get(
            "https://api.pexels.com/v1/search",
            headers={'Authorization': api_key},
            params={"query": query, "per_page": 5, "orientation": "landscape"},
            timeout=10,
        )
        resp.raise_for_status()
        photos = resp.json().get("photos", [])
        if not photos:
            return None
        return random.choice(photos)["src"]["original"]
    except Exception as err:
        print(f"Pexels image search failed: {err}")
        return None
def download_file(url, filename):
    """Stream *url* to *filename*.

    Returns *filename* on success, None on failure. Sends the module-level
    USER_AGENT (previously defined but never used — some CDNs reject the
    default python-requests UA) and deletes any partially-written file on
    failure so callers never pick up a truncated asset.
    """
    try:
        with requests.get(url, stream=True, timeout=15,
                          headers={'User-Agent': USER_AGENT}) as response:
            response.raise_for_status()
            with open(filename, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        return filename
    except Exception as e:
        print(f"Download failed: {e}")
        # Best-effort cleanup of a partial download.
        try:
            if os.path.exists(filename):
                os.remove(filename)
        except OSError:
            pass
        return None
def generate_media(prompt, video_percentage, temp_folder):
    """Fetch a Pexels asset for *prompt* into *temp_folder*.

    With probability ``video_percentage/100`` a video is tried first; in all
    cases a still image is the fallback. Returns
    ``{'path': ..., 'asset_type': 'video'|'image'}`` or None if nothing
    could be downloaded.
    """
    slug = re.sub(r'[^\w\s-]', '', prompt).strip().replace(' ', '_')
    if random.random() < video_percentage / 100:
        video_url = search_pexels_videos(prompt, PEXELS_API_KEY)
        if video_url:
            dest = os.path.join(temp_folder, f"{slug}_video.mp4")
            if download_file(video_url, dest):
                return {"path": dest, "asset_type": "video"}
    image_url = search_pexels_images(prompt, PEXELS_API_KEY)
    if image_url:
        dest = os.path.join(temp_folder, f"{slug}.jpg")
        if download_file(image_url, dest):
            return {"path": dest, "asset_type": "image"}
    return None
def generate_tts(text, voice, temp_folder):
    """Synthesize *text* to a WAV file in *temp_folder*.

    Tries the Kokoro pipeline first (fixed 'af_heart' voice, 24 kHz); falls
    back to gTTS (English, converted MP3 -> WAV via pydub). The *voice*
    argument is accepted for interface compatibility but not used by either
    path. Returns the WAV path, or None if both engines fail.
    """
    stem = re.sub(r'[^\w\s-]', '', text[:10]).strip().replace(' ', '_')
    wav_path = os.path.join(temp_folder, f"tts_{stem}.wav")
    try:
        # Kokoro yields (graphemes, phonemes, audio) tuples; take the audio.
        samples = next(pipeline(text, voice='af_heart', speed=0.9))[2]
        sf.write(wav_path, samples, 24000)
        return wav_path
    except Exception:
        pass
    try:
        mp3_path = os.path.join(temp_folder, f"tts_{stem}.mp3")
        gTTS(text=text, lang='en').save(mp3_path)
        AudioSegment.from_mp3(mp3_path).export(wav_path, format="wav")
        os.remove(mp3_path)
        return wav_path
    except Exception as e:
        print(f"TTS generation failed: {e}")
        return None
def resize_to_fill(clip, target_resolution):
    """Scale *clip* and center-crop it so it exactly fills *target_resolution*.

    target_resolution is a (width, height) pair. The clip is scaled on the
    dimension that preserves aspect ratio, then the overflowing dimension is
    cropped symmetrically — no letterboxing.
    """
    target_w, target_h = target_resolution
    if clip.w / clip.h > target_w / target_h:
        # Clip is wider than the target: match heights, trim the sides.
        scaled = clip.resize(height=target_h)
        excess = (scaled.w - target_w) / 2
        return scaled.crop(x1=excess, x2=scaled.w - excess)
    # Clip is taller than (or equal to) the target: match widths, trim top/bottom.
    scaled = clip.resize(width=target_w)
    excess = (scaled.h - target_h) / 2
    return scaled.crop(y1=excess, y2=scaled.h - excess)
def create_clip(media_path, asset_type, tts_path, duration, narration_text, text_color, text_size, caption_bg, target_resolution):
    """Build one video clip: media + TTS narration + optional subtitles.

    Args:
        media_path: local path to a downloaded video or image.
        asset_type: "video" or "image".
        tts_path: path to the narration WAV.
        duration: kept for interface compatibility; the clip length is
            actually derived from the TTS audio duration, so this is unused.
        narration_text: text rendered as 5-word subtitle chunks.
        text_color / text_size / caption_bg: subtitle styling; a caption_bg
            of "transparent" disables subtitles entirely.
        target_resolution: (width, height) the clip must fill.

    Returns the composed moviepy clip, or None on any failure.
    """
    try:
        audio_clip = AudioFileClip(tts_path).audio_fadeout(0.2)
        target_duration = audio_clip.duration + 0.2
        if asset_type == "video":
            clip = resize_to_fill(VideoFileClip(media_path), target_resolution)
            # Loop short footage; trim long footage to the narration length.
            if clip.duration < target_duration:
                clip = clip.loop(duration=target_duration)
            else:
                clip = clip.subclip(0, target_duration)
        else:  # image
            clip = (ImageClip(media_path)
                    .set_duration(target_duration)
                    .resize(target_resolution)
                    .fadein(0.3)
                    .fadeout(0.3))
        if narration_text and caption_bg != "transparent":
            words = narration_text.split()
            chunks = [' '.join(words[i:i + 5]) for i in range(0, len(words), 5)]
            # Guard: whitespace-only narration produced an empty chunk list and
            # a ZeroDivisionError here in the original code.
            if chunks:
                chunk_duration = audio_clip.duration / len(chunks)
                subtitle_clips = [
                    TextClip(
                        chunk,
                        fontsize=text_size,
                        color=text_color,
                        bg_color=caption_bg,
                        size=(target_resolution[0] * 0.8, None),
                        method='caption',
                        align='center'
                    ).set_position(('center', target_resolution[1] * 0.7))
                     .set_start(idx * chunk_duration)
                     .set_end((idx + 1) * chunk_duration)
                    for idx, chunk in enumerate(chunks)
                ]
                clip = CompositeVideoClip([clip] + subtitle_clips)
        return clip.set_audio(audio_clip)
    except Exception as e:
        print(f"Clip creation failed: {e}")
        return None
def add_background_music(final_video, custom_music_path, music_volume):
    """Mix background music under *final_video*'s narration audio.

    Uses *custom_music_path* when it points to an existing file, otherwise a
    bundled "default_music.mp3". The track is looped to cover the full video,
    trimmed, and attenuated to *music_volume*. On any failure the video is
    returned unchanged (best-effort).

    Note: requires ``concatenate_audioclips`` and ``CompositeAudioClip``
    from moviepy.editor — previously these names were never imported, so
    this function always fell through to the except branch.
    """
    try:
        if custom_music_path and os.path.exists(custom_music_path):
            bg_music = AudioFileClip(custom_music_path)
        else:
            bg_music = AudioFileClip("default_music.mp3")  # bundled default track
        # Loop the track until it covers the whole video, then trim to length.
        if bg_music.duration < final_video.duration:
            loops = math.ceil(final_video.duration / bg_music.duration)
            bg_music = concatenate_audioclips([bg_music] * loops)
        bg_music = bg_music.subclip(0, final_video.duration).volumex(music_volume)
        if final_video.audio is not None:
            mixed = CompositeAudioClip([final_video.audio, bg_music])
        else:
            # No narration track (all clips failed TTS): use music alone.
            mixed = bg_music
        return final_video.set_audio(mixed)
    except Exception as e:
        print(f"Background music failed: {e}")
        return final_video
# Gradio Interface
# Two-step flow: (1) "Generate Script" parses the concept into up to 10
# title/narration pairs and reveals the per-clip editors; (2) "Generate Video"
# renders the final MP4 from the (possibly edited) texts and optional uploads.
with gr.Blocks(title="AI Documentary Video Generator") as app:
    ### Initial Inputs
    with gr.Column():
        concept = gr.Textbox(label="Video Concept", placeholder="Enter your video concept...")
        # "Full" -> 1920x1080 landscape, "Short" -> 1080x1920 portrait (see generate_video_fn)
        resolution = gr.Radio(["Full", "Short"], label="Resolution", value="Full")
        captions = gr.Radio(["Yes", "No"], label="Captions", value="Yes")
        # Probability (0-100) that a clip uses Pexels video footage instead of a still image
        video_percentage = gr.Slider(0, 100, label="Video Percentage", value=50)
        text_color = gr.ColorPicker(label="Text Color", value="#FFFFFF")
        text_size = gr.Slider(20, 60, label="Text Size", value=28)
        caption_bg = gr.ColorPicker(label="Caption Background Color", value="transparent")
        music_volume = gr.Slider(0, 1, label="Music Volume", value=0.08)
        # NOTE(review): type="file" is the Gradio 3.x File API (yields a tempfile
        # wrapper, not a plain path); Gradio 4 removed it in favor of
        # type="filepath" — confirm the pinned gradio version.
        custom_music = gr.File(label="Upload Custom Background Music", type="file")
        generate_script_btn = gr.Button("Generate Script")
    ### States (carry parsed-script data between the two button callbacks)
    num_clips = gr.State(value=0)
    titles_state = gr.State(value=[])
    initial_texts_state = gr.State(value=[])
    ### Clip Editing Section
    # A fixed pool of 10 hidden rows is pre-built because components cannot be
    # created inside callbacks; the callbacks below only toggle visibility.
    with gr.Column(visible=False) as clip_section:
        clip_textboxes = []
        clip_files = []
        for i in range(10):  # Max 10 clips
            with gr.Row():
                text_box = gr.Textbox(label=f"Clip {i+1} Text", visible=False)
                file_upload = gr.File(label=f"Upload Media for Clip {i+1}", type="file", visible=False)
            clip_textboxes.append(text_box)
            clip_files.append(file_upload)
        generate_video_btn = gr.Button("Generate Video", visible=False)
    ### Output
    video_output = gr.Video(label="Generated Video")
    ### Script Generation Logic
    def generate_script_fn(concept):
        """Generate and parse a script; returns (clip_count, titles, narration texts)."""
        script = generate_script(concept)
        if not script:
            # Generation failed: leave all clip slots empty/hidden
            return 0, [], []
        elements = parse_script(script)
        titles = [e['prompt'] for e in elements if e['type'] == 'media']
        texts = [e['text'] for e in elements if e['type'] == 'tts']
        return len(titles), titles, texts
    def update_textboxes(texts):
        # Fill and show one textbox per parsed clip; hide the unused remainder.
        return [gr.update(value=texts[i] if i < len(texts) else "", visible=i < len(texts)) for i in range(10)]
    def update_files(n):
        # Show one upload slot per parsed clip.
        return [gr.update(visible=i < n) for i in range(10)]
    # Chain: parse script -> populate textboxes -> show upload slots -> reveal UI.
    generate_script_btn.click(
        fn=generate_script_fn,
        inputs=[concept],
        outputs=[num_clips, titles_state, initial_texts_state]
    ).then(
        fn=update_textboxes,
        inputs=[initial_texts_state],
        outputs=clip_textboxes
    ).then(
        fn=update_files,
        inputs=[num_clips],
        outputs=clip_files
    ).then(
        fn=lambda: gr.update(visible=True),
        outputs=[clip_section]
    ).then(
        fn=lambda: gr.update(visible=True),
        outputs=[generate_video_btn]
    )
    ### Video Generation Logic
    def generate_video_fn(resolution, captions, video_percentage, text_color, text_size, caption_bg, music_volume, custom_music, num_clips, titles, *clip_data):
        """Render the final video from per-clip texts and optional uploads.

        Returns the output video path, or None when no clip could be built.
        """
        # clip_data packs the 10 textboxes followed by the 10 file uploads.
        texts = clip_data[:10]
        files = clip_data[10:]
        temp_folder = tempfile.mkdtemp()
        target_resolution = (1920, 1080) if resolution == "Full" else (1080, 1920)
        clips = []
        for i in range(num_clips):
            text = texts[i]
            media_file = files[i]
            title = titles[i]
            if media_file:
                # User-supplied media: copy into the temp workspace.
                # NOTE(review): with type="file" this value is a tempfile
                # wrapper, not a str path — verify splitext/shutil.copy accept it.
                ext = os.path.splitext(media_file)[1].lower()
                media_path = os.path.join(temp_folder, f"clip_{i}{ext}")
                shutil.copy(media_file, media_path)
                asset_type = "video" if ext in ['.mp4', '.avi', '.mov'] else "image"
            else:
                # Otherwise fetch stock media from Pexels using the section title.
                media_asset = generate_media(title, video_percentage, temp_folder)
                if not media_asset:
                    continue
                media_path = media_asset['path']
                asset_type = media_asset['asset_type']
            tts_path = generate_tts(text, 'en', temp_folder)
            if not tts_path:
                continue
            # Word-count heuristic; create_clip sizes clips from the TTS audio,
            # so this value is not actually used downstream.
            duration = max(3, len(text.split()) * 0.5)
            clip = create_clip(
                media_path, asset_type, tts_path, duration, text,
                text_color, text_size, caption_bg if captions == "Yes" else "transparent", target_resolution
            )
            if clip:
                clips.append(clip)
        if not clips:
            shutil.rmtree(temp_folder)
            return None
        final_video = concatenate_videoclips(clips, method="compose")
        final_video = add_background_music(final_video, custom_music, music_volume)
        final_video.write_videofile(OUTPUT_VIDEO_FILENAME, codec='libx264', fps=24)
        shutil.rmtree(temp_folder)
        return OUTPUT_VIDEO_FILENAME
    generate_video_btn.click(
        fn=generate_video_fn,
        inputs=[resolution, captions, video_percentage, text_color, text_size, caption_bg, music_volume, custom_music, num_clips, titles_state] + clip_textboxes + clip_files,
        outputs=[video_output]
    )
# share=True exposes a public Gradio link when launched outside HF Spaces.
app.launch(share=True)