# Hugging Face Space app: AI video generator (Gradio UI + moviepy pipeline).
import os
import random
import shutil
import tempfile
from urllib.parse import quote

import cv2
import gradio as gr
import numpy as np
import requests
from gtts import gTTS
from moviepy.editor import (
    AudioFileClip,
    CompositeAudioClip,
    CompositeVideoClip,
    ImageClip,
    TextClip,
    VideoFileClip,
    concatenate_videoclips,
)
# Global configurations.
# NOTE(security): the Pexels API key was previously hard-coded (and therefore
# leaked) in this file; it is now read from the environment instead.
PEXELS_API_KEY = os.environ.get("PEXELS_API_KEY", "")
OUTPUT_VIDEO_FILENAME = "final_video.mp4"
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
MAX_CLIPS = 10  # fixed number of clip-editor slots built in the UI
TARGET_RESOLUTION = (1920, 1080)  # (width, height); overwritten per render
TEMP_FOLDER = None  # per-render scratch dir, set in generate_video_full
CAPTION_COLOR = "#FFFFFF"  # "transparent" disables caption rendering
# Helper Functions | |
def generate_script(topic):
    """Produce a minimal two-element script (title + narration) for *topic*."""
    title_block = f"[Title]\n{topic}"
    narration_block = f"[Narration]\nThis is a sample narration about {topic}."
    return title_block + "\n" + narration_block
def parse_script(script):
    """Parse *script* into a list of element dicts.

    Lines tagged ``[Title]`` become media elements (visual prompt); lines
    tagged ``[Narration]`` become tts elements (spoken text).  Untagged
    lines are ignored.
    """
    title_tag = '[Title]'
    narration_tag = '[Narration]'
    parsed = []
    for raw_line in script.strip().split('\n'):
        if raw_line.startswith(title_tag):
            parsed.append({'type': 'media', 'prompt': raw_line[len(title_tag):].strip()})
        elif raw_line.startswith(narration_tag):
            parsed.append({'type': 'tts', 'text': raw_line[len(narration_tag):].strip()})
    return parsed
def search_pexels_videos(query, api_key):
    """Search Pexels for *query* and return the first video file's URL, or None.

    Fixes: the original called ``quote`` without importing it (NameError on
    every call), had no request timeout, and indexed ``data['videos']``
    without guarding against a missing key.
    """
    url = f"https://api.pexels.com/videos/search?query={quote(query)}&per_page=1"
    headers = {"Authorization": api_key}
    try:
        response = requests.get(url, headers=headers, timeout=15)
    except requests.RequestException:
        return None  # network failure: caller falls back to placeholder media
    if response.status_code == 200:
        data = response.json()
        videos = data.get('videos') or []
        if videos:
            return videos[0]['video_files'][0]['link']
    return None
def download_video(url, output_path):
    """Stream *url* to *output_path*; return the path on success, else None.

    Fixes: adds a timeout, closes the response (``with``), and uses
    ``iter_content`` so requests handles transfer decoding (copying from
    ``response.raw`` bypasses it and can yield corrupt files).
    """
    try:
        with requests.get(url, headers={"User-Agent": USER_AGENT},
                          stream=True, timeout=30) as response:
            if response.status_code != 200:
                return None
            with open(output_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=65536):
                    f.write(chunk)
        return output_path
    except requests.RequestException:
        return None  # treat any network error as "no video"
def generate_tts(text, lang='en'):
    """Synthesize *text* with gTTS and save it as an MP3 in TEMP_FOLDER.

    Returns the path of the saved file.  Uses ``tempfile.mkstemp`` for the
    filename: the original ``random.randint(0, 10000)`` suffix could collide
    between segments and silently overwrite another clip's narration.
    """
    tts = gTTS(text=text, lang=lang, slow=False)
    fd, tts_path = tempfile.mkstemp(prefix="tts_", suffix=".mp3", dir=TEMP_FOLDER)
    os.close(fd)  # only reserving the name; gTTS writes via the path
    tts.save(tts_path)
    return tts_path
def resize_media(media_path, target_resolution):
    """Open *media_path* as a video or image clip resized to *target_resolution*.

    Fix: the extension check is now case-insensitive, so ``.MP4``/``.MOV``
    uploads are opened as videos instead of crashing in ImageClip.
    """
    is_video = media_path.lower().endswith(('.mp4', '.avi', '.mov'))
    clip = VideoFileClip(media_path) if is_video else ImageClip(media_path)
    return clip.resize(target_resolution)
def apply_kenburns_effect(clip, target_resolution):
    """Apply a gentle zoom-in (Ken Burns) effect to *clip*.

    Fix: the original called ``clip.fx(vfx.zoom_in, 0.1)`` — ``vfx`` was never
    imported and moviepy has no ``zoom_in`` effect, so this always raised.
    The zoom is expressed here as a time-dependent resize from 1.0x to 1.1x
    over the clip's duration.
    """
    w, h = target_resolution
    # Over-scale, then center-crop to the target frame so the zoom has margin.
    clip = clip.resize(height=h * 1.2).crop(x_center=w / 2, y_center=h / 2, width=w, height=h)
    duration = clip.duration or 1.0  # guard: ImageClips may have no duration yet
    # Slight frame-size drift from the zoom is absorbed by
    # concatenate_videoclips(method="compose") downstream.
    return clip.resize(lambda t: 1 + 0.1 * (t / duration))
def create_clip(media_path, asset_type, tts_path, duration, effects, narration_text, segment_index):
    """Build one video segment: media resized to TARGET_RESOLUTION, narration
    audio attached, and an optional caption overlay.

    Fixes: the original opened the video file twice (leaking a reader) and
    then replaced the subclip with ``resize_media(media_path, ...)``, which
    re-opened the file and threw away both the trim and the image duration.
    Here the source is opened once and the existing clip object is resized.
    """
    if asset_type == 'video':
        source = VideoFileClip(media_path)
        clip = source.subclip(0, min(duration, source.duration))
    else:
        clip = ImageClip(media_path, duration=duration)
    clip = clip.resize(TARGET_RESOLUTION)
    if effects == 'fade-in':
        clip = clip.crossfadein(1.0)
    audio = AudioFileClip(tts_path)
    clip = clip.set_audio(audio.set_duration(duration))
    # "transparent" is the sentinel meaning subtitles are disabled.
    if CAPTION_COLOR != "transparent":
        txt_clip = TextClip(narration_text, fontsize=45, color=CAPTION_COLOR, font="Arial", stroke_color="#000000", stroke_width=2)
        txt_clip = txt_clip.set_position('bottom').set_duration(duration)
        clip = CompositeVideoClip([clip, txt_clip])
    return clip
def generate_media(prompt, current_index, total_segments):
    """Fetch a placeholder image for segment *current_index*.

    Returns ``{'path': ..., 'asset_type': 'image'}``.  Fixes: adds a timeout
    and a status check — the original wrote whatever bytes came back (even an
    HTML error page) into a ``.jpg``, which then crashed ImageClip later.
    """
    media_path = os.path.join(TEMP_FOLDER, f"media_{current_index}.jpg")
    response = requests.get("https://via.placeholder.com/1920x1080", timeout=30)
    response.raise_for_status()  # fail loudly instead of saving garbage
    with open(media_path, 'wb') as f:
        f.write(response.content)
    return {'path': media_path, 'asset_type': 'image'}
def process_script(topic, script_input):
    """Parse the pasted script (or generate one from *topic*) and return the
    flat tuple of Gradio updates for the clip editor.

    Returns ``(script_text, num_clips, <4 updates per clip slot>)`` where the
    per-slot updates are interleaved as (accordion, prompt, narration, media)
    for each of the MAX_CLIPS slots — matching the order of the outputs list
    registered on the generate button.

    Fix: the original returned two scalars plus four *lists* (6 values total)
    while the click handler registers 2 + 4*MAX_CLIPS individual output
    components, and the components are wired interleaved per clip, not
    grouped by field — so the event always failed with an output-count
    mismatch.  The error branch had the same arity bug.
    """
    raw_script = script_input if script_input.strip() else generate_script(topic)
    if not raw_script:
        hidden = []
        for _ in range(MAX_CLIPS):
            hidden.extend([gr.update(visible=False), gr.update(value=""),
                           gr.update(value=""), gr.update(value=None)])
        return ("Failed to generate script", 0, *hidden)
    elements = parse_script(raw_script)
    # Pair consecutive (media, tts) elements; an unpaired trailing element is dropped.
    paired_elements = [(elements[i], elements[i + 1]) for i in range(0, len(elements) - 1, 2)]
    num_clips = min(len(paired_elements), MAX_CLIPS)
    updates = []
    for i in range(MAX_CLIPS):
        if i < num_clips:
            media_elem, tts_elem = paired_elements[i]
            updates.extend([
                gr.update(visible=True, label=f"Clip {i+1}: {media_elem['prompt'][:20]}..."),
                gr.update(value=media_elem['prompt']),
                gr.update(value=tts_elem['text']),
                gr.update(value=None),
            ])
        else:
            updates.extend([gr.update(visible=False), gr.update(value=""),
                            gr.update(value=""), gr.update(value=None)])
    return (raw_script, num_clips, *updates)
def generate_video_full(resolution, render_speed, video_clip_percent, zoom_pan_effect,
                        bgm_upload, bgm_volume, subtitles_enabled, num_clips, *clip_inputs):
    """Render the final video from all settings and per-clip inputs.

    ``clip_inputs`` arrives as ``num_clips`` triples of
    (visual_prompt, narration, custom_media) in UI order.  Returns
    ``(video_path, download_path)`` or ``(None, None)`` on failure.

    Fixes: the temp folder is now removed in a ``finally`` (the original
    leaked it whenever rendering raised); the background-music mix uses
    ``CompositeAudioClip`` (the original wrapped audio clips in
    ``CompositeVideoClip``, which fails at render time); looping uses
    ``audio_loop`` (moviepy attaches no ``.loop`` to audio clips); and a
    failed Pexels download no longer clobbers the placeholder media path.
    """
    global TARGET_RESOLUTION, CAPTION_COLOR, TEMP_FOLDER
    TARGET_RESOLUTION = (1080, 1920) if resolution == "Short (1080x1920)" else (1920, 1080)
    CAPTION_COLOR = "#FFFFFF" if subtitles_enabled else "transparent"
    TEMP_FOLDER = tempfile.mkdtemp()
    try:
        clips_data = []
        for i in range(num_clips):
            base = i * 3  # three UI components per clip slot
            clips_data.append({
                'visual_prompt': clip_inputs[base],
                'narration': clip_inputs[base + 1],
                'custom_media': clip_inputs[base + 2],
            })
        clips = []
        for idx, clip_data in enumerate(clips_data):
            if clip_data['custom_media']:
                media_path = clip_data['custom_media']
                asset_type = 'video' if media_path.endswith(('.mp4', '.avi', '.mov')) else 'image'
            else:
                media_asset = generate_media(clip_data['visual_prompt'], idx, num_clips)
                media_path = media_asset['path']
                asset_type = media_asset['asset_type']
                # Randomly upgrade a placeholder image to a stock video clip.
                if random.random() < (video_clip_percent / 100):
                    video_url = search_pexels_videos(clip_data['visual_prompt'], PEXELS_API_KEY)
                    if video_url:
                        downloaded = download_video(video_url, os.path.join(TEMP_FOLDER, f"video_{idx}.mp4"))
                        if downloaded:  # keep the placeholder if the download failed
                            media_path = downloaded
                            asset_type = 'video'
            tts_path = generate_tts(clip_data['narration'])
            # Rough pacing: two words per second, minimum 3 s per clip.
            duration = max(3, len(clip_data['narration'].split()) * 0.5)
            clip = create_clip(media_path, asset_type, tts_path, duration, 'fade-in',
                               clip_data['narration'], idx)
            if clip and zoom_pan_effect and asset_type == 'image':
                clip = apply_kenburns_effect(clip, TARGET_RESOLUTION)
            if clip:
                clips.append(clip)
        if not clips:
            return None, None
        final_video = concatenate_videoclips(clips, method="compose")
        if bgm_upload:
            bg_music = AudioFileClip(bgm_upload).volumex(bgm_volume)
            if bg_music.duration < final_video.duration:
                bg_music = bg_music.audio_loop(duration=final_video.duration)
            else:
                bg_music = bg_music.subclip(0, final_video.duration)
            final_video = final_video.set_audio(
                CompositeAudioClip([final_video.audio, bg_music]))
        final_video.write_videofile(OUTPUT_VIDEO_FILENAME, codec='libx264', fps=24, preset=render_speed)
        return OUTPUT_VIDEO_FILENAME, OUTPUT_VIDEO_FILENAME
    finally:
        # Always clean the scratch dir, even when rendering raises.
        shutil.rmtree(TEMP_FOLDER, ignore_errors=True)
# Gradio Interface
# Three-column layout: content input -> per-clip editor -> settings/output.
with gr.Blocks(title="Video Generator") as demo:
    gr.Markdown("# Video Generator")
    gr.Markdown("Create custom videos with script, clips, and settings!")
    with gr.Row():
        # Column 1: Content Input & Script Generation
        with gr.Column(scale=1):
            gr.Markdown("### 1. Content Input")
            topic_input = gr.Textbox(label="Topic", placeholder="e.g., Funny Cat Facts")
            script_input = gr.Textbox(label="Or Paste Full Script", lines=10, placeholder="[Title]\nNarration...")
            generate_button = gr.Button("Generate Script & Load Clips")
            # Hidden until a script has been generated (see .then below).
            script_display = gr.Textbox(label="Generated Script", interactive=False, visible=False)
        # Column 2: Clip Editor
        with gr.Column(scale=2):
            gr.Markdown("### 2. Edit Clips")
            with gr.Column():
                # One collapsed accordion per possible clip slot; process_script
                # toggles visibility and fills the fields.
                clip_accordions = []
                for i in range(MAX_CLIPS):
                    with gr.Accordion(f"Clip {i+1}", visible=False) as acc:
                        visual_prompt = gr.Textbox(label="Visual Prompt")
                        narration = gr.Textbox(label="Narration", lines=3)
                        custom_media = gr.File(label="Upload Custom Media")
                    # Tuple order (acc, prompt, narration, media) is relied on
                    # by the event wiring below.
                    clip_accordions.append((acc, visual_prompt, narration, custom_media))
        # Column 3: Settings & Output
        with gr.Column(scale=1):
            gr.Markdown("### 3. Video Settings")
            resolution = gr.Radio(["Short (1080x1920)", "Full HD (1920x1080)"], label="Resolution", value="Full HD (1920x1080)")
            render_speed = gr.Dropdown(["ultrafast", "fast", "medium", "slow"], label="Render Speed", value="fast")
            # Chance (percent) that a generated segment is replaced by stock video.
            video_clip_percent = gr.Slider(0, 100, value=25, label="Video Clip Percentage")
            zoom_pan_effect = gr.Checkbox(label="Add Zoom/Pan Effect", value=True)
            bgm_upload = gr.Audio(label="Upload Background Music", type="filepath")
            bgm_volume = gr.Slider(0.0, 1.0, value=0.15, label="BGM Volume")
            subtitles_enabled = gr.Checkbox(label="Enable Subtitles", value=True)
            generate_video_button = gr.Button("Generate Video")
            gr.Markdown("### 4. Output")
            output_video = gr.Video(label="Generated Video")
            download_button = gr.File(label="Download Video")
    # Number of active clips, carried between the two click handlers.
    num_clips_state = gr.State(value=0)
    # Outputs are interleaved per clip: accordion, prompt, narration, media —
    # process_script must return its updates in exactly this order.
    generate_button.click(
        fn=process_script,
        inputs=[topic_input, script_input],
        outputs=[script_display, num_clips_state] +
                [comp for acc in clip_accordions for comp in [acc[0], acc[1], acc[2], acc[3]]]
    ).then(
        fn=lambda x: gr.update(visible=True),
        inputs=[script_display],
        outputs=[script_display]
    )
    # acc[1:] skips the Accordion itself: (prompt, narration, media) per clip.
    generate_video_button.click(
        fn=generate_video_full,
        inputs=[resolution, render_speed, video_clip_percent, zoom_pan_effect,
                bgm_upload, bgm_volume, subtitles_enabled, num_clips_state] +
               [comp for acc in clip_accordions for comp in acc[1:]],
        outputs=[output_video, download_button]
    )
demo.launch()