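"""Gradio app that assembles short narrated videos from a topic or a pasted script.

Each clip pairs a visual (an uploaded file, Pexels stock footage, or a placeholder
image) with gTTS narration, optional captions and background music, and the final
video is rendered with moviepy.
"""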
import gradio as gr
import os
import tempfile
import shutil
import random
import requests
from urllib.parse import quote
from moviepy.editor import (VideoFileClip, ImageClip, TextClip, AudioFileClip,
                            CompositeVideoClip, CompositeAudioClip,
                            concatenate_videoclips, afx)
import numpy as np
from gtts import gTTS
import cv2

# Global configurations
PEXELS_API_KEY = 'BhJqbcdm9Vi90KqzXKAhnEHGsuFNv4irXuOjWtT761U49lRzo03qBGna'  # Replace with your Pexels API key
OUTPUT_VIDEO_FILENAME = "final_video.mp4"
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
MAX_CLIPS = 10
TARGET_RESOLUTION = (1920, 1080)  # Default resolution
TEMP_FOLDER = None
CAPTION_COLOR = "#FFFFFF"
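# NOTE: captions are rendered with moviepy's TextClip, which requires an ImageMagick install.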

# Helper Functions
def generate_script(topic):
    """Generate a simple script based on the topic."""
    # parse_script expects each tag and its text on the same line.
    return f"[Title] {topic}\n[Narration] This is a sample narration about {topic}."

def parse_script(script):
    """Parse the script into elements."""
    elements = []
    lines = script.strip().split('\n')
    for line in lines:
        if line.startswith('[Title]'):
            elements.append({'type': 'media', 'prompt': line[7:].strip()})
        elif line.startswith('[Narration]'):
            elements.append({'type': 'tts', 'text': line[11:].strip()})
    return elements

def search_pexels_videos(query, api_key):
    """Search for videos on Pexels."""
    url = f"https://api.pexels.com/videos/search?query={quote(query)}&per_page=1"
    headers = {"Authorization": api_key}
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        data = response.json()
        if data['videos']:
            return data['videos'][0]['video_files'][0]['link']
    return None

def download_video(url, output_path):
    """Download a video from a URL."""
    response = requests.get(url, headers={"User-Agent": USER_AGENT}, stream=True)
    if response.status_code == 200:
        with open(output_path, 'wb') as f:
            shutil.copyfileobj(response.raw, f)
        return output_path
    return None

def generate_tts(text, lang='en'):
    """Generate TTS audio using gTTS."""
    tts = gTTS(text=text, lang=lang, slow=False)
    tts_path = os.path.join(TEMP_FOLDER, f"tts_{random.randint(0, 10000)}.mp3")
    tts.save(tts_path)
    return tts_path

def resize_media(media_path, target_resolution):
    """Resize media to match target resolution."""
    clip = VideoFileClip(media_path) if media_path.endswith(('.mp4', '.avi', '.mov')) else ImageClip(media_path)
    return clip.resize(target_resolution)

def apply_kenburns_effect(clip, target_resolution):
    """Apply a slow zoom (Ken Burns style) effect to an image clip."""
    w, h = target_resolution
    # Oversize the frame, crop the centre back to the target size, then zoom in
    # gradually over time (moviepy has no built-in Ken Burns effect, so a
    # time-varying resize approximates it).
    clip = clip.resize(height=int(h * 1.2))
    clip = clip.crop(x_center=clip.w / 2, y_center=clip.h / 2, width=w, height=h)
    return clip.resize(lambda t: 1 + 0.03 * t)

def create_clip(media_path, asset_type, tts_path, duration, effects, narration_text, segment_index):
    """Create a video clip with media, narration audio and optional captions."""
    if asset_type == 'video':
        clip = VideoFileClip(media_path)
        clip = clip.subclip(0, min(duration, clip.duration))
    else:
        clip = ImageClip(media_path, duration=duration)

    # Resize the clip built above; reloading from media_path here would discard
    # the subclip/duration just set.
    clip = clip.resize(TARGET_RESOLUTION)
    if effects == 'fade-in':
        clip = clip.crossfadein(1.0)

    audio = AudioFileClip(tts_path)
    # Clamp so we never read past the end of the TTS file.
    audio = audio.set_duration(min(duration, audio.duration))
    clip = clip.set_audio(audio)

    if CAPTION_COLOR != "transparent":
        txt_clip = TextClip(narration_text, fontsize=45, color=CAPTION_COLOR, font="Arial",
                            stroke_color="#000000", stroke_width=2)
        txt_clip = txt_clip.set_position('bottom').set_duration(duration)
        clip = CompositeVideoClip([clip, txt_clip])

    return clip

def generate_media(prompt, current_index, total_segments):
    """Generate media (placeholder for actual generation)."""
    media_path = os.path.join(TEMP_FOLDER, f"media_{current_index}.jpg")
    with open(media_path, 'wb') as f:
        f.write(requests.get("https://via.placeholder.com/1920x1080").content)  # Placeholder image
    return {'path': media_path, 'asset_type': 'image'}

def process_script(topic, script_input):
    """Process the topic or script and return updates for the UI."""
    if script_input.strip():
        raw_script = script_input
    else:
        raw_script = generate_script(topic)
        if not raw_script:
            return ["Failed to generate script", 0] + [gr.update() for _ in range(MAX_CLIPS * 4)]

    elements = parse_script(raw_script)
    paired_elements = [(elements[i], elements[i + 1]) for i in range(0, len(elements) - 1, 2)]
    num_clips = min(len(paired_elements), MAX_CLIPS)

    # One update per output component, interleaved as (accordion, prompt,
    # narration, media) per clip to match the outputs list of generate_button.
    component_updates = []
    for i in range(MAX_CLIPS):
        if i < num_clips:
            media_elem, tts_elem = paired_elements[i]
            component_updates.extend([
                gr.update(visible=True, label=f"Clip {i+1}: {media_elem['prompt'][:20]}..."),
                gr.update(value=media_elem['prompt']),
                gr.update(value=tts_elem['text']),
                gr.update(value=None),
            ])
        else:
            component_updates.extend([
                gr.update(visible=False),
                gr.update(value=""),
                gr.update(value=""),
                gr.update(value=None),
            ])

    return [raw_script, num_clips] + component_updates

def generate_video_full(resolution, render_speed, video_clip_percent, zoom_pan_effect, 
                        bgm_upload, bgm_volume, subtitles_enabled, num_clips, *clip_inputs):
    """Generate the video using all settings and edited clip data."""
    global TARGET_RESOLUTION, CAPTION_COLOR, TEMP_FOLDER
    
    TARGET_RESOLUTION = (1080, 1920) if resolution == "Short (1080x1920)" else (1920, 1080)
    CAPTION_COLOR = "#FFFFFF" if subtitles_enabled else "transparent"
    TEMP_FOLDER = tempfile.mkdtemp()
    
    clips_data = []
    for i in range(num_clips):
        idx = i * 3
        clips_data.append({
            'visual_prompt': clip_inputs[idx],
            'narration': clip_inputs[idx + 1],
            'custom_media': clip_inputs[idx + 2]
        })

    clips = []
    for idx, clip_data in enumerate(clips_data):
        if clip_data['custom_media']:
            custom = clip_data['custom_media']
            # gr.File may return a plain path or a tempfile wrapper depending on
            # the Gradio version; accept either form.
            media_path = custom if isinstance(custom, str) else custom.name
            asset_type = 'video' if media_path.endswith(('.mp4', '.avi', '.mov')) else 'image'
        else:
            media_asset = generate_media(clip_data['visual_prompt'], idx, num_clips)
            media_path = media_asset['path']
            asset_type = media_asset['asset_type']
            if random.random() < (video_clip_percent / 100):
                video_url = search_pexels_videos(clip_data['visual_prompt'], PEXELS_API_KEY)
                if video_url:
                    media_path = download_video(video_url, os.path.join(TEMP_FOLDER, f"video_{idx}.mp4"))
                    asset_type = 'video'

        tts_path = generate_tts(clip_data['narration'])
        duration = max(3, len(clip_data['narration'].split()) * 0.5)
        clip = create_clip(media_path, asset_type, tts_path, duration, 'fade-in', clip_data['narration'], idx)
        if clip and zoom_pan_effect and asset_type == 'image':
            clip = apply_kenburns_effect(clip, TARGET_RESOLUTION)
        if clip:
            clips.append(clip)

    if not clips:
        shutil.rmtree(TEMP_FOLDER)
        return None, None

    final_video = concatenate_videoclips(clips, method="compose")
    
    if bgm_upload:
        bg_music = AudioFileClip(bgm_upload).volumex(bgm_volume)
        if bg_music.duration < final_video.duration:
            # Audio clips have no .loop() method; use the audio_loop effect instead.
            bg_music = bg_music.fx(afx.audio_loop, duration=final_video.duration)
        else:
            bg_music = bg_music.subclip(0, final_video.duration)
        # Mix narration and background music as audio, not as a video composite.
        final_video = final_video.set_audio(CompositeAudioClip([final_video.audio, bg_music]))

    output_path = OUTPUT_VIDEO_FILENAME
    final_video.write_videofile(output_path, codec='libx264', fps=24, preset=render_speed)
    shutil.rmtree(TEMP_FOLDER)

    return output_path, output_path

# Gradio Interface
with gr.Blocks(title="Video Generator") as demo:
    gr.Markdown("# Video Generator")
    gr.Markdown("Create custom videos with script, clips, and settings!")
    
    with gr.Row():
        # Column 1: Content Input & Script Generation
        with gr.Column(scale=1):
            gr.Markdown("### 1. Content Input")
            topic_input = gr.Textbox(label="Topic", placeholder="e.g., Funny Cat Facts")
            script_input = gr.Textbox(label="Or Paste Full Script", lines=10, placeholder="[Title] Your title\n[Narration] Your narration...")
            generate_button = gr.Button("Generate Script & Load Clips")
            script_display = gr.Textbox(label="Generated Script", interactive=False, visible=False)
        
        # Column 2: Clip Editor
        with gr.Column(scale=2):
            gr.Markdown("### 2. Edit Clips")
            with gr.Column():
                clip_accordions = []
                for i in range(MAX_CLIPS):
                    with gr.Accordion(f"Clip {i+1}", visible=False) as acc:
                        visual_prompt = gr.Textbox(label="Visual Prompt")
                        narration = gr.Textbox(label="Narration", lines=3)
                        custom_media = gr.File(label="Upload Custom Media")
                        clip_accordions.append((acc, visual_prompt, narration, custom_media))
        
        # Column 3: Settings & Output
        with gr.Column(scale=1):
            gr.Markdown("### 3. Video Settings")
            resolution = gr.Radio(["Short (1080x1920)", "Full HD (1920x1080)"], label="Resolution", value="Full HD (1920x1080)")
            render_speed = gr.Dropdown(["ultrafast", "fast", "medium", "slow"], label="Render Speed", value="fast")
            video_clip_percent = gr.Slider(0, 100, value=25, label="Video Clip Percentage")
            zoom_pan_effect = gr.Checkbox(label="Add Zoom/Pan Effect", value=True)
            bgm_upload = gr.Audio(label="Upload Background Music", type="filepath")
            bgm_volume = gr.Slider(0.0, 1.0, value=0.15, label="BGM Volume")
            subtitles_enabled = gr.Checkbox(label="Enable Subtitles", value=True)
            generate_video_button = gr.Button("Generate Video")
            gr.Markdown("### 4. Output")
            output_video = gr.Video(label="Generated Video")
            download_button = gr.File(label="Download Video")

    num_clips_state = gr.State(value=0)

    generate_button.click(
        fn=process_script,
        inputs=[topic_input, script_input],
        outputs=[script_display, num_clips_state] + 
                [comp for acc in clip_accordions for comp in [acc[0], acc[1], acc[2], acc[3]]]
    ).then(
        fn=lambda x: gr.update(visible=True),
        inputs=[script_display],
        outputs=[script_display]
    )

    generate_video_button.click(
        fn=generate_video_full,
        inputs=[resolution, render_speed, video_clip_percent, zoom_pan_effect, 
                bgm_upload, bgm_volume, subtitles_enabled, num_clips_state] + 
               [comp for acc in clip_accordions for comp in acc[1:]],
        outputs=[output_video, download_button]
    )

if __name__ == "__main__":
    demo.launch()