Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,117 +1,44 @@
|
|
1 |
-
# Import necessary libraries
|
2 |
-
from kokoro import KPipeline
|
3 |
-
import soundfile as sf
|
4 |
import os
|
5 |
-
from moviepy.editor import
|
6 |
-
VideoFileClip, concatenate_videoclips, AudioFileClip, CompositeVideoClip, TextClip, CompositeAudioClip
|
7 |
-
)
|
8 |
import tempfile
|
9 |
import random
|
10 |
import shutil
|
11 |
-
import moviepy.config as mpy_config
|
12 |
-
from pydub import AudioSegment
|
13 |
from gtts import gTTS
|
|
|
|
|
|
|
14 |
import gradio as gr
|
15 |
-
import requests
|
16 |
-
import re
|
17 |
-
|
18 |
-
# Initialize Kokoro TTS pipeline (using American English)
|
19 |
-
pipeline = KPipeline(lang_code='a') # Use voice 'af_heart' for American English
|
20 |
-
|
21 |
-
# Ensure ImageMagick binary is set
|
22 |
-
mpy_config.change_settings({"IMAGEMAGICK_BINARY": "/usr/bin/convert"})
|
23 |
|
24 |
# Global Configuration
|
25 |
-
OPENROUTER_API_KEY = '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'
|
26 |
-
OPENROUTER_MODEL = "google/gemini-2.0-flash-exp:free"
|
27 |
OUTPUT_VIDEO_FILENAME = "final_video.mp4"
|
28 |
-
|
29 |
-
TARGET_RESOLUTION = (1080, 1920) # Fixed vertical resolution
|
30 |
-
CAPTION_COLOR = None
|
31 |
TEMP_FOLDER = None
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
bg_music_volume = 0.08
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
- Use natural, formal English. No slang, no fake AI language, and no robotic tone.
|
56 |
-
- Do not use humor, sarcasm, or casual language. This is a serious narration.
|
57 |
-
- No emotion-sound words like “aww,” “eww,” “whoa,” etc.
|
58 |
-
- Do not use numbers like 1, 2, 3 — write them out as one, two, three.
|
59 |
-
- At the end, add a [Subscribe] tag with a formal or respectful reason to follow or subscribe.
|
60 |
-
|
61 |
-
Only output the script. No extra comments or text.
|
62 |
-
|
63 |
-
Example:
|
64 |
-
|
65 |
-
[Ocean]
|
66 |
-
|
67 |
-
The ocean covers over seventy percent of the Earth's surface.
|
68 |
-
|
69 |
-
[Currents]
|
70 |
-
|
71 |
-
Ocean currents distribute heat and regulate global climate patterns.
|
72 |
-
|
73 |
-
[Coral Reefs]
|
74 |
-
|
75 |
-
These ecosystems support over one million species of marine life.
|
76 |
-
|
77 |
-
[Pollution]
|
78 |
-
|
79 |
-
Plastic waste threatens marine biodiversity and food chains.
|
80 |
-
|
81 |
-
[Climate Impact]
|
82 |
-
|
83 |
-
Rising temperatures are causing coral bleaching and habitat loss.
|
84 |
-
|
85 |
-
[Subscribe]
|
86 |
-
|
87 |
-
Follow to explore more about the changing planet we live on.
|
88 |
-
|
89 |
-
Topic: {user_input}
|
90 |
-
"""
|
91 |
-
data = {
|
92 |
-
'model': OPENROUTER_MODEL,
|
93 |
-
'messages': [{'role': 'user', 'content': prompt}],
|
94 |
-
'temperature': 0.4,
|
95 |
-
'max_tokens': 5000
|
96 |
-
}
|
97 |
-
try:
|
98 |
-
response = requests.post(
|
99 |
-
'https://openrouter.ai/api/v1/chat/completions',
|
100 |
-
headers=headers,
|
101 |
-
json=data,
|
102 |
-
timeout=30
|
103 |
-
)
|
104 |
-
if response.status_code == 200:
|
105 |
-
return response.json()['choices'][0]['message']['content']
|
106 |
-
else:
|
107 |
-
print(f"API Error {response.status_code}: {response.text}")
|
108 |
-
return None
|
109 |
-
except Exception as e:
|
110 |
-
print(f"Request failed: {str(e)}")
|
111 |
-
return None
|
112 |
|
113 |
def parse_script(script_text):
|
114 |
-
"""Parse the script
|
115 |
sections = {}
|
116 |
current_title = None
|
117 |
current_text = ""
|
@@ -129,301 +56,155 @@ def parse_script(script_text):
|
|
129 |
current_text += line + " "
|
130 |
if current_title:
|
131 |
sections[current_title] = current_text.strip()
|
132 |
-
|
133 |
-
|
134 |
-
if not narration:
|
135 |
-
continue
|
136 |
-
words = narration.split()
|
137 |
-
duration = max(3, len(words) * 0.5) # Initial estimate, actual duration from TTS
|
138 |
-
tts_element = {"type": "tts", "text": narration, "voice": "en", "duration": duration}
|
139 |
-
elements.append(tts_element)
|
140 |
-
return elements
|
141 |
|
142 |
-
def generate_tts(text
|
143 |
-
"""Generate TTS audio
|
144 |
-
safe_text =
|
145 |
file_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}.wav")
|
146 |
-
if os.path.exists(file_path):
|
147 |
-
print(f"Using cached TTS for text '{text[:10]}...'")
|
148 |
-
return file_path
|
149 |
try:
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
|
|
|
|
156 |
return file_path
|
157 |
except Exception as e:
|
158 |
-
print(f"
|
159 |
-
try:
|
160 |
-
print("Falling back to gTTS...")
|
161 |
-
tts = gTTS(text=text, lang='en')
|
162 |
-
mp3_path = os.path.join(TEMP_FOLDER, f"tts_{safe_text}.mp3")
|
163 |
-
tts.save(mp3_path)
|
164 |
-
audio = AudioSegment.from_mp3(mp3_path)
|
165 |
-
audio.export(file_path, format="wav")
|
166 |
-
os.remove(mp3_path)
|
167 |
-
print(f"Fallback TTS saved to {file_path} (gTTS)")
|
168 |
-
return file_path
|
169 |
-
except Exception as fallback_error:
|
170 |
-
print(f"Both TTS methods failed: {fallback_error}")
|
171 |
-
return None
|
172 |
-
|
173 |
-
def resize_to_fill(clip, target_resolution):
|
174 |
-
"""Resize and crop clip to fill the target resolution."""
|
175 |
-
target_w, target_h = target_resolution
|
176 |
-
clip_aspect = clip.w / clip.h
|
177 |
-
target_aspect = target_w / target_h
|
178 |
-
if clip_aspect > target_aspect:
|
179 |
-
clip = clip.resize(height=target_h)
|
180 |
-
crop_amount = (clip.w - target_w) / 2
|
181 |
-
clip = clip.crop(x1=crop_amount, x2=clip.w - crop_amount, y1=0, y2=clip.h)
|
182 |
-
else:
|
183 |
-
clip = clip.resize(width=target_w)
|
184 |
-
crop_amount = (clip.h - target_h) / 2
|
185 |
-
clip = clip.crop(x1=0, x2=clip.w, y1=crop_amount, y2=clip.h - crop_amount)
|
186 |
-
return clip
|
187 |
-
|
188 |
-
def add_background_music(final_video, bg_music_volume=0.08):
|
189 |
-
"""Add background music to the final video."""
|
190 |
-
try:
|
191 |
-
bg_music_path = "music.mp3"
|
192 |
-
if os.path.exists(bg_music_path):
|
193 |
-
print(f"Adding background music from: {bg_music_path}")
|
194 |
-
bg_music = AudioFileClip(bg_music_path)
|
195 |
-
if bg_music.duration < final_video.duration:
|
196 |
-
loops_needed = math.ceil(final_video.duration / bg_music.duration)
|
197 |
-
bg_segments = [bg_music] * loops_needed
|
198 |
-
bg_music = concatenate_audioclips(bg_segments)
|
199 |
-
bg_music = bg_music.subclip(0, final_video.duration)
|
200 |
-
bg_music = bg_music.volumex(bg_music_volume)
|
201 |
-
video_audio = final_video.audio
|
202 |
-
mixed_audio = CompositeAudioClip([video_audio, bg_music])
|
203 |
-
final_video = final_video.set_audio(mixed_audio)
|
204 |
-
print("Background music added successfully")
|
205 |
-
else:
|
206 |
-
print("No music.mp3 found, skipping background music")
|
207 |
-
return final_video
|
208 |
-
except Exception as e:
|
209 |
-
print(f"Error adding background music: {e}")
|
210 |
-
return final_video
|
211 |
-
|
212 |
-
def create_clip(video_path, start_time, duration, tts_path, narration_text, segment_index):
|
213 |
-
"""Create a video clip with synchronized captions."""
|
214 |
-
try:
|
215 |
-
print(f"Creating clip #{segment_index} from {start_time:.2f} to {start_time + duration:.2f}")
|
216 |
-
video_clip = VideoFileClip(video_path).subclip(start_time, start_time + duration)
|
217 |
-
video_clip = resize_to_fill(video_clip, TARGET_RESOLUTION)
|
218 |
-
audio_clip = AudioFileClip(tts_path).audio_fadeout(0.2)
|
219 |
-
video_clip = video_clip.set_audio(audio_clip)
|
220 |
-
|
221 |
-
if CAPTION_COLOR != "transparent" and narration_text:
|
222 |
-
words = narration_text.split()
|
223 |
-
chunks = [words[i:i+5] for i in range(0, len(words), 5)]
|
224 |
-
chunk_duration = duration / len(chunks) if len(chunks) > 0 else duration
|
225 |
-
subtitle_clips = []
|
226 |
-
for i, chunk in enumerate(chunks):
|
227 |
-
chunk_text = ' '.join(chunk)
|
228 |
-
start_time = i * chunk_duration
|
229 |
-
end_time = (i + 1) * chunk_duration if i < len(chunks) - 1 else duration
|
230 |
-
txt_clip = TextClip(
|
231 |
-
chunk_text,
|
232 |
-
fontsize=font_size,
|
233 |
-
font='Arial-Bold',
|
234 |
-
color=CAPTION_COLOR,
|
235 |
-
bg_color='rgba(0, 0, 0, 0.25)',
|
236 |
-
method='caption',
|
237 |
-
align='center',
|
238 |
-
size=(TARGET_RESOLUTION[0] * 0.8, None)
|
239 |
-
).set_start(start_time).set_end(end_time).set_position(('center', int(TARGET_RESOLUTION[1] * 0.85)))
|
240 |
-
subtitle_clips.append(txt_clip)
|
241 |
-
video_clip = CompositeVideoClip([video_clip] + subtitle_clips)
|
242 |
-
|
243 |
-
print(f"Clip created: {video_clip.duration:.1f}s")
|
244 |
-
return video_clip
|
245 |
-
except Exception as e:
|
246 |
-
print(f"Error in create_clip: {str(e)}")
|
247 |
return None
|
248 |
|
249 |
-
def
|
250 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
251 |
try:
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
278 |
TEMP_FOLDER = tempfile.mkdtemp()
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
shutil.rmtree(TEMP_FOLDER)
|
289 |
-
return None
|
290 |
-
print("Generated Script:\n", script)
|
291 |
-
|
292 |
-
elements = parse_script(script)
|
293 |
-
if not elements:
|
294 |
-
print("Failed to parse script into elements.")
|
295 |
shutil.rmtree(TEMP_FOLDER)
|
296 |
return None
|
297 |
-
|
298 |
-
|
|
|
|
|
|
|
|
|
299 |
video_path = "video.mp4"
|
300 |
if not os.path.exists(video_path):
|
301 |
-
print("video.mp4 not found
|
302 |
shutil.rmtree(TEMP_FOLDER)
|
303 |
return None
|
304 |
-
|
305 |
-
|
306 |
-
total_duration =
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
clips = []
|
311 |
-
for idx, tts_elem in enumerate(elements):
|
312 |
-
print(f"\nProcessing segment {idx+1}/{len(elements)}")
|
313 |
-
tts_path = generate_tts(tts_elem['text'], tts_elem['voice'])
|
314 |
-
if not tts_path:
|
315 |
-
print(f"Skipping segment {idx+1} due to TTS failure.")
|
316 |
-
continue
|
317 |
-
|
318 |
-
audio_clip = AudioFileClip(tts_path)
|
319 |
-
segment_duration = audio_clip.duration
|
320 |
-
audio_clip.close()
|
321 |
-
|
322 |
-
max_start = total_duration - segment_duration
|
323 |
-
if max_start <= 0:
|
324 |
-
print(f"Segment duration {segment_duration:.2f}s exceeds video duration {total_duration:.2f}s.")
|
325 |
-
continue
|
326 |
-
|
327 |
-
start_time = random.uniform(0, max_start)
|
328 |
-
clip = create_clip(
|
329 |
-
video_path=video_path,
|
330 |
-
start_time=start_time,
|
331 |
-
duration=segment_duration,
|
332 |
-
tts_path=tts_path,
|
333 |
-
narration_text=tts_elem['text'],
|
334 |
-
segment_index=idx
|
335 |
-
)
|
336 |
-
if clip:
|
337 |
-
clips.append(clip)
|
338 |
-
else:
|
339 |
-
print(f"Clip creation failed for segment {idx+1}.")
|
340 |
-
|
341 |
-
if not clips:
|
342 |
-
print("No clips were successfully created.")
|
343 |
shutil.rmtree(TEMP_FOLDER)
|
344 |
return None
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
355 |
shutil.rmtree(TEMP_FOLDER)
|
356 |
-
print("Temporary files removed.")
|
357 |
-
|
358 |
return OUTPUT_VIDEO_FILENAME
|
359 |
|
360 |
# Gradio Interface
|
361 |
-
VOICE_CHOICES = {
|
362 |
-
'Emma (Female)': 'af_heart',
|
363 |
-
'Bella (Female)': 'af_bella',
|
364 |
-
'Nicole (Female)': 'af_nicole',
|
365 |
-
'Aoede (Female)': 'af_aoede',
|
366 |
-
'Kore (Female)': 'af_kore',
|
367 |
-
'Sarah (Female)': 'af_sarah',
|
368 |
-
'Nova (Female)': 'af_nova',
|
369 |
-
'Sky (Female)': 'af_sky',
|
370 |
-
'Alloy (Female)': 'af_alloy',
|
371 |
-
'Jessica (Female)': 'af_jessica',
|
372 |
-
'River (Female)': 'af_river',
|
373 |
-
'Michael (Male)': 'am_michael',
|
374 |
-
'Fenrir (Male)': 'am_fenrir',
|
375 |
-
'Puck (Male)': 'am_puck',
|
376 |
-
'Echo (Male)': 'am_echo',
|
377 |
-
'Eric (Male)': 'am_eric',
|
378 |
-
'Liam (Male)': 'am_liam',
|
379 |
-
'Onyx (Male)': 'am_onyx',
|
380 |
-
'Santa (Male)': 'am_santa',
|
381 |
-
'Adam (Male)': 'am_adam',
|
382 |
-
'Emma 🇬🇧 (Female)': 'bf_emma',
|
383 |
-
'Isabella 🇬🇧 (Female)': 'bf_isabella',
|
384 |
-
'Alice 🇬🇧 (Female)': 'bf_alice',
|
385 |
-
'Lily 🇬🇧 (Female)': 'bf_lily',
|
386 |
-
'George 🇬🇧 (Male)': 'bm_george',
|
387 |
-
'Fable 🇬🇧 (Male)': 'bm_fable',
|
388 |
-
'Lewis 🇬🇧 (Male)': 'bm_lewis',
|
389 |
-
'Daniel 🇬🇧 (Male)': 'bm_daniel'
|
390 |
-
}
|
391 |
-
|
392 |
-
def generate_video_with_options(user_input, resolution, caption_option, music_file, voice, vclip_prob, bg_vol, video_fps, video_preset, v_speed, caption_size):
|
393 |
-
"""Wrapper function for Gradio interface to set global options."""
|
394 |
-
global selected_voice, voice_speed, font_size, bg_music_volume, fps, preset
|
395 |
-
selected_voice = VOICE_CHOICES[voice]
|
396 |
-
voice_speed = v_speed
|
397 |
-
font_size = caption_size
|
398 |
-
bg_music_volume = bg_vol
|
399 |
-
fps = video_fps
|
400 |
-
preset = video_preset
|
401 |
-
if music_file is not None:
|
402 |
-
target_path = "music.mp3"
|
403 |
-
shutil.copy(music_file.name, target_path)
|
404 |
-
print(f"Uploaded music saved as: {target_path}")
|
405 |
-
return generate_video(user_input, resolution, caption_option)
|
406 |
-
|
407 |
iface = gr.Interface(
|
408 |
-
fn=
|
409 |
inputs=[
|
410 |
-
gr.Textbox(label="Video Concept", placeholder="Enter
|
411 |
-
gr.Radio(["
|
412 |
-
gr.Radio(["Yes", "No"], label="Include Captions", value="No"),
|
413 |
-
gr.File(label="Upload Background Music (MP3)", file_types=[".mp3"]),
|
414 |
-
gr.Dropdown(choices=list(VOICE_CHOICES.keys()), label="Choose Voice", value="Emma (Female)"),
|
415 |
-
gr.Slider(0, 100, value=25, step=1, label="Video Clip Usage Probability (%)", visible=False), # Unused
|
416 |
-
gr.Slider(0.0, 1.0, value=0.08, step=0.01, label="Background Music Volume"),
|
417 |
-
gr.Slider(10, 60, value=30, step=1, label="Video FPS"),
|
418 |
-
gr.Dropdown(choices=["ultrafast", "superfast", "veryfast", "faster", "fast", "medium", "slow"],
|
419 |
-
value="veryfast", label="Export Preset"),
|
420 |
-
gr.Slider(0.5, 1.5, value=0.9, step=0.05, label="Voice Speed"),
|
421 |
-
gr.Slider(20, 100, value=45, step=1, label="Caption Font Size")
|
422 |
],
|
423 |
outputs=gr.Video(label="Generated Video"),
|
424 |
-
title="
|
425 |
-
description="
|
426 |
)
|
427 |
|
428 |
if __name__ == "__main__":
|
429 |
-
iface.launch(
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
+
from moviepy.editor import VideoFileClip, AudioFileClip, ImageClip, CompositeVideoClip
|
|
|
|
|
3 |
import tempfile
|
4 |
import random
|
5 |
import shutil
|
|
|
|
|
6 |
from gtts import gTTS
|
7 |
+
from PIL import Image, ImageDraw, ImageFont
|
8 |
+
import numpy as np
|
9 |
+
import textwrap
|
10 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
# Global Configuration
OUTPUT_VIDEO_FILENAME = "final_video.mp4"  # path of the exported video
TARGET_RESOLUTION = (1080, 1920)  # Vertical video resolution
TEMP_FOLDER = None  # set to a fresh tempfile.mkdtemp() per run by generate_video()
font_size = 45  # caption font size (pixels)
fps = 30  # export frame rate passed to write_videofile
preset = "veryfast"  # x264 encoding preset passed to write_videofile
bg_music_volume = 0.08  # background-music gain relative to the voiceover
|
20 |
+
|
21 |
+
def generate_dummy_script():
    """Generate a dummy script that results in approximately 64 seconds of narration."""
    # The [Title] / narration-line format below is the shape parse_script()
    # consumes. NOTE(review): the actual spoken length depends on the TTS
    # engine, so "approximately 64 seconds" is an assumption — TODO confirm.
    return """
[Intro]
The world is full of natural wonders.
[Forests]
Forests cover vast regions of the planet.
[Rivers]
Rivers flow through landscapes shaping the earth.
[Mountains]
Mountains stand tall against the sky above.
[Oceans]
Oceans hold mysteries beneath their waves.
[Wildlife]
Wildlife thrives in diverse habitats worldwide.
[Conclusion]
Nature continues to inspire us all.
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
|
40 |
def parse_script(script_text):
|
41 |
+
"""Parse the script to extract full narration text."""
|
42 |
sections = {}
|
43 |
current_title = None
|
44 |
current_text = ""
|
|
|
56 |
current_text += line + " "
|
57 |
if current_title:
|
58 |
sections[current_title] = current_text.strip()
|
59 |
+
full_narration = " ".join(sections.values())
|
60 |
+
return full_narration
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
+
def generate_tts(text):
    """Generate TTS audio for the full narration.

    Synthesizes *text* with gTTS, transcodes the intermediate MP3 to WAV
    via pydub, and returns the WAV path inside TEMP_FOLDER, or None if
    synthesis or conversion fails.
    """
    base_name = "narration"
    wav_path = os.path.join(TEMP_FOLDER, f"tts_{base_name}.wav")
    mp3_path = os.path.join(TEMP_FOLDER, f"tts_{base_name}.mp3")
    try:
        # gTTS only emits MP3, so synthesize first and transcode afterwards.
        gTTS(text=text, lang='en').save(mp3_path)
        from pydub import AudioSegment
        AudioSegment.from_mp3(mp3_path).export(wav_path, format="wav")
        os.remove(mp3_path)  # drop the intermediate file
        print(f"TTS audio saved to {wav_path}")
        return wav_path
    except Exception as e:
        print(f"TTS generation failed: {e}")
        return None
|
79 |
|
80 |
+
def get_audio_duration(audio_path):
    """Return the duration (in seconds) of the audio file at *audio_path*.

    Opens the file with moviepy's AudioFileClip and always releases the
    underlying reader, even when reading the duration raises.
    """
    audio = AudioFileClip(audio_path)
    try:
        return audio.duration
    finally:
        # Fix: the original skipped close() if .duration raised, leaking
        # the ffmpeg reader process/file handle.
        audio.close()
|
86 |
+
|
87 |
+
def generate_subtitle_image(text, font_path="arial.ttf", font_size=45, text_color=(255, 255, 255, 255), bg_color=(0, 0, 0, 64), size=(1080, 200)):
    """Generate a subtitle image with wrapped text.

    Renders *text*, wrapped to ~40 characters per line and centered, onto
    an RGBA canvas of *size* (optionally over a translucent *bg_color*
    rectangle) and returns it as a numpy array usable by moviepy ImageClip.
    """
    img = Image.new('RGBA', size, (0, 0, 0, 0))  # Transparent background
    draw = ImageDraw.Draw(img)
    if bg_color:
        draw.rectangle([(0, 0), size], fill=bg_color)
    try:
        font = ImageFont.truetype(font_path, font_size)
    except IOError:
        # Fall back to PIL's built-in bitmap font when arial.ttf is absent.
        font = ImageFont.load_default()
    lines = textwrap.wrap(text, width=40)
    # Fix: font.getsize() and draw.textsize() were removed in Pillow 10.0;
    # use getbbox()/textbbox() (available since Pillow 8) instead.
    hg_bbox = font.getbbox('hg')
    line_height = hg_bbox[3] - hg_bbox[1]
    total_height = line_height * len(lines)
    y_start = (size[1] - total_height) / 2
    for i, line in enumerate(lines):
        left, _, right, _ = draw.textbbox((0, 0), line, font=font)
        text_width = right - left
        x = (size[0] - text_width) / 2  # center each line horizontally
        y = y_start + i * line_height
        draw.text((x, y), line, font=font, fill=text_color)
    return np.array(img)
|
107 |
+
|
108 |
+
def add_background_music(video_clip):
    """Mix looping background music from ./music.mp3 into *video_clip*.

    Best-effort: if music.mp3 is absent, or unreadable/corrupt, the clip
    is returned unchanged instead of aborting the whole export (the
    original raised on any decoding error).
    """
    bg_music_path = "music.mp3"
    if not os.path.exists(bg_music_path):
        return video_clip
    try:
        bg_music = AudioFileClip(bg_music_path)
        if bg_music.duration < video_clip.duration:
            from moviepy.audio.AudioClip import concatenate_audioclips
            # Loop the track enough times to cover the video, then trim.
            loops_needed = int(video_clip.duration / bg_music.duration) + 1
            bg_music = concatenate_audioclips([bg_music] * loops_needed)
        bg_music = bg_music.subclip(0, video_clip.duration)
        bg_music = bg_music.volumex(bg_music_volume)
        from moviepy.audio.AudioClip import CompositeAudioClip
        mixed_audio = CompositeAudioClip([video_clip.audio, bg_music])
        video_clip = video_clip.set_audio(mixed_audio)
    except Exception as e:
        # A bad music file must not kill the video export.
        print(f"Error adding background music: {e}")
    return video_clip
|
124 |
+
|
125 |
+
def generate_video(user_input, include_captions):
    """Generate a vertical video with a voiceover and optional captions.

    Args:
        user_input: Concept text from the UI (currently unused — narration
            comes from generate_dummy_script()).
        include_captions: "Yes" to burn subtitle chunks into the video.

    Returns:
        Path of the exported video file, or None on failure.
    """
    global TEMP_FOLDER
    TEMP_FOLDER = tempfile.mkdtemp()

    # Generate and parse script
    script = generate_dummy_script()  # dummy script stands in for a real generator
    full_narration = parse_script(script)
    print("Full Narration:", full_narration)

    # Generate voiceover
    tts_path = generate_tts(full_narration)
    if not tts_path:
        shutil.rmtree(TEMP_FOLDER)
        return None

    # Fix: measure the real voiceover length instead of hardcoding 64 s.
    # A hardcoded duration desynchronizes caption timing and leaves trailing
    # silence (or truncates audio) whenever TTS output differs from the guess.
    audio_duration = get_audio_duration(tts_path)
    video_duration = audio_duration + 0.5  # short visual tail after narration

    # Cut a clip of that length from the long source video
    video_path = "video.mp4"
    if not os.path.exists(video_path):
        print("video.mp4 not found.")
        shutil.rmtree(TEMP_FOLDER)
        return None

    long_video = VideoFileClip(video_path)
    total_duration = long_video.duration
    if total_duration < video_duration:
        print("Video is too short.")
        long_video.close()
        shutil.rmtree(TEMP_FOLDER)
        return None

    # Pick a random window inside the source video.
    start_time = random.uniform(0, total_duration - video_duration)
    video_clip = long_video.subclip(start_time, start_time + video_duration)
    long_video.close()

    # Set voiceover audio
    video_clip = video_clip.set_audio(AudioFileClip(tts_path))

    # Add captions if requested
    words = full_narration.split()
    if include_captions == "Yes" and words:  # guard: empty narration would divide by zero
        num_words = len(words)
        word_duration = audio_duration / num_words  # uniform per-word timing
        chunks = [words[i:i+5] for i in range(0, num_words, 5)]
        subtitle_clips = []
        for i, chunk in enumerate(chunks):
            chunk_text = ' '.join(chunk)
            start_idx = i * 5
            end_idx = start_idx + len(chunk) - 1
            start_time_chunk = start_idx * word_duration
            end_time_chunk = min((end_idx + 1) * word_duration, audio_duration)
            subtitle_img = generate_subtitle_image(chunk_text, font_size=font_size)
            txt_clip = ImageClip(subtitle_img).set_start(start_time_chunk).set_duration(end_time_chunk - start_time_chunk)
            # NOTE(review): this position assumes the source clip matches
            # TARGET_RESOLUTION (1080x1920); the clip is never resized here
            # — confirm the source video's resolution.
            txt_clip = txt_clip.set_position(('center', TARGET_RESOLUTION[1] - 200))
            subtitle_clips.append(txt_clip)
        video_clip = CompositeVideoClip([video_clip] + subtitle_clips)

    # Add background music
    video_clip = add_background_music(video_clip)

    # Export video
    video_clip.write_videofile(OUTPUT_VIDEO_FILENAME, codec='libx264', fps=fps, preset=preset)
    print(f"Video saved as {OUTPUT_VIDEO_FILENAME}")

    # Cleanup
    shutil.rmtree(TEMP_FOLDER)
    return OUTPUT_VIDEO_FILENAME
|
196 |
|
197 |
# Gradio Interface
# Two inputs map positionally onto generate_video(user_input, include_captions).
iface = gr.Interface(
    fn=generate_video,
    inputs=[
        gr.Textbox(label="Video Concept", placeholder="Enter concept (ignored for this example)"),
        gr.Radio(["Yes", "No"], label="Include Captions", value="No")
    ],
    outputs=gr.Video(label="Generated Video"),
    title="Video Generator",
    description="Generates a 64.5s video clip with a 64s voiceover from a 13min video."
)
|
208 |
|
209 |
if __name__ == "__main__":
    # Launch the Gradio app when executed as a script.
    iface.launch()
|