AZILS committed (verified)
Commit ce8dacb · 1 Parent(s): e591a65

Update app.py

Files changed (1)
  1. app.py +398 -353
app.py CHANGED
@@ -6,7 +6,8 @@ import random
 import tempfile
 import requests
 import numpy as np
-from PIL import Image
+import uuid
+from PIL import Image, ImageDraw, ImageFont
 from io import BytesIO
 from datetime import datetime
 import gradio as gr
@@ -16,19 +17,28 @@ from moviepy.editor import *
 from moviepy.audio.fx.all import volumex
 from moviepy.video.fx.all import crop

+# Suppress the asyncio "Event loop is closed" warning on Windows
+import sys
+if sys.platform.startswith('win'):
+    import asyncio
+    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
+
 # Load environment variables from .env file if present
 load_dotenv()

-# Constants
+# Directory structure constants
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+STATIC_DIR = os.path.join(BASE_DIR, "static")
+MUSIC_DIR = os.path.join(STATIC_DIR, "music")
+FONTS_DIR = os.path.join(STATIC_DIR, "fonts")
+# Use temp directory for faster file operations
 CACHE_DIR = os.path.join(tempfile.gettempdir(), "yt_shorts_generator")
-ASSETS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets")
-MUSIC_DIR = os.path.join(ASSETS_DIR, "background_music")
-FONTS_DIR = os.path.join(ASSETS_DIR, "fonts")

 # Create necessary directories
-os.makedirs(CACHE_DIR, exist_ok=True)
+os.makedirs(STATIC_DIR, exist_ok=True)
 os.makedirs(MUSIC_DIR, exist_ok=True)
 os.makedirs(FONTS_DIR, exist_ok=True)
+os.makedirs(CACHE_DIR, exist_ok=True)

 # Helper functions for logging
 def info(message):
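Side note on the new Windows guard at the top of the file: a minimal, self-contained sketch (assuming Python 3.8+, where `WindowsSelectorEventLoopPolicy` exists) of why the policy switch matters. The default proactor loop on Windows can log "Event loop is closed" when transports are garbage-collected after `asyncio.run()` returns, which is exactly the pattern the TTS code uses.

```python
# Minimal sketch, not from the commit: the same policy switch in isolation.
import sys
import asyncio

if sys.platform.startswith("win"):
    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())

async def main() -> None:
    await asyncio.sleep(0)  # stand-in for a network-backed call such as edge-tts

asyncio.run(main())  # with the selector policy, teardown is quiet on Windows
```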
@@ -75,7 +85,7 @@ def get_font_files():
     if not font_files:
         return ["default"]

-    return ["default"] + font_files
+    return ["random"] + font_files

 def choose_random_music():
     """Selects a random music file from the music directory."""
@@ -90,6 +100,19 @@ def choose_random_music():

     return os.path.join(MUSIC_DIR, random.choice(music_files))

+def choose_random_font():
+    """Selects a random font file from the fonts directory."""
+    if not os.path.exists(FONTS_DIR):
+        error(f"Fonts directory {FONTS_DIR} does not exist")
+        return "default"
+
+    font_files = [f for f in os.listdir(FONTS_DIR) if f.endswith(('.ttf', '.otf'))]
+    if not font_files:
+        warning(f"No font files found in {FONTS_DIR}")
+        return None
+
+    return font_files[0].split('.')[0] if len(font_files) == 1 else random.choice([f.split('.')[0] for f in font_files])
+
 class YouTube:
     def __init__(self, niche: str, language: str,
                  text_gen="g4f", text_model="gpt-4",
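One caveat worth flagging on the new `choose_random_font`: `split('.')[0]` keeps only the text before the first dot, so multi-dot font filenames lose part of their stem. A quick sketch with hypothetical filenames, using `os.path.splitext` as the safer alternative:

```python
# Hypothetical filenames, not from the repo's assets.
import os

names = ["Roboto.ttf", "Open.Sans.ttf"]
print([n.split(".")[0] for n in names])         # ['Roboto', 'Open']  <- truncated
print([os.path.splitext(n)[0] for n in names])  # ['Roboto', 'Open.Sans']
```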
@@ -211,20 +234,17 @@ class YouTube:
                 ).choices[0].message.content

             else:
-                # Default to g4f if other methods aren't available
-                self.log(f"Using default G4F model as fallback")
-                import g4f
-                response = g4f.ChatCompletion.create(
-                    model="gpt-3.5-turbo",
-                    messages=[{"role": "user", "content": prompt}]
-                )
+                # No fallback, raise an exception for unsupported text generator
+                error_msg = f"Unsupported text generator: {self.text_gen}"
+                self.log(error(error_msg))
+                raise ValueError(error_msg)

             self.log(f"Response generated successfully, length: {len(response)} characters")
             return response

         except Exception as e:
             error_msg = f"Error generating response: {str(e)}"
-            self.log(error_msg)
+            self.log(error(error_msg))
             raise Exception(error_msg)

     def generate_topic(self) -> str:
@@ -281,8 +301,8 @@ class YouTube:
             raise Exception("Failed to generate a script. Please try again.")

         if len(completion) > 5000:
-            self.log(warning("Generated Script is too long. Retrying..."))
-            return self.generate_script()
+            self.log(warning("Generated script is too long."))
+            raise ValueError("Generated script exceeds 5000 characters. Please try again.")

         self.script = completion
         self.log(success(f"Generated script ({len(completion)} chars)"))
@@ -299,8 +319,8 @@ class YouTube:
         )

         if len(title) > 100:
-            self.log(warning("Generated Title is too long. Retrying..."))
-            return self.generate_metadata()
+            self.log(warning("Generated title exceeds 100 characters."))
+            raise ValueError("Generated title exceeds 100 characters. Please try again.")

         description = self.generate_response(
             f"Please generate a YouTube Video Description for the following script: {self.script}. "
@@ -370,15 +390,8 @@ class YouTube:
         r = re.compile(r"\[.*\]", re.DOTALL)
         matches = r.findall(completion)
         if len(matches) == 0:
-            self.log(warning("Failed to extract array. Creating generic image prompts."))
-            # Create generic prompts based on the subject
-            image_prompts = [
-                f"A beautiful image showing {self.subject}, photorealistic",
-                f"A detailed visualization of {self.subject}, high quality",
-                f"An artistic representation of {self.subject}, vibrant colors",
-                f"A photorealistic image about {self.subject}, high resolution",
-                f"A dramatic scene related to {self.subject}, cinema quality"
-            ]
+            self.log(warning("Failed to extract array. Unable to create image prompts."))
+            raise ValueError("Failed to generate valid image prompts. Please try again.")
         else:
             try:
                 image_prompts = json.loads(matches[0])
@@ -390,15 +403,13 @@ class YouTube:
                 if strings:
                     image_prompts = strings
                 else:
-                    # Last resort - split by commas and clean up
-                    image_prompts = [
-                        s.strip().strip('"').strip("'")
-                        for s in matches[0].strip('[]').split(',')
-                    ]
+                    self.log(error("Failed to extract strings from regex match."))
+                    raise ValueError("Failed to parse image prompts. Please try again.")

         # Ensure we have the requested number of prompts
-        while len(image_prompts) < count:
-            image_prompts.append(f"A high-quality image about {self.subject}")
+        if len(image_prompts) < count:
+            self.log(warning(f"Received fewer prompts ({len(image_prompts)}) than requested ({count})."))
+            raise ValueError(f"Received only {len(image_prompts)} prompts instead of {count}. Please try again.")

         # Limit to the requested count
         image_prompts = image_prompts[:count]
@@ -414,9 +425,10 @@ class YouTube:
         """Generate an image using the selected image generation model."""
         self.log(f"Generating image for prompt: {prompt[:50]}...")

+        # Use simpler file naming for speed
+        image_path = os.path.join(CACHE_DIR, f"img_{len(self.images)}_{int(time.time())}.png")
+
         try:
-            image_path = os.path.join(CACHE_DIR, f"img_{len(self.images)}_{int(time.time())}.png")
-
             if self.image_gen == "prodia":
                 self.log("Using Prodia provider for image generation")
                 s = requests.Session()
@@ -496,31 +508,28 @@ class YouTube:

             elif self.image_gen == "g4f":
                 self.log("Using G4F provider for image generation")
-                try:
-                    from g4f.client import Client
-                    client = Client()
-                    response = client.images.generate(
-                        model=self.image_model,
-                        prompt=prompt,
-                        response_format="url"
-                    )
-
-                    if response and response.data and len(response.data) > 0:
-                        image_url = response.data[0].url
-                        image_response = requests.get(image_url)
-
-                        if image_response.status_code == 200:
-                            with open(image_path, "wb") as f:
-                                f.write(image_response.content)
-                            self.images.append(image_path)
-                            self.log(success(f"Image saved to: {image_path}"))
-                            return image_path
-                        else:
-                            raise Exception(f"Failed to download image from {image_url}")
-                    else:
-                        raise Exception("No image URL received from G4F")
-                except Exception as e:
-                    raise Exception(f"G4F image generation failed: {str(e)}")
+                from g4f.client import Client
+                client = Client()
+                response = client.images.generate(
+                    model=self.image_model,
+                    prompt=prompt,
+                    response_format="url"
+                )
+
+                if response and response.data and len(response.data) > 0:
+                    image_url = response.data[0].url
+                    image_response = requests.get(image_url)
+
+                    if image_response.status_code == 200:
+                        with open(image_path, "wb") as f:
+                            f.write(image_response.content)
+                        self.images.append(image_path)
+                        self.log(success(f"Image saved to: {image_path}"))
+                        return image_path
+                    else:
+                        raise Exception(f"Failed to download image from {image_url}")
+                else:
+                    raise Exception("No image URL received from G4F")

             elif self.image_gen == "segmind":
                 self.log("Using Segmind provider for image generation")
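The g4f hunk drops the inner try/except, so download failures now propagate to the method's outer handler, which writes a flat gray placeholder instead of aborting. A condensed sketch of that fallback pattern (illustrative names, not the repo's helpers):

```python
# Pattern sketch: fetch an image URL, fall back to a flat placeholder on failure.
import requests
from PIL import Image

def fetch_image(url: str, path: str) -> str:
    try:
        r = requests.get(url, timeout=30)
        if r.status_code != 200:
            raise Exception(f"Failed to download image from {url}")
        with open(path, "wb") as f:
            f.write(r.content)
    except Exception:
        # Mirrors the outer except in generate_image: gray 800x800 stand-in
        Image.new("RGB", (800, 800), color=(200, 200, 200)).save(path)
    return path
```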
@@ -569,10 +578,8 @@ class YouTube:
                 raise Exception(f"Pollinations request failed with status code: {response.status_code}")

             else:
-                # Default to generating a colored placeholder image
+                # Create a fallback colored placeholder image instead of throwing an error
                 self.log(f"Unknown provider '{self.image_gen}'. Generating placeholder image.")
-
-                # Create a placeholder colored image with the prompt text
                 img = Image.new('RGB', (800, 800), color=(random.randint(0, 255),
                                                           random.randint(0, 255),
                                                           random.randint(0, 255)))
@@ -585,7 +592,7 @@ class YouTube:
             error_msg = f"Image generation failed: {str(e)}"
             self.log(error(error_msg))

-            # Create a fallback image
+            # Create a fallback image instead of failing completely
             try:
                 img = Image.new('RGB', (800, 800), color=(200, 200, 200))
                 image_path = os.path.join(CACHE_DIR, f"error_img_{len(self.images)}_{int(time.time())}.png")
@@ -607,6 +614,7 @@ class YouTube:

         self.log(f"Using TTS Engine: {self.tts_engine}, Voice: {self.tts_voice}")

+        # Use simpler file naming for speed
         audio_path = os.path.join(CACHE_DIR, f"speech_{int(time.time())}.{output_format}")

         try:
@@ -624,16 +632,35 @@ class YouTube:

                 payload = {
                     "text": text,
-                    "model_id": "eleven_monolingual_v1",
+                    "model_id": "eleven_turbo_v2",  # Using latest and most capable model
                     "voice_settings": {
                         "stability": 0.5,
                         "similarity_boost": 0.5,
                         "style": 0.0,
                         "use_speaker_boost": True
-                    }
+                    },
+                    "output_format": "mp3_44100_128",  # Higher quality audio (44.1kHz, 128kbps)
+                    "optimize_streaming_latency": 0  # Optimize for quality over latency
                 }

-                voice_id = self.tts_voice if self.tts_voice not in ["Sarah", "default"] else "21m00Tcm4TlvDq8ikWAM"
+                # Map voice names to ElevenLabs voice IDs
+                voice_id_mapping = {
+                    "Sarah": "21m00Tcm4TlvDq8ikWAM",
+                    "Brian": "hxppwzoRmvxK7YkDrjhQ",
+                    "Lily": "p7TAj7L6QVq1fE6XGyjR",
+                    "Monika Sogam": "Fc3XhIu9tfgOPOsU1hMr",
+                    "George": "o7lPjDgzlF8ZAeSpqmaN",
+                    "River": "f0k5evLkhJxrIRJXQJvy",
+                    "Matilda": "XrExE9yKIg1WjnnlVkGX",
+                    "Will": "pvKWM1B1sNRNTlEYYAEZ",
+                    "Jessica": "A5EAMYWMCSsLNL1wYxOv",
+                    "default": "21m00Tcm4TlvDq8ikWAM"  # Default to Sarah
+                }
+
+                # Get the voice ID from mapping or use the voice name as ID if not found
+                voice_id = voice_id_mapping.get(self.tts_voice, self.tts_voice)
+
+                self.log(f"Using ElevenLabs voice: {self.tts_voice} (ID: {voice_id})")

                 response = requests.post(
                     url=f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}",
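For readers following the ElevenLabs change, a standalone sketch of the same request shape. The endpoint, payload fields, and the "Sarah" voice ID come from this hunk; the `xi-api-key` header name is an assumption based on ElevenLabs' documented REST API and is not shown in the diff:

```python
import requests

ELEVENLABS_API_KEY = "..."  # assumption: supplied via env var or the API Keys tab
voice_id = "21m00Tcm4TlvDq8ikWAM"  # "Sarah", the commit's default

response = requests.post(
    f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}",
    headers={"xi-api-key": ELEVENLABS_API_KEY},  # assumed header name
    json={
        "text": "Hello from the generator!",
        "model_id": "eleven_turbo_v2",
        "voice_settings": {"stability": 0.5, "similarity_boost": 0.5},
    },
    timeout=60,
)
response.raise_for_status()
with open("speech.mp3", "wb") as f:
    f.write(response.content)  # the API returns raw audio bytes on success
```

Note the improved error branch below catches `ValueError` around `response.json()`; that also covers `requests`' JSON decode error, which subclasses `ValueError`.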
@@ -646,8 +673,15 @@ class YouTube:
                         f.write(response.content)
                     self.log(success(f"Speech generated successfully using ElevenLabs at {audio_path}"))
                 else:
-                    raise Exception(f"ElevenLabs API error: {response.text}")
-
+                    try:
+                        error_data = response.json()
+                        error_message = error_data.get('detail', {}).get('message', response.text)
+                        error_status = error_data.get('status', 'error')
+                        raise Exception(f"ElevenLabs API error ({response.status_code}, {error_status}): {error_message}")
+                    except ValueError:
+                        # If JSON parsing fails, use the raw response
+                        raise Exception(f"ElevenLabs API error ({response.status_code}): {response.text}")
+
             elif self.tts_engine == "gtts":
                 self.log("Using Google TTS provider for speech generation")
                 from gtts import gTTS
@@ -685,11 +719,18 @@ class YouTube:
                 asyncio.run(generate())

             else:
-                # Fallback to gtts
-                self.log(f"Unknown TTS engine '{self.tts_engine}'. Falling back to gTTS.")
-                from gtts import gTTS
-                tts = gTTS(text=text, lang=self.language[:2].lower(), slow=False)
-                tts.save(audio_path)
+                # Default to edge TTS if other methods aren't available
+                self.log(f"Using default Edge TTS as fallback")
+                import edge_tts
+                import asyncio
+
+                voice = "en-US-AriaNeural"
+
+                async def generate():
+                    communicate = edge_tts.Communicate(text, voice)
+                    await communicate.save(audio_path)
+
+                asyncio.run(generate())

             self.log(success(f"Speech generated and saved to: {audio_path}"))
             self.tts_path = audio_path
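Since the unknown-engine branch now falls back to Edge TTS with a hard-coded `en-US-AriaNeural`, here is a small discovery sketch (assuming the `edge-tts` package, whose `list_voices()` coroutine returns voice metadata dicts) for finding other short names:

```python
# Sketch: list a few Edge TTS voice short names to swap into the fallback.
import asyncio
import edge_tts

async def show_voices() -> None:
    voices = await edge_tts.list_voices()
    print(sorted(v["ShortName"] for v in voices)[:5])

asyncio.run(show_voices())
```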
@@ -715,24 +756,18 @@ class YouTube:
             self.log(error("Failed to create silent audio fallback"))
             return None

-    def generate_subtitles(self, audio_path):
-        """Generate word-level subtitles for the video."""
-        if not self.subtitles_enabled:
-            self.log("Subtitles are disabled. Skipping subtitle generation.")
-            return None
-
-        self.progress(0.65, desc="Creating subtitles")
-        self.log("Starting subtitle generation process")
-
+    def generate_subtitles(self, audio_path: str) -> dict:
+        """Generate subtitles from audio using AssemblyAI."""
+        self.log("Generating subtitles from audio")
         try:
-            assemblyai_api_key = os.environ.get("ASSEMBLYAI_API_KEY", "")
+            import assemblyai as aai

-            if not assemblyai_api_key:
-                self.log(warning("AssemblyAI API key not set. Generating simulated subtitles."))
-                return self._generate_simulated_subtitles()
+            # Check if API key is set
+            aai_api_key = os.environ.get("ASSEMBLYAI_API_KEY", "")
+            if not aai_api_key:
+                raise ValueError("AssemblyAI API key is not set. Please provide a valid API key.")

-            import assemblyai as aai
-            aai.settings.api_key = assemblyai_api_key
+            aai.settings.api_key = aai_api_key

             config = aai.TranscriptionConfig(speaker_labels=False, word_boost=[], format_text=True)
             transcriber = aai.Transcriber(config=config)
@@ -741,51 +776,63 @@ class YouTube:
             transcript = transcriber.transcribe(audio_path)

             if not transcript or not transcript.words:
-                self.log(warning("Transcription returned no words. Using simulated subtitles."))
-                return self._generate_simulated_subtitles()
+                raise ValueError("Transcription returned no words.")

             # Process word-level information
             wordlevel_info = []
             for word in transcript.words:
                 word_data = {
                     "word": word.text.strip(),
-                    "start": word.start / 1000.0,
-                    "end": word.end / 1000.0
+                    "start": word.start / 1000.0,  # Convert from ms to seconds
+                    "end": word.end / 1000.0  # Convert from ms to seconds
                 }
                 wordlevel_info.append(word_data)

             self.log(success(f"Transcription successful. Got {len(wordlevel_info)} words."))

             # Define constants for subtitle generation
-            FONT = self.subtitle_font
+            # Handle random font selection if configured
+            if self.subtitle_font == "random":
+                FONT = choose_random_font()
+                self.log(f"Using random font: {FONT}")
+            else:
+                FONT = self.subtitle_font
+
             FONTSIZE = self.font_size
             COLOR = self.text_color
             BG_COLOR = self.highlight_color if self.highlighting_enabled else None
-            FRAME_SIZE = (1080, 1920)
-            MAX_CHARS = 30
-            MAX_DURATION = 3.0
-            MAX_GAP = 2.5
+            FRAME_SIZE = (1080, 1920)  # Vertical video format
+
+            # Constants for line splitting
+            MAX_CHARS = 30  # Maximum characters per line for vertical video format
+            MAX_DURATION = 3.0  # Maximum duration for a single line
+            MAX_GAP = 1.5  # Split if nothing is spoken for this many seconds

-            # Split text into lines based on character count, duration, and gap
+            # Split text into lines
             subtitles = []
             line = []
             line_duration = 0

             for idx, word_data in enumerate(wordlevel_info):
+                word = word_data["word"]
+                start = word_data["start"]
+                end = word_data["end"]
+
                 line.append(word_data)
-                line_duration += word_data["end"] - word_data["start"]
+                line_duration += end - start
+
                 temp = " ".join(item["word"] for item in line)
                 new_line_chars = len(temp)
+
                 duration_exceeded = line_duration > MAX_DURATION
                 chars_exceeded = new_line_chars > MAX_CHARS

                 if idx > 0:
-                    gap = word_data['start'] - wordlevel_info[idx - 1]['end']
+                    gap = word_data['start'] - wordlevel_info[idx-1]['end']
                     maxgap_exceeded = gap > MAX_GAP
                 else:
                     maxgap_exceeded = False

-                # Check if any condition is exceeded to finalize the current line
                 if duration_exceeded or chars_exceeded or maxgap_exceeded:
                     if line:
                         subtitle_line = {
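A toy walk-through of the splitting rule in this hunk, with hypothetical timings. Because the loop appends before checking, the word that trips a threshold is flushed with the current line rather than starting the next one:

```python
# Append-then-check line splitting, mirroring the hunk's thresholds.
MAX_CHARS, MAX_DURATION, MAX_GAP = 30, 3.0, 1.5

words = [{"word": w, "start": 0.4 * i, "end": 0.4 * i + 0.3}
         for i, w in enumerate("This splitter keeps captions short and readable".split())]

lines, line, dur = [], [], 0.0
for i, w in enumerate(words):
    line.append(w)                        # word joins the line first...
    dur += w["end"] - w["start"]
    text = " ".join(x["word"] for x in line)
    gap = w["start"] - words[i - 1]["end"] if i else 0.0
    if dur > MAX_DURATION or len(text) > MAX_CHARS or gap > MAX_GAP:
        lines.append(text)                # ...so the triggering word flushes with it
        line, dur = [], 0.0
if line:
    lines.append(" ".join(x["word"] for x in line))

print(lines)  # ['This splitter keeps captions short', 'and readable']
```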
@@ -798,7 +845,7 @@ class YouTube:
                         line = []
                         line_duration = 0

-            # Add the remaining words as the last subtitle line if any
+            # Add remaining words as last line
            if line:
                subtitle_line = {
                    "text": " ".join(item["word"] for item in line),
@@ -809,6 +856,8 @@ class YouTube:
                 subtitles.append(subtitle_line)

             self.log(success(f"Generated {len(subtitles)} subtitle lines"))
+
+            # Return the subtitle data and settings
             return {
                 "wordlevel": wordlevel_info,
                 "linelevel": subtitles,
@@ -823,101 +872,152 @@ class YouTube:
             }

         except Exception as e:
-            error_msg = f"Subtitle generation failed: {str(e)}"
+            error_msg = f"Error generating subtitles: {str(e)}"
             self.log(error(error_msg))
-            return self._generate_simulated_subtitles()
-
-    def _generate_simulated_subtitles(self):
-        """Generate simulated subtitles when AssemblyAI is not available."""
-        self.log("Generating simulated subtitles")
-
-        # Split script into words
-        words = self.script.split()
-
-        # Estimate audio duration based on word count (average speaking rate)
-        estimated_duration = len(words) * 0.3  # 0.3 seconds per word on average
-
-        # Generate word-level timings
-        wordlevel_info = []
-        current_time = 0
-
-        for word in words:
-            # Adjust duration based on word length
-            word_duration = 0.2 + min(0.05 * len(word), 0.3)  # Between 0.2 and 0.5 seconds
-
-            word_data = {
-                "word": word,
-                "start": current_time,
-                "end": current_time + word_duration
-            }
-            wordlevel_info.append(word_data)
-
-            # Add a small gap between words
-            current_time += word_duration + 0.05
-
-        # Generate line-level subtitles
-        subtitles = []
-        line = []
-        line_start = 0
-        line_text = ""
-
-        for word_data in wordlevel_info:
-            # Check if adding this word would exceed character limit
-            if len(line_text + " " + word_data["word"]) > 30 and line:
-                # Finalize current line
-                subtitle_line = {
-                    "text": line_text,
-                    "start": line_start,
-                    "end": line[-1]["end"],
-                    "words": line.copy()
-                }
-                subtitles.append(subtitle_line)
-
-                # Start new line
-                line = [word_data]
-                line_start = word_data["start"]
-                line_text = word_data["word"]
-            else:
-                # Add word to current line
-                line.append(word_data)
-                line_text = (line_text + " " + word_data["word"]).strip()
-                if len(line) == 1:
-                    line_start = word_data["start"]
-
-        # Add final line if not empty
-        if line:
-            subtitle_line = {
-                "text": line_text,
-                "start": line_start,
-                "end": line[-1]["end"],
-                "words": line
-            }
-            subtitles.append(subtitle_line)
-
-        self.log(success(f"Generated {len(wordlevel_info)} simulated word timings and {len(subtitles)} subtitle lines"))
-
-        # Define settings for subtitle display
-        settings = {
-            "font": self.subtitle_font,
-            "fontsize": self.font_size,
-            "color": self.text_color,
-            "bg_color": self.highlight_color if self.highlighting_enabled else None,
-            "position": self.subtitle_position,
-            "highlighting_enabled": self.highlighting_enabled
-        }
-
-        return {
-            "wordlevel": wordlevel_info,
-            "linelevel": subtitles,
-            "settings": settings
-        }
+            raise Exception(error_msg)
+
+    def create_subtitle_clip(self, subtitle_data, frame_size):
+        """Create subtitle clips for a line of text with word-level highlighting."""
+        settings = subtitle_data["settings"]
+        font_name = settings["font"]
+        fontsize = settings["fontsize"]
+        color = settings["color"]
+        bg_color = settings["bg_color"]
+        highlighting_enabled = settings["highlighting_enabled"]
+
+        def create_text_clip(text, font_size, color, bg_color=None):
+            try:
+                # Try to use the specified font, fallback to default
+                try:
+                    # Check if font is a path or just a name
+                    font_path = os.path.join(FONTS_DIR, f"{font_name}.ttf")
+                    if os.path.exists(font_path):
+                        pil_font = ImageFont.truetype(font_path, font_size)
+                    else:
+                        self.log(warning(f"Font {font_name} not found, using default"))
+                        pil_font = ImageFont.load_default()
+                except Exception as e:
+                    self.log(warning(f"Error loading font: {str(e)}"))
+                    pil_font = ImageFont.load_default()
+
+                # Get text size
+                text_width, text_height = pil_font.getbbox(text)[2:4]
+
+                # Add padding
+                padding = 10
+                img_width = text_width + padding * 2
+                img_height = text_height + padding * 2
+
+                # Create image with background color or transparent
+                if bg_color:
+                    if bg_color.startswith('#'):
+                        bg_color_rgb = tuple(int(bg_color.lstrip('#')[i:i+2], 16) for i in (0, 2, 4))
+                    else:
+                        bg_color_rgb = (0, 0, 255)  # Default blue
+                    img = Image.new('RGB', (img_width, img_height), color=bg_color_rgb)
+                else:
+                    img = Image.new('RGBA', (img_width, img_height), color=(0, 0, 0, 0))
+
+                # Draw text
+                draw = ImageDraw.Draw(img)
+                if color.startswith('#'):
+                    text_color_rgb = tuple(int(color.lstrip('#')[i:i+2], 16) for i in (0, 2, 4))
+                else:
+                    text_color_rgb = (255, 255, 255)  # Default white
+
+                draw.text((padding, padding), text, font=pil_font, fill=text_color_rgb)
+
+                # Convert to numpy array for MoviePy
+                img_array = np.array(img)
+                clip = ImageClip(img_array)
+                return clip, img_width, img_height
+
+            except Exception as e:
+                self.log(warning(f"Error creating text clip: {str(e)}"))
+                # Create a simple colored rectangle as fallback
+                img = Image.new('RGB', (100, 50), color=(100, 100, 100))
+                img_array = np.array(img)
+                clip = ImageClip(img_array)
+                return clip, 100, 50
+
+        subtitle_clips = []
+
+        for line in subtitle_data["linelevel"]:
+            x_pos = 0
+            y_pos = 0
+            word_positions = []
+
+            # Calculate vertical position based on subtitle position setting
+            if settings["position"] == "top":
+                y_buffer = frame_size[1] * 0.1  # 10% from top
+            elif settings["position"] == "middle":
+                y_buffer = frame_size[1] * 0.4  # 40% from top
+            else:  # bottom
+                y_buffer = frame_size[1] * 0.7  # 70% from top
+
+            x_buffer = frame_size[0] * 0.1  # 10% from left
+            space_width = 20
+
+            # Create clips for each word in the line
+            for word_data in line["words"]:
+                word = word_data["word"]
+                start_time = word_data["start"]
+                end_time = word_data["end"]
+                duration = end_time - start_time
+
+                # Create word clip
+                word_clip, word_width, word_height = create_text_clip(word, fontsize, color)
+
+                # Check if word fits on current line
+                if x_pos + word_width + space_width > frame_size[0] - 2 * x_buffer:
+                    x_pos = 0
+                    y_pos += word_height + 20
+
+                # Store word position info
+                word_positions.append({
+                    "word": word,
+                    "x_pos": x_pos + x_buffer,
+                    "y_pos": y_pos + y_buffer,
+                    "width": word_width,
+                    "height": word_height,
+                    "start": start_time,
+                    "end": end_time
+                })
+
+                # Set position and timing for word clip
+                word_clip = word_clip.set_position((x_pos + x_buffer, y_pos + y_buffer))
+                word_clip = word_clip.set_start(line["start"]).set_duration(line["end"] - line["start"])
+                subtitle_clips.append(word_clip)
+
+                # Add space after word
+                space_clip, _, _ = create_text_clip(" ", fontsize, color)
+                space_clip = space_clip.set_position((x_pos + word_width + x_buffer, y_pos + y_buffer))
+                space_clip = space_clip.set_start(line["start"]).set_duration(line["end"] - line["start"])
+                subtitle_clips.append(space_clip)
+
+                x_pos += word_width + space_width
+
+            # Add highlighted words if enabled
+            if highlighting_enabled and bg_color:
+                for word_pos in word_positions:
+                    highlight_clip, _, _ = create_text_clip(
+                        word_pos["word"],
+                        fontsize,
+                        color,
+                        bg_color
+                    )
+                    highlight_clip = highlight_clip.set_position((word_pos["x_pos"], word_pos["y_pos"]))
+                    highlight_clip = highlight_clip.set_start(word_pos["start"]).set_duration(word_pos["end"] - word_pos["start"])
+                    subtitle_clips.append(highlight_clip)
+
+        return subtitle_clips

     def combine(self) -> str:
         """Combine images, audio, and subtitles into a final video."""
         self.progress(0.8, desc="Creating final video")
         self.log("Combining images and audio into final video")
-
         try:
+            # Use simple file naming for faster processing
             output_path = os.path.join(CACHE_DIR, f"output_{int(time.time())}.mp4")

             # Check for required files
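The new `create_text_clip` helper replaces MoviePy's ImageMagick-backed `TextClip` with a PIL render converted to a numpy frame. A condensed sketch of that technique (assumes Pillow and moviepy 1.x; `#RRGGBB` colors only, since 3-digit forms like `#fff` would need expanding first):

```python
# PIL -> numpy -> ImageClip, the same pipeline as the hunk's create_text_clip.
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from moviepy.editor import ImageClip

def hex_to_rgb(value: str) -> tuple:
    # "#RRGGBB" -> (r, g, b), matching the hunk's generator expression
    return tuple(int(value.lstrip('#')[i:i + 2], 16) for i in (0, 2, 4))

font = ImageFont.load_default()
text = "WORD"
w, h = font.getbbox(text)[2:4]  # right/bottom of the text bounding box
img = Image.new("RGBA", (w + 20, h + 20), (0, 0, 0, 0))  # transparent canvas
ImageDraw.Draw(img).text((10, 10), text, font=font, fill=hex_to_rgb("#FFFFFF"))
clip = ImageClip(np.array(img)).set_duration(1.0)  # ready to composite
```

This sidesteps the ImageMagick dependency entirely, at the cost of doing layout (padding, wrapping, word positions) by hand as the method above does.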
@@ -989,6 +1089,11 @@ class YouTube:
             final_clip = concatenate_videoclips(clips)
             final_clip = final_clip.set_fps(30)

+            # Add subtitles if enabled
+            if self.subtitles_enabled and hasattr(self, 'subtitle_data'):
+                subtitle_clips = self.create_subtitle_clip(self.subtitle_data, (1080, 1920))
+                final_clip = CompositeVideoClip([final_clip] + subtitle_clips)
+
             # Add background music if available
             music_path = None
             if self.music_file == "random":
@@ -1002,120 +1107,35 @@ class YouTube:
                     music_clip = AudioFileClip(music_path)
                     # Loop music if it's shorter than the video
                     if music_clip.duration < max_duration:
-                        repeats = int(max_duration / music_clip.duration) + 1
-                        music_clip = concatenate_audioclips([music_clip] * repeats)
-                    # Trim if it's longer
+                        num_loops = int(np.ceil(max_duration / music_clip.duration))
+                        music_clip = concatenate_audioclips([music_clip] * num_loops)
+                    # Trim music if it's longer than the video
                     music_clip = music_clip.subclip(0, max_duration)
-                    # Reduce volume
-                    music_clip = music_clip.fx(volumex, 0.1)
-
-                    # Combine audio tracks
-                    comp_audio = CompositeAudioClip([tts_clip, music_clip])
-                    final_clip = final_clip.set_audio(comp_audio)
+                    # Reduce music volume
+                    music_clip = music_clip.volumex(0.1)
+                    # Combine with TTS audio
+                    final_audio = CompositeAudioClip([tts_clip, music_clip])
                 except Exception as e:
-                    self.log(warning(f"Error adding background music: {str(e)}"))
-                    final_clip = final_clip.set_audio(tts_clip)
+                    self.log(warning(f"Error processing music: {str(e)}"))
+                    final_audio = tts_clip
             else:
-                self.log("No background music found, using TTS audio only")
-                final_clip = final_clip.set_audio(tts_clip)
-
-            # Set final duration
-            final_clip = final_clip.set_duration(tts_clip.duration)
-
-            # Generate subtitles if enabled
-            subtitle_clips = []
-            if self.subtitles_enabled:
-                subtitles = self.generate_subtitles(self.tts_path)
-
-                if subtitles and 'wordlevel' in subtitles:
-                    self.log("Adding word-level subtitles")
-
-                    from moviepy.video.tools.subtitles import TextClip
-
-                    # Define subtitle styles
-                    font = subtitles['settings']['font'] if subtitles['settings']['font'] != "default" and os.path.exists(os.path.join(FONTS_DIR, f"{subtitles['settings']['font']}.ttf")) else None
-                    fontsize = subtitles['settings']['fontsize']
-                    color = subtitles['settings']['color']
-                    bg_color = subtitles['settings']['bg_color'] if subtitles['settings']['highlighting_enabled'] else None
-
-                    # Calculate position based on subtitle_position setting
-                    frame_width, frame_height = 1080, 1920
-                    if self.subtitle_position == "top":
-                        y_pos = frame_height * 0.1  # Position at 10% from top
-                    elif self.subtitle_position == "middle":
-                        y_pos = frame_height * 0.5  # Position at middle
-                    else:  # bottom (default)
-                        y_pos = frame_height * 0.85  # Position at 85% from top
-
-                    for subtitle in subtitles['linelevel']:
-                        full_duration = subtitle['end'] - subtitle['start']
-
-                        # Initialize position for each subtitle line
-                        x_pos = 0
-                        x_buffer = frame_width * 1 / 10
-
-                        # Handle word-level subtitles if highlighting is enabled
-                        if self.highlighting_enabled:
-                            # Add each word with proper timing and highlighting
-                            for word_data in subtitle['words']:
-                                word = word_data['word']
-                                start = word_data['start']
-                                end = word_data['end']
-
-                                # Create text clip for word
-                                try:
-                                    word_clip = TextClip(
-                                        txt=word,
-                                        font=font,
-                                        fontsize=fontsize,
-                                        color=color,
-                                        bg_color=bg_color,
-                                        stroke_color='black',
-                                        stroke_width=1
-                                    ).set_position((x_pos + x_buffer, y_pos)).set_start(start).set_duration(end - start)
-
-                                    subtitle_clips.append(word_clip)
-                                    x_pos += word_clip.w + 10  # Add spacing between words
-
-                                    # Wrap to next line if needed
-                                    if x_pos + word_clip.w > frame_width - 2 * x_buffer:
-                                        x_pos = 0
-                                        y_pos += word_clip.h + 10
-                                except Exception as e:
-                                    self.log(warning(f"Error creating subtitle for word '{word}': {str(e)}"))
-                        else:
-                            # Show entire line without word-level highlighting
-                            try:
-                                line_clip = TextClip(
-                                    txt=subtitle['text'],
-                                    font=font,
-                                    fontsize=fontsize,
-                                    color=color,
-                                    bg_color=None,
-                                    stroke_color='black',
-                                    stroke_width=1,
-                                    method='caption',
-                                    size=(frame_width - 2 * x_buffer, None),
-                                    align='center'
-                                ).set_position(('center', y_pos)).set_start(subtitle['start']).set_duration(full_duration)
-
-                                subtitle_clips.append(line_clip)
-                            except Exception as e:
-                                self.log(warning(f"Error creating subtitle line: {str(e)}"))
-
-            # Add subtitles to video if any were created
-            if subtitle_clips:
-                self.log(f"Adding {len(subtitle_clips)} subtitle clips to video")
-                final_clip = CompositeVideoClip([final_clip] + subtitle_clips)
-
-            # Write final video
+                final_audio = tts_clip
+
+            # Set final audio
+            final_clip = final_clip.set_audio(final_audio)
+
+            # Write final video - use faster encoding settings
             self.log("Writing final video file")
-            final_clip.write_videofile(output_path, threads=4, codec='libx264', audio_codec='aac')
-
-            success_msg = f"Video successfully created at: {output_path}"
-            self.log(success(success_msg))
-            self.video_path = output_path
+            final_clip.write_videofile(
+                output_path,
+                fps=30,
+                codec="libx264",
+                audio_codec="aac",
+                threads=4,
+                # Remove preset parameter for faster encoding
+            )

+            self.log(success(f"Video saved to: {output_path}"))
             return output_path

         except Exception as e:
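Worked numbers for the new looping math, with hypothetical durations: a 45 s video over a 20 s track needs ceil(45/20) = 3 copies (60 s), which `subclip(0, 45)` then trims back to the video length. Unlike the old `int(...) + 1`, `ceil` avoids concatenating an extra copy when the durations divide evenly.

```python
import numpy as np

max_duration, music_duration = 45.0, 20.0
num_loops = int(np.ceil(max_duration / music_duration))
print(num_loops, num_loops * music_duration)  # 3 60.0 -> trimmed to 45.0

# Old behaviour on an even split: int(40/20) + 1 = 3 copies instead of 2.
print(int(40.0 / 20.0) + 1, int(np.ceil(40.0 / 20.0)))  # 3 2
```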
@@ -1135,7 +1155,6 @@ class YouTube:
                 video_clip.write_videofile(fallback_path, threads=2, codec='libx264', audio_codec='aac')

                 self.log(warning(f"Created fallback video at: {fallback_path}"))
-                self.video_path = fallback_path
                 return fallback_path
             else:
                 raise Exception("Cannot create fallback video: missing images or audio")
@@ -1148,6 +1167,11 @@ class YouTube:
         try:
             self.log("Starting video generation process")

+            # Create a simple generation directory - avoid complex numbering schemes
+            self.generation_folder = os.path.join(CACHE_DIR, f"gen_{int(time.time())}")
+            os.makedirs(self.generation_folder, exist_ok=True)
+            self.log(f"Created generation folder: {self.generation_folder}")
+
             # Step 1: Generate topic
             self.log("Generating topic")
             self.generate_topic()
@@ -1181,17 +1205,23 @@ class YouTube:
             self.log("Generating speech")
             self.generate_speech(self.script)

-            # Step 7: Combine all elements into final video
+            # Step 7: Generate subtitles
+            self.progress(0.7, desc="Generating subtitles")
+            if self.subtitles_enabled and hasattr(self, 'tts_path') and os.path.exists(self.tts_path):
+                self.subtitle_data = self.generate_subtitles(self.tts_path)
+
+            # Step 8: Combine all elements into final video
             self.progress(0.8, desc="Creating final video")
             self.log("Combining all elements into final video")
             path = self.combine()

             self.progress(0.95, desc="Finalizing")
-            self.log(f"Video generation complete. File saved at: {path}")
+            self.log(f"Video generation complete. Files saved in: {self.generation_folder}")

             # Return the result
             return {
                 'video_path': path,
+                'generation_folder': self.generation_folder,
                 'title': self.metadata['title'],
                 'description': self.metadata['description'],
                 'subject': self.subject,
@@ -1202,7 +1232,13 @@ class YouTube:
         except Exception as e:
             error_msg = f"Error during video generation: {str(e)}"
             self.log(error(error_msg))
-            raise Exception(error_msg)
+
+            # Return basic data even on error
+            return {
+                'video_path': getattr(self, 'video_path', None),
+                'error': str(e),
+                'logs': self.logs
+            }

 # Data for dynamic dropdowns
 def get_text_generator_models(generator):
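This hunk changes the method's error contract: instead of re-raising, it now returns a dict either way. A sketch of what that means for callers (the entry-point method's name is not shown in this hunk, so `generate_video` below is a hypothetical stand-in):

```python
# Illustrative caller under the new contract: branch on 'error' rather than
# wrapping the call in try/except. 'yt' is a configured YouTube instance.
result = yt.generate_video()  # hypothetical entry-point name
if result.get("error"):
    print("generation failed:", result["error"])
else:
    print("video at:", result["video_path"])
```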
@@ -1269,15 +1305,15 @@ def get_tts_voices(engine):
     """Get available voices for the selected TTS engine."""
     voices = {
         "elevenlabs": [
-            "Sarah",
-            "Brian",
-            "Lily",
-            "Monika Sogam",
-            "George",
-            "River",
-            "Matilda",
-            "Will",
-            "Jessica"
+            "Sarah",         # Female, American accent
+            "Brian",         # Male, British accent
+            "Lily",          # Female, British accent
+            "Monika Sogam",  # Female, Indian accent
+            "George",        # Male, American accent
+            "River",         # Female, American accent
+            "Matilda",       # Female, British accent
+            "Will",          # Male, American accent
+            "Jessica"        # Female, American accent
         ],
         "openai": [
             "alloy",
@@ -1310,7 +1346,7 @@ def get_tts_voices(engine):

 # Create the Gradio interface
 def create_interface():
-    with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo"), title="YouTube Shorts Generator") as demo:
+    with gr.Blocks(theme=gr.themes.Soft(primary_hue="indigo", radius_size="lg"), title="YouTube Shorts Generator") as demo:
         with gr.Row():
             gr.Markdown(
                 """
@@ -1321,7 +1357,7 @@ def create_interface():

         with gr.Row(equal_height=True):
             # Left panel: Content Settings
-            with gr.Column(scale=1, min_width=400):
+            with gr.Column(scale=2, min_width=500):
                 with gr.Group():
                     gr.Markdown("### 📝 Content")
                     niche = gr.Textbox(
@@ -1336,7 +1372,7 @@ def create_interface():
                         value="English"
                     )

-                # Middle panel: Generator Settings
+                # Generator Settings
                 with gr.Group():
                     gr.Markdown("### 🔧 Generator Settings")
                     with gr.Tabs():
@@ -1375,10 +1411,13 @@ def create_interface():
                                 label="Voice",
                                 value="en-US-AriaNeural"
                             )
+                            # Fix for music_file - Get available music and set proper default
+                            music_choices = get_music_files()
+                            default_music = "none" if "random" not in music_choices else "random"
                             music_file = gr.Dropdown(
-                                choices=get_music_files(),
+                                choices=music_choices,
                                 label="Background Music",
-                                value="random"
+                                value=default_music
                             )

                         with gr.TabItem("Subtitles"):
@@ -1387,7 +1426,7 @@ def create_interface():
                             subtitle_font = gr.Dropdown(
                                 choices=get_font_files(),
                                 label="Font",
-                                value="default"
+                                value="random"
                             )
                             with gr.Row():
                                 font_size = gr.Slider(
@@ -1406,50 +1445,51 @@ def create_interface():
                                 text_color = gr.ColorPicker(label="Text Color", value="#FFFFFF")
                                 highlight_color = gr.ColorPicker(label="Highlight Color", value="#0000FF")

-                # API Keys section
-                with gr.Accordion("🔑 API Keys", open=False):
-                    gemini_api_key = gr.Textbox(
-                        label="Gemini API Key",
-                        type="password",
-                        value=os.environ.get("GEMINI_API_KEY", "")
-                    )
-                    assemblyai_api_key = gr.Textbox(
-                        label="AssemblyAI API Key",
-                        type="password",
-                        value=os.environ.get("ASSEMBLYAI_API_KEY", "")
-                    )
-                    elevenlabs_api_key = gr.Textbox(
-                        label="ElevenLabs API Key",
-                        type="password",
-                        value=os.environ.get("ELEVENLABS_API_KEY", "")
-                    )
-                    segmind_api_key = gr.Textbox(
-                        label="Segmind API Key",
-                        type="password",
-                        value=os.environ.get("SEGMIND_API_KEY", "")
-                    )
-                    openai_api_key = gr.Textbox(
-                        label="OpenAI API Key",
-                        type="password",
-                        value=os.environ.get("OPENAI_API_KEY", "")
-                    )
-
                 # Generate button
                 generate_btn = gr.Button("🎬 Generate Video", variant="primary", size="lg")

             # Right panel: Output display
-            with gr.Column(scale=1, min_width=400):
+            with gr.Column(scale=1, min_width=300):
                 with gr.Tabs():
                     with gr.TabItem("Video"):
-                        video_output = gr.Video(label="Generated Video", height=600)
+                        # Larger video preview with proper mobile proportions
+                        video_output = gr.Video(label="Generated Video", height=580, width=330)

                     with gr.TabItem("Metadata"):
                         title_output = gr.Textbox(label="Title", lines=2)
                         description_output = gr.Textbox(label="Description", lines=4)
                         script_output = gr.Textbox(label="Script", lines=8)
+
+                    # API Keys section as a tab
+                    with gr.TabItem("🔑 API Keys"):
+                        gemini_api_key = gr.Textbox(
+                            label="Gemini API Key",
+                            type="password",
+                            value=os.environ.get("GEMINI_API_KEY", "")
+                        )
+                        assemblyai_api_key = gr.Textbox(
+                            label="AssemblyAI API Key",
+                            type="password",
+                            value=os.environ.get("ASSEMBLYAI_API_KEY", "")
+                        )
+                        elevenlabs_api_key = gr.Textbox(
+                            label="ElevenLabs API Key",
+                            type="password",
+                            value=os.environ.get("ELEVENLABS_API_KEY", "")
+                        )
+                        segmind_api_key = gr.Textbox(
+                            label="Segmind API Key",
+                            type="password",
+                            value=os.environ.get("SEGMIND_API_KEY", "")
+                        )
+                        openai_api_key = gr.Textbox(
+                            label="OpenAI API Key",
+                            type="password",
+                            value=os.environ.get("OPENAI_API_KEY", "")
+                        )

                     with gr.TabItem("Log"):
-                        log_output = gr.Textbox(label="Process Log", lines=20, max_lines=100)
+                        log_output = gr.Textbox(label="Process Log", lines=15, max_lines=100)

         # Dynamic dropdown updates
         def update_text_models(generator):
@@ -1467,12 +1507,13 @@ def create_interface():
         tts_engine.change(fn=update_tts_voices, inputs=tts_engine, outputs=tts_voice)

         # Main generation function
-        def generate_youtube_short(niche, language, gemini_api_key, assemblyai_api_key,
-                                   elevenlabs_api_key, segmind_api_key, openai_api_key,
-                                   text_gen, text_model, image_gen, image_model,
+        def generate_youtube_short(niche, language, text_gen, text_model, image_gen, image_model,
                                    tts_engine, tts_voice, subtitles_enabled, highlighting_enabled,
                                    subtitle_font, font_size, subtitle_position,
-                                   text_color, highlight_color, music_file, progress=gr.Progress()):
+                                   text_color, highlight_color, music_file,
+                                   gemini_api_key, assemblyai_api_key,
+                                   elevenlabs_api_key, segmind_api_key, openai_api_key,
+                                   progress=gr.Progress()):

             if not niche.strip():
                 return {
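The reordered signature only works because Gradio passes input values positionally, so the function's parameters must match `inputs=[...]` element for element; the click wiring in the next hunk mirrors this order. A minimal sketch with hypothetical components:

```python
# Positional wiring: the inputs list order IS the handler's argument order.
import gradio as gr

def handler(name, excited):
    return f"Hello {name}{'!' if excited else '.'}"

with gr.Blocks() as demo:
    name = gr.Textbox(label="Name")
    excited = gr.Checkbox(label="Excited")
    out = gr.Textbox(label="Greeting")
    btn = gr.Button("Go")
    btn.click(fn=handler, inputs=[name, excited], outputs=out)
```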
@@ -1551,20 +1592,23 @@ def create_interface():
         generate_btn.click(
             fn=generate_youtube_short,
             inputs=[
-                niche, language, gemini_api_key, assemblyai_api_key, elevenlabs_api_key,
-                segmind_api_key, openai_api_key, text_gen, text_model, image_gen, image_model,
+                niche, language, text_gen, text_model, image_gen, image_model,
                 tts_engine, tts_voice, subtitles_enabled, highlighting_enabled,
-                subtitle_font, font_size, subtitle_position, text_color, highlight_color, music_file
+                subtitle_font, font_size, subtitle_position, text_color, highlight_color, music_file,
+                gemini_api_key, assemblyai_api_key, elevenlabs_api_key, segmind_api_key, openai_api_key
             ],
             outputs=[video_output, title_output, description_output, script_output, log_output]
         )

         # Add examples
+        music_choices = get_music_files()
+        default_music = "none" if "random" not in music_choices else "random"
+
         gr.Examples(
             [
-                ["Historical Facts", "English", "g4f", "gpt-4", "g4f", "flux", "edge", "en-US-AriaNeural", True, True, "default", 80, "bottom", "#FFFFFF", "#0000FF", "random"],
-                ["Cooking Tips", "English", "g4f", "gpt-4", "g4f", "flux", "edge", "en-US-AriaNeural", True, True, "default", 80, "bottom", "#FFFFFF", "#FF0000", "random"],
-                ["Technology News", "English", "g4f", "gpt-4", "g4f", "flux", "edge", "en-US-GuyNeural", True, True, "default", 80, "bottom", "#FFFFFF", "#00FF00", "random"],
+                ["Historical Facts", "English", "g4f", "gpt-4", "g4f", "flux", "edge", "en-US-AriaNeural", True, True, "default", 80, "bottom", "#FFFFFF", "#0000FF", default_music],
+                ["Cooking Tips", "English", "g4f", "gpt-4", "g4f", "flux", "edge", "en-US-AriaNeural", True, True, "default", 80, "bottom", "#FFFFFF", "#FF0000", default_music],
+                ["Technology News", "English", "g4f", "gpt-4", "g4f", "flux", "edge", "en-US-GuyNeural", True, True, "default", 80, "bottom", "#FFFFFF", "#00FF00", default_music],
             ],
             [niche, language, text_gen, text_model, image_gen, image_model, tts_engine, tts_voice,
              subtitles_enabled, highlighting_enabled, subtitle_font, font_size,
@@ -1577,9 +1621,10 @@ def create_interface():
 # Create and launch the interface
 if __name__ == "__main__":
     # Create necessary directories
-    os.makedirs(CACHE_DIR, exist_ok=True)
+    os.makedirs(STATIC_DIR, exist_ok=True)
     os.makedirs(MUSIC_DIR, exist_ok=True)
     os.makedirs(FONTS_DIR, exist_ok=True)
+    os.makedirs(CACHE_DIR, exist_ok=True)

     # Launch the app
     demo = create_interface()