diff --git "a/app.py" "b/app.py" --- "a/app.py" +++ "b/app.py" @@ -1,324 +1,94 @@ - - -
- - -Generate short videos based on a niche and language
- --gradio==3.50.2 -g4f==0.1.9.0 -moviepy==1.0.3 -assemblyai==0.17.0 -requests==2.31.0 -google-generativeai==0.3.1 -python-dotenv==1.0.0 -Pillow==10.0.0 -openai==1.3.5 -edge-tts==6.1.9 -bark==0.0.1 -tensorflow==2.12.0 -soundfile==0.12.1 -TTS==0.21.1 -rvc-engine==0.0.1 -termcolor==2.3.0 --
 import os
 import re
-import g4f
 import json
 import time
 import random
 import tempfile
 import requests
-import assemblyai as aai
-from moviepy.editor import *
+import numpy as np
+from PIL import Image
+from io import BytesIO
 from datetime import datetime
 import gradio as gr
 from dotenv import load_dotenv
+import moviepy.editor as mpy
+from moviepy.editor import *
+from moviepy.audio.fx.all import volumex
+from moviepy.video.fx.all import crop
 
-# Load environment variables from .env file
+# Load environment variables from .env file if present
 load_dotenv()
 
 # Constants
 CACHE_DIR = os.path.join(tempfile.gettempdir(), "yt_shorts_generator")
+ASSETS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets")
+MUSIC_DIR = os.path.join(ASSETS_DIR, "background_music")
+FONTS_DIR = os.path.join(ASSETS_DIR, "fonts")
+
+# Map UI language names to ISO-639-1 codes for gTTS (slicing the display
+# name, e.g. "Spanish"[:2] == "sp", produces invalid codes)
+LANGUAGE_CODES = {
+    "english": "en", "spanish": "es", "french": "fr", "german": "de",
+    "italian": "it", "portuguese": "pt", "russian": "ru",
+    "japanese": "ja", "chinese": "zh", "hindi": "hi",
+}
+
+# Create necessary directories
 os.makedirs(CACHE_DIR, exist_ok=True)
+os.makedirs(MUSIC_DIR, exist_ok=True)
+os.makedirs(FONTS_DIR, exist_ok=True)
 
-# Helper functions
+# Helper functions for logging
 def info(message):
-    print(f"[INFO] {message}")
-    return f"[INFO] {message}"
+    timestamp = datetime.now().strftime("%H:%M:%S")
+    formatted_message = f"[{timestamp}] [INFO] {message}"
+    print(formatted_message)
+    return formatted_message
 
 def success(message):
-    print(f"[SUCCESS] {message}")
-    return f"[SUCCESS] {message}"
+    timestamp = datetime.now().strftime("%H:%M:%S")
+    formatted_message = f"[{timestamp}] [SUCCESS] {message}"
+    print(formatted_message)
+    return formatted_message
 
 def warning(message):
-    print(f"[WARNING] {message}")
-    return f"[WARNING] {message}"
+    timestamp = datetime.now().strftime("%H:%M:%S")
+    formatted_message = f"[{timestamp}] [WARNING] {message}"
+    print(formatted_message)
+    return formatted_message
 
 def error(message):
-    print(f"[ERROR] {message}")
-    return f"[ERROR] {message}"
+    timestamp = datetime.now().strftime("%H:%M:%S")
+    formatted_message = f"[{timestamp}] [ERROR] {message}"
+    print(formatted_message)
+    return formatted_message
+
+def choose_random_music():
+    """Selects a random music file from the music directory."""
+    if not os.path.exists(MUSIC_DIR):
+        error(f"Music directory {MUSIC_DIR} does not exist")
+        return None
+
+    music_files = [f for f in os.listdir(MUSIC_DIR) if f.endswith(('.mp3', '.wav'))]
+    if not music_files:
+        warning(f"No music files found in {MUSIC_DIR}")
+        return None
+
+    return os.path.join(MUSIC_DIR, random.choice(music_files))
 
 class YouTube:
-    def __init__(self, niche, language, text_gen="gemini", image_gen="prodia", tts_engine="elevenlabs",
-                 tts_voice="Sarah", subtitle_font="Helvetica-Bold", font_size=80,
-                 text_color="white", highlight_color="blue", api_keys=None):
+    def __init__(self, niche: str, language: str,
+                 text_gen="gemini", text_model="gemini-2.0-flash",
+                 image_gen="prodia", image_model="sdxl",
+                 tts_engine="elevenlabs", tts_voice="Sarah",
+                 subtitle_font="Helvetica-Bold", font_size=80,
+                 text_color="white", highlight_color="blue",
+                 api_keys=None, progress=gr.Progress()) -> None:
+        """Initialize the YouTube Shorts Generator."""
+        self.progress = progress
+        self.progress(0, desc="Initializing")
+
+        # Store basic parameters
         info(f"Initializing YouTube class")
         self._niche = niche
         self._language = language
         self.text_gen = text_gen
+        self.text_model = text_model
         self.image_gen = image_gen
+        self.image_model = image_model
         self.tts_engine = tts_engine
         self.tts_voice = tts_voice
         self.subtitle_font = subtitle_font
@@ -329,55 +99,109 @@ class YouTube:
         self.images = []
         self.logs = []
 
-        # Set API keys
+        # Set API keys from parameters or environment variables
         if 'gemini' in self.api_keys and self.api_keys['gemini']:
             os.environ["GEMINI_API_KEY"] = self.api_keys['gemini']
+
         if 'assemblyai' in self.api_keys and self.api_keys['assemblyai']:
             os.environ["ASSEMBLYAI_API_KEY"] = self.api_keys['assemblyai']
+
         if 'elevenlabs' in self.api_keys and self.api_keys['elevenlabs']:
             os.environ["ELEVENLABS_API_KEY"] = self.api_keys['elevenlabs']
+
         if 'segmind' in self.api_keys and self.api_keys['segmind']:
             os.environ["SEGMIND_API_KEY"] = self.api_keys['segmind']
+
+        if 'openai' in self.api_keys and self.api_keys['openai']:
+            os.environ["OPENAI_API_KEY"] = self.api_keys['openai']
 
         info(f"Niche: {niche}, Language: {language}")
         self.log(f"Initialized with niche: {niche}, language: {language}")
-        self.log(f"Text generator: {text_gen}, Image generator: {image_gen}, TTS engine: {tts_engine}")
+        self.log(f"Text generator: {text_gen} - Model: {text_model}")
+        self.log(f"Image generator: {image_gen} - Model: {image_model}")
+        self.log(f"TTS engine: {tts_engine} - Voice: {tts_voice}")
 
     def log(self, message):
-        """Add a log message to the logs list"""
+        """Add a log message to the logs list."""
         timestamp = datetime.now().strftime("%H:%M:%S")
         log_entry = f"[{timestamp}] {message}"
         self.logs.append(log_entry)
         return log_entry
 
     @property
-    def niche(self):
+    def niche(self) -> str:
         return self._niche
 
     @property
-    def language(self):
+    def language(self) -> str:
        return self._language
 
-    def generate_response(self, prompt, model=None):
+    def generate_response(self, prompt: str, model: str = None) -> str:
+        """Generate a response using the selected text generation model."""
         self.log(f"Generating response for prompt: {prompt[:50]}...")
-        if self.text_gen == "gemini":
-            self.log("Using Google's Gemini model")
-            import google.generativeai as genai
-            genai.configure(api_key=os.environ.get("GEMINI_API_KEY", ""))
-            model = genai.GenerativeModel('gemini-2.0-flash')
-            response = model.generate_content(prompt).text
-        else:
-            model_name = model if model else "gpt-3.5-turbo"
-            self.log(f"Using G4F model: {model_name}")
-            response = g4f.ChatCompletion.create(
-                model=model_name,
-                messages=[{"role": "user", "content": prompt}]
-            )
-        self.log(f"Response generated successfully, length: {len(response)} characters")
-        return response
+
+        try:
+            if self.text_gen == "gemini":
+                self.log("Using Google's Gemini model")
+
+                # Check if API key is set
+                gemini_api_key = os.environ.get("GEMINI_API_KEY", "")
+                if not gemini_api_key:
+                    raise ValueError("Gemini API key is not set. Please provide a valid API key.")
+
+                import google.generativeai as genai
+                genai.configure(api_key=gemini_api_key)
+                model_to_use = model if model else self.text_model
+                genai_model = genai.GenerativeModel(model_to_use)
+                response = genai_model.generate_content(prompt).text
+
+            elif self.text_gen == "g4f":
+                self.log("Using G4F for text generation")
+                import g4f
+                model_to_use = model if model else "gpt-3.5-turbo"
+                self.log(f"Using G4F model: {model_to_use}")
+                response = g4f.ChatCompletion.create(
+                    model=model_to_use,
+                    messages=[{"role": "user", "content": prompt}]
+                )
+
+            elif self.text_gen == "openai":
+                self.log("Using OpenAI for text generation")
+                openai_api_key = os.environ.get("OPENAI_API_KEY", "")
+                if not openai_api_key:
+                    raise ValueError("OpenAI API key is not set. Please provide a valid API key.")
+
+                from openai import OpenAI
+                client = OpenAI(api_key=openai_api_key)
+                model_to_use = model if model else "gpt-3.5-turbo"
+
+                response = client.chat.completions.create(
+                    model=model_to_use,
+                    messages=[{"role": "user", "content": prompt}]
+                ).choices[0].message.content
+
+            else:
+                # Default to g4f if other methods aren't available
+                self.log(f"Using default G4F model as fallback")
+                import g4f
+                response = g4f.ChatCompletion.create(
+                    model="gpt-3.5-turbo",
+                    messages=[{"role": "user", "content": prompt}]
+                )
+
+            self.log(f"Response generated successfully, length: {len(response)} characters")
+            return response
+
+        except Exception as e:
+            error_msg = f"Error generating response: {str(e)}"
+            self.log(error_msg)
+            raise Exception(error_msg)
 
-    def generate_topic(self):
+    def generate_topic(self) -> str:
+        """Generate a topic based on the YouTube Channel niche."""
+        self.progress(0.05, desc="Generating topic")
         self.log("Generating topic based on niche")
+
         completion = self.generate_response(
             f"Please generate a specific video idea that takes about the following topic: {self.niche}. "
             f"Make it exactly one sentence. Only return the topic, nothing else."
@@ -385,14 +209,17 @@ class YouTube:
 
         if not completion:
             self.log(error("Failed to generate Topic."))
-            return None
+            raise Exception("Failed to generate a topic. Please try again with a different niche.")
 
         self.subject = completion
         self.log(success(f"Generated topic: {completion}"))
         return completion
 
-    def generate_script(self):
+    def generate_script(self) -> str:
+        """Generate a script for a video, based on the subject and language."""
+        self.progress(0.1, desc="Creating script")
         self.log("Generating script for video")
+
         prompt = f"""
         Generate a script for youtube shorts video, depending on the subject of the video.
@@ -421,7 +248,7 @@ class YouTube:
 
         if not completion:
             self.log(error("The generated script is empty."))
-            return None
+            raise Exception("Failed to generate a script. Please try again.")
 
         if len(completion) > 5000:
             self.log(warning("Generated Script is too long. Retrying..."))
@@ -431,8 +258,11 @@ class YouTube:
 
         self.log(success(f"Generated script ({len(completion)} chars)"))
         return completion
 
-    def generate_metadata(self):
+    def generate_metadata(self) -> dict:
+        """Generate video metadata (title, description)."""
+        self.progress(0.15, desc="Creating title and description")
         self.log("Generating metadata (title and description)")
+
         title = self.generate_response(
             f"Please generate a YouTube Video Title for the following subject, including hashtags: "
             f"{self.subject}. Only return the title, nothing else. Limit the title under 100 characters."
@@ -453,11 +283,14 @@ class YouTube:
         }
 
         self.log(success(f"Generated title: {title}"))
-        self.log(success(f"Generated description: {description}"))
+        self.log(success(f"Generated description: {description[:50]}..."))
 
         return self.metadata
 
-    def generate_prompts(self, count=5):
+    def generate_prompts(self, count=5) -> list:
+        """Generate AI Image Prompts based on the provided Video Script."""
+        self.progress(0.2, desc="Creating image prompts")
         self.log(f"Generating {count} image prompts")
+
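+        # The model is asked (in the prompt below) to return JSON like
+        # {"image_prompts": ["..."]}; the parsing further down tries that
+        # keyed object first, then a bare array, then a regex fallback.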
         prompt = f"""
         Generate {count} Image Prompts for AI Image Generation, depending on the subject of a video.
@@ -491,231 +324,422 @@ class YouTube:
         image_prompts = []
 
         if "image_prompts" in completion:
-            image_prompts = json.loads(completion)["image_prompts"]
-        else:
+            try:
+                image_prompts = json.loads(completion)["image_prompts"]
+            except:
+                self.log(warning("Failed to parse 'image_prompts' from JSON response."))
+
+        if not image_prompts:
             try:
                 image_prompts = json.loads(completion)
-                self.log(f"Generated Image Prompts: {image_prompts}")
+                self.log(f"Parsed image prompts from JSON response.")
             except Exception:
-                self.log(warning("GPT returned an unformatted response. Attempting to clean..."))
+                self.log(warning("JSON parsing failed. Attempting to extract array using regex..."))
                 # Get everything between [ and ], and turn it into a list
                 r = re.compile(r"\[.*\]", re.DOTALL)
                 matches = r.findall(completion)
 
                 if len(matches) == 0:
-                    self.log(warning("Failed to generate Image Prompts. Retrying..."))
-                    return self.generate_prompts(count)
-
-                try:
-                    image_prompts = json.loads(matches[0])
-                except:
-                    self.log(error("Failed to parse image prompts JSON"))
-                    # Try a fallback approach - create some generic prompts
+                    self.log(warning("Failed to extract array. Creating generic image prompts."))
+                    # Create generic prompts based on the subject
                     image_prompts = [
-                        f"A beautiful image showing {self.subject}",
-                        f"A detailed visualization of {self.subject}",
-                        f"An artistic representation of {self.subject}",
-                        f"A photorealistic image about {self.subject}",
-                        f"A dramatic scene related to {self.subject}"
+                        f"A beautiful image showing {self.subject}, photorealistic",
+                        f"A detailed visualization of {self.subject}, high quality",
+                        f"An artistic representation of {self.subject}, vibrant colors",
+                        f"A photorealistic image about {self.subject}, high resolution",
+                        f"A dramatic scene related to {self.subject}, cinema quality"
                     ]
+                else:
+                    try:
+                        image_prompts = json.loads(matches[0])
+                    except:
+                        self.log(error("Failed to parse array from regex match."))
+                        # Use regex to extract individual strings
+                        string_pattern = r'"([^"]*)"'
+                        strings = re.findall(string_pattern, matches[0])
+                        if strings:
+                            image_prompts = strings
+                        else:
+                            # Last resort - split by commas and clean up
+                            image_prompts = [
+                                s.strip().strip('"').strip("'")
+                                for s in matches[0].strip('[]').split(',')
+                            ]
 
-        self.image_prompts = image_prompts[:count]  # Limit to requested count
+        # Ensure we have the requested number of prompts
+        while len(image_prompts) < count:
+            image_prompts.append(f"A high-quality image about {self.subject}")
+
+        # Limit to the requested count
+        image_prompts = image_prompts[:count]
+
+        self.image_prompts = image_prompts
         self.log(success(f"Generated {len(self.image_prompts)} Image Prompts"))
         for i, prompt in enumerate(self.image_prompts):
             self.log(f"Image Prompt {i+1}: {prompt}")
-        return self.image_prompts
+
+        return image_prompts
 
-    def generate_image(self, prompt):
+    def generate_image(self, prompt) -> str:
+        """Generate an image using the selected image generation model."""
         self.log(f"Generating image for prompt: {prompt[:50]}...")
-        if self.image_gen == "prodia":
-            self.log("Using Prodia provider for image generation")
-            s = requests.Session()
-            headers = {
-                "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
-            }
-
-            # Generate job
-            self.log("Sending generation request to Prodia API")
-            resp = s.get(
-                "https://api.prodia.com/generate",
-                params={
-                    "new": "true",
-                    "prompt": prompt,
-                    "model": "sdxl",  # Default model
-                    "negative_prompt": "verybadimagenegative_v1.3",
-                    "steps": "20",
-                    "cfg": "7",
-                    "seed": random.randint(1, 10000),
-                    "sample": "DPM++ 2M Karras",
-                    "aspect_ratio": "square"
-                },
-                headers=headers
-            )
+        try:
+            image_path = os.path.join(CACHE_DIR, f"img_{len(self.images)}_{int(time.time())}.png")
 
-            job_id = resp.json()['job']
-            self.log(f"Job created with ID: {job_id}")
+            if self.image_gen == "prodia":
+                self.log("Using Prodia provider for image generation")
+                s = requests.Session()
+                headers = {
+                    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
+                }
+
+                # Generate job
+                self.log("Sending generation request to Prodia API")
+                resp = s.get(
+                    "https://api.prodia.com/generate",
+                    params={
+                        "new": "true",
+                        "prompt": prompt,
+                        "model": self.image_model,
+                        "negative_prompt": "verybadimagenegative_v1.3",
+                        "steps": "20",
+                        "cfg": "7",
+                        "seed": random.randint(1, 10000),
+                        "sample": "DPM++ 2M Karras",
+                        "aspect_ratio": "square"
+                    },
+                    headers=headers
+                )
+
+                if resp.status_code != 200:
+                    raise Exception(f"Prodia API error: {resp.text}")
+
+                job_id = resp.json()['job']
+                self.log(f"Job created with ID: {job_id}")
+
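+                # Polling contract (assumed from the responses handled
+                # below): GET /job/{job_id} returns {"status": ...} that
+                # eventually reads "succeeded" or "failed"; with 2 s sleeps
+                # and 30 attempts we give up after roughly a minute.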
+                # Wait for generation to complete
+                max_attempts = 30
+                attempts = 0
+                while attempts < max_attempts:
+                    attempts += 1
+                    time.sleep(2)
+                    status = s.get(f"https://api.prodia.com/job/{job_id}", headers=headers).json()
+
+                    if status["status"] == "succeeded":
+                        self.log("Image generation successful, downloading result")
+                        img_data = s.get(f"https://images.prodia.xyz/{job_id}.png?download=1", headers=headers).content
+                        with open(image_path, "wb") as f:
+                            f.write(img_data)
+                        self.images.append(image_path)
+                        self.log(success(f"Image saved to: {image_path}"))
+                        return image_path
+
+                    elif status["status"] == "failed":
+                        raise Exception(f"Prodia job failed: {status.get('error', 'Unknown error')}")
+
+                    # Still processing
+                    self.log(f"Still processing, attempt {attempts}/{max_attempts}...")
+
+                raise Exception("Prodia job timed out")
 
-            # For demo purposes, simulate waiting
-            self.log("Waiting for image generation to complete...")
-            time.sleep(3)  # Simulate API call
+            elif self.image_gen == "hercai":
+                self.log("Using Hercai provider for image generation")
+                url = f"https://hercai.onrender.com/{self.image_model}/text2image?prompt={prompt}"
+                r = requests.get(url)
+
+                if r.status_code != 200:
+                    raise Exception(f"Hercai API error: {r.text}")
+
+                parsed = r.json()
+                if "url" in parsed and parsed["url"]:
+                    self.log("Image URL received from Hercai")
+                    image_url = parsed["url"]
+                    img_data = requests.get(image_url).content
+                    with open(image_path, "wb") as f:
+                        f.write(img_data)
+                    self.images.append(image_path)
+                    self.log(success(f"Image saved to: {image_path}"))
+                    return image_path
+                else:
+                    raise Exception("No image URL in Hercai response")
 
-            # In a real implementation we would poll until completion
-            # For demo, we'll just create a placeholder image
-            image_path = os.path.join(CACHE_DIR, f"image_{len(self.images)}.png")
+            elif self.image_gen == "g4f":
+                self.log("Using G4F provider for image generation")
+                try:
+                    from g4f.client import Client
+                    client = Client()
+                    response = client.images.generate(
+                        model=self.image_model,
+                        prompt=prompt,
+                        response_format="url"
+                    )
+
+                    if response and response.data and len(response.data) > 0:
+                        image_url = response.data[0].url
+                        image_response = requests.get(image_url)
+
+                        if image_response.status_code == 200:
+                            with open(image_path, "wb") as f:
+                                f.write(image_response.content)
+                            self.images.append(image_path)
+                            self.log(success(f"Image saved to: {image_path}"))
+                            return image_path
+                        else:
+                            raise Exception(f"Failed to download image from {image_url}")
+                    else:
+                        raise Exception("No image URL received from G4F")
+                except Exception as e:
+                    raise Exception(f"G4F image generation failed: {str(e)}")
 
-            # Since we can't actually generate a real image, for demonstration we'll
-            # return a simple example URL that would be the result in a real implementation
-            image_url = "https://images.unsplash.com/photo-1579546929518-9e396f3cc809"
-            self.log(success(f"Image generated and saved (placeholder for demo)"))
-            self.images.append(image_url)
-            return image_url
-
-        elif self.image_gen == "hercai":
-            self.log("Using Hercai provider for image generation")
-            # For demo purposes, simulate API call
-            time.sleep(2)
-            image_url = "https://images.unsplash.com/photo-1513151233558-d860c5398176"
-            self.log(success(f"Image generated and saved (placeholder for demo)"))
-            self.images.append(image_url)
-            return image_url
+            elif self.image_gen == "segmind":
+                self.log("Using Segmind provider for image generation")
+                api_key = os.environ.get("SEGMIND_API_KEY", "")
+                if not api_key:
+                    raise ValueError("Segmind API key is not set. Please provide a valid API key.")
+
+                headers = {
+                    "x-api-key": api_key,
+                    "Content-Type": "application/json"
+                }
+
+                response = requests.post(
+                    "https://api.segmind.com/v1/sdxl-turbo",
+                    json={
+                        "prompt": prompt,
+                        "negative_prompt": "blurry, low quality, distorted face, text, watermark",
+                        "samples": 1,
+                        "size": "1024x1024",
+                        "guidance_scale": 1.0
+                    },
+                    headers=headers
+                )
+
+                if response.status_code == 200:
+                    with open(image_path, "wb") as f:
+                        f.write(response.content)
+                    self.images.append(image_path)
+                    self.log(success(f"Image saved to: {image_path}"))
+                    return image_path
+                else:
+                    raise Exception(f"Segmind request failed: {response.status_code} {response.text}")
 
-        elif self.image_gen == "segmind":
-            self.log("Using Segmind provider for image generation")
-            # For demo purposes, simulate API call
-            time.sleep(2)
-            image_url = "https://images.unsplash.com/photo-1618005182384-a83a8bd57fbe"
-            self.log(success(f"Image generated and saved (placeholder for demo)"))
-            self.images.append(image_url)
-            return image_url
+            elif self.image_gen == "pollinations":
+                self.log("Using Pollinations provider for image generation")
+                # URL-encode the prompt; the random suffix keeps Pollinations
+                # from serving a cached image for a repeated prompt
+                response = requests.get(
+                    "https://image.pollinations.ai/prompt/"
+                    + requests.utils.quote(f"{prompt} {random.randint(1, 10000)}")
+                )
+
+                if response.status_code == 200:
+                    self.log("Image received from Pollinations")
+                    with open(image_path, "wb") as f:
+                        f.write(response.content)
+                    self.images.append(image_path)
+                    self.log(success(f"Image saved to: {image_path}"))
+                    return image_path
+                else:
+                    raise Exception(f"Pollinations request failed with status code: {response.status_code}")
 
-        elif self.image_gen == "pollinations":
-            self.log("Using Pollinations provider for image generation")
-            # For demo purposes, simulate API call
-            time.sleep(2)
-            image_url = "https://images.unsplash.com/photo-1550859492-d5da9d8e45f3"
-            self.log(success(f"Image generated and saved (placeholder for demo)"))
-            self.images.append(image_url)
-            return image_url
+            else:
+                # Default to generating a colored placeholder image
+                self.log(f"Unknown provider '{self.image_gen}'. Generating placeholder image.")
+
+                # Create a placeholder colored image with the prompt text
+                img = Image.new('RGB', (800, 800), color=(random.randint(0, 255),
+                                                          random.randint(0, 255),
+                                                          random.randint(0, 255)))
+                img.save(image_path)
+                self.images.append(image_path)
+                self.log(warning(f"Created placeholder image at: {image_path}"))
+                return image_path
 
-        else:  # Default or g4f
-            self.log("Using default provider for image generation")
-            # For demo purposes, simulate API call
-            time.sleep(2)
-            image_url = "https://images.unsplash.com/photo-1541701494587-cb58502866ab"
-            self.log(success(f"Image generated and saved (placeholder for demo)"))
-            self.images.append(image_url)
-            return image_url
+        except Exception as e:
+            error_msg = f"Image generation failed: {str(e)}"
+            self.log(error(error_msg))
 
+            # Create a fallback image
+            try:
+                img = Image.new('RGB', (800, 800), color=(200, 200, 200))
+                image_path = os.path.join(CACHE_DIR, f"error_img_{len(self.images)}_{int(time.time())}.png")
+                img.save(image_path)
+                self.images.append(image_path)
+                self.log(warning(f"Created error placeholder image at: {image_path}"))
+                return image_path
+            except:
+                # If all else fails, return None and handle it gracefully
+                return None
 
-    def generate_speech(self, text, output_format='mp3'):
+    def generate_speech(self, text, output_format='mp3') -> str:
+        """Generate speech from text using the selected TTS engine."""
+        self.progress(0.6, desc="Creating voiceover")
         self.log("Generating speech from text")
 
         # Clean text
-        text = re.sub(r'[^\w\s.?!]', '', text)
+        text = re.sub(r'[^\w\s.?!,;:\'"-]', '', text)
 
         self.log(f"Using TTS Engine: {self.tts_engine}, Voice: {self.tts_voice}")
-        audio_path = os.path.join(CACHE_DIR, f"speech.{output_format}")
+        audio_path = os.path.join(CACHE_DIR, f"speech_{int(time.time())}.{output_format}")
 
-        if self.tts_engine == "elevenlabs":
-            self.log("Using ElevenLabs provider for speech generation")
-            # For demo purposes, we'll just simulate the API call
-            self.log("Simulating ElevenLabs API call (would use real API in production)")
-            time.sleep(3)  # Simulate API call
-            self.tts_path = audio_path
-            return audio_path
+        try:
+            if self.tts_engine == "elevenlabs":
+                self.log("Using ElevenLabs provider for speech generation")
+                elevenlabs_api_key = os.environ.get("ELEVENLABS_API_KEY", "")
+                if not elevenlabs_api_key:
+                    raise ValueError("ElevenLabs API key is not set. Please provide a valid API key.")
 
-        elif self.tts_engine == 'bark':
-            self.log("Using Bark provider for speech generation")
-            # For demo purposes, simulate API call
-            time.sleep(3)
-            self.tts_path = audio_path
-            return audio_path
-
-        elif self.tts_engine == "gtts":
-            self.log("Using Google TTS provider for speech generation")
-            # For demo purposes, simulate API call
-            time.sleep(2)
-            self.tts_path = audio_path
-            return audio_path
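+                # Direct REST call: POST /v1/text-to-speech/{voice_id} with
+                # the key in an "xi-api-key" header; the handling below
+                # assumes raw MP3 bytes come back on HTTP 200.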
+                headers = {
+                    "Accept": "audio/mpeg",
+                    "Content-Type": "application/json",
+                    "xi-api-key": elevenlabs_api_key
+                }
+
+                payload = {
+                    "text": text,
+                    "model_id": "eleven_monolingual_v1",
+                    "voice_settings": {
+                        "stability": 0.5,
+                        "similarity_boost": 0.5,
+                        "style": 0.0,
+                        "use_speaker_boost": True
+                    }
+                }
+
+                voice_id = self.tts_voice if self.tts_voice not in ["Sarah", "default"] else "21m00Tcm4TlvDq8ikWAM"
+
+                response = requests.post(
+                    url=f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}",
+                    json=payload,
+                    headers=headers
+                )
+
+                if response.status_code == 200:
+                    with open(audio_path, 'wb') as f:
+                        f.write(response.content)
+                    self.log(success(f"Speech generated successfully using ElevenLabs at {audio_path}"))
+                else:
+                    raise Exception(f"ElevenLabs API error: {response.text}")
+
+            elif self.tts_engine == "gtts":
+                self.log("Using Google TTS provider for speech generation")
+                from gtts import gTTS
+                # Look up the ISO code; slicing the display name produced
+                # invalid codes for most languages
+                tts = gTTS(text=text, lang=LANGUAGE_CODES.get(self.language.lower(), "en"), slow=False)
+                tts.save(audio_path)
+
+            elif self.tts_engine == "openai":
+                self.log("Using OpenAI provider for speech generation")
+                openai_api_key = os.environ.get("OPENAI_API_KEY", "")
+                if not openai_api_key:
+                    raise ValueError("OpenAI API key is not set. Please provide a valid API key.")
+
+                from openai import OpenAI
+                client = OpenAI(api_key=openai_api_key)
+
+                voice = self.tts_voice if self.tts_voice else "alloy"
+                response = client.audio.speech.create(
+                    model="tts-1",
+                    voice=voice,
+                    input=text
+                )
+                response.stream_to_file(audio_path)
+
+            elif self.tts_engine == "edge":
+                self.log("Using Edge TTS provider for speech generation")
+                import edge_tts
+                import asyncio
+
+                voice = self.tts_voice if self.tts_voice else "en-US-AriaNeural"
+
+                async def generate():
+                    communicate = edge_tts.Communicate(text, voice)
+                    await communicate.save(audio_path)
+
+                asyncio.run(generate())
 
-        elif self.tts_engine == "openai":
-            self.log("Using OpenAI provider for speech generation")
-            # For demo purposes, simulate API call
-            time.sleep(3)
-            self.tts_path = audio_path
-            return audio_path
+            else:
+                # Fallback to gtts
+                self.log(f"Unknown TTS engine '{self.tts_engine}'. Falling back to gTTS.")
+                from gtts import gTTS
+                tts = gTTS(text=text, lang=LANGUAGE_CODES.get(self.language.lower(), "en"), slow=False)
+                tts.save(audio_path)
 
-        elif self.tts_engine == "edge":
-            self.log("Using Edge TTS provider for speech generation")
-            # For demo purposes, simulate API call
-            time.sleep(2)
+            self.log(success(f"Speech generated and saved to: {audio_path}"))
             self.tts_path = audio_path
             return audio_path
 
-        else:
-            self.log(f"Using default TTS engine (would use {self.tts_engine} in production)")
-            # For demo purposes, simulate API call
-            time.sleep(2)
-            self.tts_path = audio_path
-            return audio_path
+        except Exception as e:
+            error_msg = f"Speech generation failed: {str(e)}"
+            self.log(error(error_msg))
 
-        self.log(success(f"Speech generated and saved to: {audio_path}"))
-        self.tts_path = audio_path
-        return audio_path
+            # Create a silent audio file as fallback
+            try:
+                from pydub import AudioSegment
+
+                # Generate 30 seconds of silence
+                silence = AudioSegment.silent(duration=30000)
+                silence.export(audio_path, format=output_format)
+
+                self.log(warning(f"Created silent audio fallback at: {audio_path}"))
+                self.tts_path = audio_path
+                return audio_path
+            except:
+                self.log(error("Failed to create silent audio fallback"))
+                return None
 
     def generate_subtitles(self, audio_path):
-        self.log("Generating word-level subtitles for video")
-
-        # Define constants
-        FONT = self.subtitle_font
-        FONTSIZE = self.font_size
-        COLOR = self.text_color
-        BG_COLOR = self.highlight_color
-        FRAME_SIZE = (1080, 1920)
-        MAX_CHARS = 30
-        MAX_DURATION = 3.0
-        MAX_GAP = 2.5
+        """Generate word-level subtitles for the video."""
+        self.progress(0.65, desc="Creating subtitles")
+        self.log("Starting subtitle generation process")
 
         try:
-            # In a real implementation, we would use AssemblyAI to transcribe
-            self.log("In a production environment, this would use AssemblyAI for transcription")
+            assemblyai_api_key = os.environ.get("ASSEMBLYAI_API_KEY", "")
 
-            # For demo purposes, we'll simulate the word-level data
-            self.log("Simulating transcription with word-level timing")
-            words = self.script.split()
-            total_duration = 60  # Assume 60 seconds for demo
-            avg_word_duration = total_duration / len(words)
+            if not assemblyai_api_key:
+                self.log(warning("AssemblyAI API key not set. Generating simulated subtitles."))
+                return self._generate_simulated_subtitles()
 
-            wordlevel_info = []
-            current_time = 0
+            import assemblyai as aai
+            aai.settings.api_key = assemblyai_api_key
+
+            config = aai.TranscriptionConfig(speaker_labels=False, word_boost=[], format_text=True)
+            transcriber = aai.Transcriber(config=config)
+
+            self.log("Submitting audio for transcription")
+            transcript = transcriber.transcribe(audio_path)
 
-            for word in words:
-                # Calculate a slightly randomized duration based on word length
-                word_duration = avg_word_duration * (0.5 + (len(word) / 10))
+            if not transcript or not transcript.words:
+                self.log(warning("Transcription returned no words. Using simulated subtitles."))
+                return self._generate_simulated_subtitles()
 
+            # Process word-level information
+            wordlevel_info = []
+            for word in transcript.words:
                 word_data = {
-                    "word": word.strip(),
-                    "start": current_time,
-                    "end": current_time + word_duration
+                    "word": word.text.strip(),
+                    "start": word.start / 1000.0,
+                    "end": word.end / 1000.0
                 }
                 wordlevel_info.append(word_data)
-                current_time += word_duration
 
-            self.log(success(f"Generated word-level timing for {len(wordlevel_info)} words"))
+            self.log(success(f"Transcription successful. Got {len(wordlevel_info)} words."))
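+
+            # Captions are assembled greedily below: words accumulate on a
+            # line until it exceeds MAX_CHARS characters or MAX_DURATION
+            # seconds, or a silence longer than MAX_GAP splits it.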
+            # Define constants for subtitle generation
+            FONT = self.subtitle_font
+            FONTSIZE = self.font_size
+            COLOR = self.text_color
+            BG_COLOR = self.highlight_color
+            FRAME_SIZE = (1080, 1920)
+            MAX_CHARS = 30
+            MAX_DURATION = 3.0
+            MAX_GAP = 2.5
 
-            # Process into line-level data (simplified for demo)
+            # Split text into lines based on character count, duration, and gap
             subtitles = []
             line = []
             line_duration = 0
-
-            for idx, word_data in enumerate(wordlevel_info):
-                word = word_data["word"]
-                start = word_data["start"]
-                end = word_data["end"]
 
+            for idx, word_data in enumerate(wordlevel_info):
                 line.append(word_data)
-                line_duration += end - start
+                line_duration += word_data["end"] - word_data["start"]
 
                 temp = " ".join(item["word"] for item in line)
                 new_line_chars = len(temp)
                 duration_exceeded = line_duration > MAX_DURATION
@@ -751,365 +775,683 @@ class YouTube:
                 subtitles.append(subtitle_line)
 
             self.log(success(f"Generated {len(subtitles)} subtitle lines"))
-
-            # In a real implementation, we would create TextClips for MoviePy
-            # For the demo, we'll just return the subtitle data
 
             return {
                 "wordlevel": wordlevel_info,
                 "linelevel": subtitles
             }
-
+
         except Exception as e:
-            self.log(error(f"Subtitle generation failed: {str(e)}"))
-            return None
-
-    def combine(self):
-        self.log("Combining images and audio into final video")
-
-        # For demonstration purposes, we're simulating the video creation process
-        combined_video_path = os.path.join(CACHE_DIR, "output.mp4")
-
-        # In a real implementation, this would:
-        # 1. Create ImageClips from each image
-        # 2. Create an audio clip from the speech
-        # 3. Add background music
-        # 4. Add word-level subtitles
-        # 5. Combine everything into a final video
-
-        self.log("This would create a vertical (9:16) video with:")
-        self.log(f"- {len(self.images)} images as a slideshow")
-        self.log("- TTS audio as the main audio track")
-        self.log("- Background music at low volume")
-        self.log("- Word-level subtitles that highlight as words are spoken")
-
-        # For demo purposes, simulate video processing
-        self.log("Processing video (simulated for demo)...")
-        time.sleep(3)
-
-        success_msg = f"Video successfully created at: {combined_video_path}"
-        self.log(success(success_msg))
-        self.video_path = combined_video_path
-
-        # For the demo, we'll return a mock result
-        return {
-            'video_path': combined_video_path,
-            'images': self.images,
-            'audio_path': self.tts_path,
-            'metadata': self.metadata
-        }
-
-    def generate_video(self):
-        """Generate complete video with all components"""
-        self.log("Starting video generation process")
-
-        # Step 1: Generate topic
-        self.log("Generating topic")
-        self.generate_topic()
+            error_msg = f"Subtitle generation failed: {str(e)}"
+            self.log(error(error_msg))
+            return self._generate_simulated_subtitles()
+
+    def _generate_simulated_subtitles(self):
+        """Generate simulated subtitles when AssemblyAI is not available."""
+        self.log("Generating simulated subtitles")
 
-        # Step 2: Generate script
-        self.log("Generating script")
-        self.generate_script()
+        # Split script into words
+        words = self.script.split()
 
-        # Step 3: Generate metadata
-        self.log("Generating metadata")
-        self.generate_metadata()
+        # Estimate audio duration based on word count (average speaking rate)
+        estimated_duration = len(words) * 0.3  # 0.3 seconds per word on average
 
-        # Step 4: Generate image prompts
-        self.log("Generating image prompts")
-        self.generate_prompts()
+        # Generate word-level timings
+        wordlevel_info = []
+        current_time = 0
 
-        # Step 5: Generate images
-        self.log("Generating images")
-        for i, prompt in enumerate(self.image_prompts, 1):
-            self.log(f"Generating image {i}/{len(self.image_prompts)}")
-            self.generate_image(prompt)
+        for word in words:
+            # Adjust duration based on word length
+            word_duration = 0.2 + min(0.05 * len(word), 0.3)  # Between 0.2 and 0.5 seconds
+
+            word_data = {
+                "word": word,
+                "start": current_time,
+                "end": current_time + word_duration
+            }
+            wordlevel_info.append(word_data)
+
+            # Add a small gap between words
+            current_time += word_duration + 0.05
 
-        # Step 6: Generate speech
-        self.log("Generating speech")
-        self.generate_speech(self.script)
+        # Generate line-level subtitles
+        subtitles = []
+        line = []
+        line_start = 0
+        line_text = ""
 
-        # Step 7: Generate subtitles
-        self.log("Generating subtitles")
-        self.generate_subtitles(self.tts_path)
+        for word_data in wordlevel_info:
+            # Check if adding this word would exceed character limit
+            if len(line_text + " " + word_data["word"]) > 30 and line:
+                # Finalize current line
+                subtitle_line = {
+                    "text": line_text,
+                    "start": line_start,
+                    "end": line[-1]["end"],
+                    "words": line.copy()
+                }
+                subtitles.append(subtitle_line)
+
+                # Start new line
+                line = [word_data]
+                line_start = word_data["start"]
+                line_text = word_data["word"]
+            else:
+                # Add word to current line
+                line.append(word_data)
+                line_text = (line_text + " " + word_data["word"]).strip()
+                if len(line) == 1:
+                    line_start = word_data["start"]
 
-        # Step 8: Combine all elements into final video
-        self.log("Combining all elements into final video")
-        result = self.combine()
+        # Add final line if not empty
+        if line:
+            subtitle_line = {
+                "text": line_text,
+                "start": line_start,
+                "end": line[-1]["end"],
+                "words": line
+            }
+            subtitles.append(subtitle_line)
 
-        self.log(f"Video generation complete.")
+        self.log(success(f"Generated {len(wordlevel_info)} simulated word timings and {len(subtitles)} subtitle lines"))
 
         return {
-            'video_path': result['video_path'],
-            'images': result['images'],
-            'audio_path': self.tts_path,
-            'title': self.metadata['title'],
-            'description': self.metadata['description'],
-            'subject': self.subject,
-            'script': self.script,
-            'logs': self.logs
+            "wordlevel": wordlevel_info,
+            "linelevel": subtitles
         }
 
-# Gradio interface
-def create_youtube_short(niche, language, gemini_api_key="", assemblyai_api_key="",
-                         elevenlabs_api_key="", segmind_api_key="", text_gen="gemini",
-                         image_gen="prodia", tts_engine="elevenlabs", tts_voice="Sarah",
-                         subtitle_font="Helvetica-Bold", font_size=80, text_color="white",
-                         highlight_color="blue"):
-
-    # Create API keys dictionary
-    api_keys = {
-        'gemini': gemini_api_key,
-        'assemblyai': assemblyai_api_key,
-        'elevenlabs': elevenlabs_api_key,
-        'segmind': segmind_api_key
-    }
-
-    # Initialize YouTube class
-    yt = YouTube(
-        niche=niche,
-        language=language,
-        text_gen=text_gen,
-        image_gen=image_gen,
-        tts_engine=tts_engine,
-        tts_voice=tts_voice,
-        subtitle_font=subtitle_font,
-        font_size=font_size,
-        text_color=text_color,
-        highlight_color=highlight_color,
-        api_keys=api_keys
-    )
-
-    # Generate video
-    result = yt.generate_video()
-
-    # In a real implementation we would return the actual video file
-    # For demo, we'll just simulate it with a placeholder
-    demo_video = "https://sample-videos.com/video123/mp4/720/big_buck_bunny_720p_1mb.mp4"
-
-    # Return all the relevant information for the UI
-    return {
-        "video": demo_video,
-        "title": result['title'],
-        "description": result['description'],
-        "script": result['script'],
-        "logs": "\n".join(result['logs'])
-    }
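+    # Assembly pipeline used below (MoviePy 1.x): ImageClip slideshow ->
+    # center-crop each frame to 9:16 -> resize to 1080x1920 -> concatenate
+    # -> TTS audio plus background music ducked to 10% volume -> per-word
+    # TextClip subtitles -> libx264/aac render.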
"\n".join(result['logs']) - } + def combine(self) -> str: + """Combine images, audio, and subtitles into a final video.""" + self.progress(0.8, desc="Creating final video") + self.log("Combining images and audio into final video") + + try: + output_path = os.path.join(CACHE_DIR, f"output_{int(time.time())}.mp4") + + # Check for required files + if not self.images: + raise ValueError("No images available for video creation") + + if not hasattr(self, 'tts_path') or not self.tts_path or not os.path.exists(self.tts_path): + raise ValueError("No TTS audio file available") + + # Load audio + tts_clip = AudioFileClip(self.tts_path) + max_duration = tts_clip.duration + + # Calculate duration for each image + num_images = len(self.images) + req_dur = max_duration / num_images + + # Create video clips from images + clips = [] + tot_dur = 0 + + # Loop through images, repeating if necessary to fill audio duration + while tot_dur < max_duration: + for image_path in self.images: + # Check if image exists and is valid + if not os.path.exists(image_path): + self.log(warning(f"Image not found: {image_path}, skipping")) + continue + + try: + clip = ImageClip(image_path) + clip = clip.set_duration(req_dur) + clip = clip.set_fps(30) + + # Handle aspect ratio (vertical video for shorts) + aspect_ratio = 9/16 # Standard vertical video ratio + if clip.w / clip.h < aspect_ratio: + # Image is too tall, crop height + clip = crop( + clip, + width=clip.w, + height=round(clip.w / aspect_ratio), + x_center=clip.w / 2, + y_center=clip.h / 2 + ) + else: + # Image is too wide, crop width + clip = crop( + clip, + width=round(aspect_ratio * clip.h), + height=clip.h, + x_center=clip.w / 2, + y_center=clip.h / 2 + ) + + # Resize to standard size for shorts + clip = clip.resize((1080, 1920)) + clips.append(clip) + tot_dur += clip.duration + + # If we've exceeded the duration, break + if tot_dur >= max_duration: + break + except Exception as e: + self.log(warning(f"Error processing image {image_path}: {str(e)}")) + + # Create video from clips + self.log(f"Creating video from {len(clips)} clips") + final_clip = concatenate_videoclips(clips) + final_clip = final_clip.set_fps(30) + + # Add background music if available + background_music = choose_random_music() + if background_music and os.path.exists(background_music): + self.log(f"Adding background music: {background_music}") + try: + music_clip = AudioFileClip(background_music) + # Loop music if it's shorter than the video + if music_clip.duration < max_duration: + repeats = int(max_duration / music_clip.duration) + 1 + music_clip = concatenate_audioclips([music_clip] * repeats) + # Trim if it's longer + music_clip = music_clip.subclip(0, max_duration) + # Reduce volume + music_clip = music_clip.fx(volumex, 0.1) + + # Combine audio tracks + comp_audio = CompositeAudioClip([tts_clip, music_clip]) + final_clip = final_clip.set_audio(comp_audio) + except Exception as e: + self.log(warning(f"Error adding background music: {str(e)}")) + final_clip = final_clip.set_audio(tts_clip) + else: + self.log("No background music found, using TTS audio only") + final_clip = final_clip.set_audio(tts_clip) + + # Set final duration + final_clip = final_clip.set_duration(tts_clip.duration) + + # Generate subtitles if available + subtitle_clips = [] + subtitles = self.generate_subtitles(self.tts_path) + + if subtitles and 'wordlevel' in subtitles: + self.log("Adding word-level subtitles") + + from moviepy.video.tools.subtitles import TextClip + + # Define subtitle styles + font = self.subtitle_font 
if os.path.exists(os.path.join(FONTS_DIR, f"{self.subtitle_font}.ttf")) else None + fontsize = self.font_size + color = self.text_color + bg_color = self.highlight_color + + # Add subtitles as highlighted words + for subtitle in subtitles['linelevel']: + full_duration = subtitle['end'] - subtitle['start'] + + # Calculate position on screen (bottom centered) + frame_width, frame_height = 1080, 1920 + x_pos = 0 + y_pos = frame_height * 0.85 # Position at 85% of frame height + x_buffer = frame_width * 1 / 10 + + # Add each word with proper timing and highlighting + for word_data in subtitle['words']: + word = word_data['word'] + start = word_data['start'] + end = word_data['end'] + + # Create text clip for word + try: + word_clip = TextClip( + txt=word, + font=font, + fontsize=fontsize, + color=color, + bg_color=bg_color, + stroke_color='black', + stroke_width=1 + ).set_position((x_pos + x_buffer, y_pos)).set_start(start).set_duration(end - start) + + subtitle_clips.append(word_clip) + x_pos += word_clip.w + 10 # Add spacing between words + + # Wrap to next line if needed + if x_pos + word_clip.w > frame_width - 2 * x_buffer: + x_pos = 0 + y_pos += word_clip.h + 10 + except Exception as e: + self.log(warning(f"Error creating subtitle for word '{word}': {str(e)}")) + + # Add subtitles to video if any were created + if subtitle_clips: + self.log(f"Adding {len(subtitle_clips)} subtitle clips to video") + final_clip = CompositeVideoClip([final_clip] + subtitle_clips) + + # Write final video + self.log("Writing final video file") + final_clip.write_videofile(output_path, threads=4, codec='libx264', audio_codec='aac') + + success_msg = f"Video successfully created at: {output_path}" + self.log(success(success_msg)) + self.video_path = output_path + + return output_path + + except Exception as e: + error_msg = f"Error combining video: {str(e)}" + self.log(error(error_msg)) + + # Create a minimal fallback video if possible + try: + # Try to create a simple video with just the first image and audio + fallback_path = os.path.join(CACHE_DIR, f"fallback_{int(time.time())}.mp4") + + if self.images and os.path.exists(self.images[0]) and hasattr(self, 'tts_path') and os.path.exists(self.tts_path): + img_clip = ImageClip(self.images[0]).set_duration(10) + img_clip = img_clip.resize((1080, 1920)) + audio_clip = AudioFileClip(self.tts_path).subclip(0, min(10, AudioFileClip(self.tts_path).duration)) + video_clip = img_clip.set_audio(audio_clip) + video_clip.write_videofile(fallback_path, threads=2, codec='libx264', audio_codec='aac') + + self.log(warning(f"Created fallback video at: {fallback_path}")) + self.video_path = fallback_path + return fallback_path + else: + raise Exception("Cannot create fallback video: missing images or audio") + except Exception as fallback_error: + self.log(error(f"Failed to create fallback video: {str(fallback_error)}")) + return None -# Create Gradio app -with gr.Blocks() as demo: - gr.Markdown("# YouTube Shorts Generator") - gr.Markdown("Generate short videos based on a niche and language") - - with gr.Row(): - with gr.Column(scale=1): - with gr.Group(): - gr.Markdown("### Required Inputs") - niche = gr.Textbox(label="Niche/Topic", placeholder="E.g., Fitness tips, Technology facts") - language = gr.Dropdown( - choices=["English", "Spanish", "French", "German", "Italian", "Portuguese", - "Russian", "Japanese", "Chinese", "Hindi"], - label="Language", - value="English" - ) + def generate_video(self) -> dict: + """Generate complete video with all components.""" + try: + 
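+    # Orchestration: each step below advances the shared gr.Progress
+    # (0.05 topic, 0.1 script, 0.2 metadata, 0.3 prompts, 0.4-0.6 images,
+    # 0.6 speech, 0.8 video, 0.95 finalize) so the UI can track the run.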
self.log("Starting video generation process") - with gr.Accordion("API Keys", open=False): - gemini_api_key = gr.Textbox(label="Gemini API Key", type="password") - assemblyai_api_key = gr.Textbox(label="AssemblyAI API Key", type="password") - elevenlabs_api_key = gr.Textbox(label="ElevenLabs API Key", type="password") - segmind_api_key = gr.Textbox(label="Segmind API Key", type="password") + # Step 1: Generate topic + self.log("Generating topic") + self.generate_topic() - with gr.Accordion("Model Selection", open=False): - text_gen = gr.Dropdown( - choices=["gemini", "g4f"], - label="Text Generator", - value="gemini" - ) - image_gen = gr.Dropdown( - choices=["prodia", "hercai", "g4f", "segmind", "pollinations"], - label="Image Generator", - value="prodia" - ) - tts_engine = gr.Dropdown( - choices=["elevenlabs", "bark", "gtts", "openai", "edge", "local_tts", "xtts", "rvc"], - label="Text-to-Speech Engine", - value="elevenlabs" - ) - tts_voice = gr.Textbox( - label="TTS Voice", - placeholder="E.g., Sarah, Brian, Lily, Monika Sogam", - value="Sarah" - ) + # Step 2: Generate script + self.progress(0.1, desc="Creating script") + self.log("Generating script") + self.generate_script() - with gr.Accordion("Subtitle Options", open=False): - subtitle_font = gr.Dropdown( - choices=["Helvetica-Bold", "Arial-Bold", "Impact", "Comic-Sans-MS"], - label="Font", - value="Helvetica-Bold" - ) - font_size = gr.Slider( - minimum=40, - maximum=120, - value=80, - step=5, - label="Font Size" - ) - with gr.Row(): - text_color = gr.ColorPicker(label="Text Color", value="#FFFFFF") - highlight_color = gr.ColorPicker(label="Highlight Color", value="#0000FF") + # Step 3: Generate metadata + self.progress(0.2, desc="Creating metadata") + self.log("Generating metadata") + self.generate_metadata() - generate_btn = gr.Button("Generate Video", variant="primary") - - with gr.Column(scale=1): - video_output = gr.Video(label="Generated Video") - title_output = gr.Textbox(label="Title") - description_output = gr.Textbox(label="Description", lines=3) - script_output = gr.Textbox(label="Script", lines=5) - log_output = gr.Textbox(label="Process Log", lines=10) - - # Set up the function to call when the generate button is clicked - generate_btn.click( - fn=create_youtube_short, - inputs=[ - niche, language, gemini_api_key, assemblyai_api_key, elevenlabs_api_key, - segmind_api_key, text_gen, image_gen, tts_engine, tts_voice, - subtitle_font, font_size, text_color, highlight_color + # Step 4: Generate image prompts + self.progress(0.3, desc="Creating image prompts") + self.log("Generating image prompts") + self.generate_prompts() + + # Step 5: Generate images + self.progress(0.4, desc="Generating images") + self.log("Generating images") + for i, prompt in enumerate(self.image_prompts, 1): + self.progress(0.4 + 0.2 * (i / len(self.image_prompts)), + desc=f"Generating image {i}/{len(self.image_prompts)}") + self.log(f"Generating image {i}/{len(self.image_prompts)}") + self.generate_image(prompt) + + # Step 6: Generate speech + self.progress(0.6, desc="Creating speech") + self.log("Generating speech") + self.generate_speech(self.script) + + # Step 7: Combine all elements into final video + self.progress(0.8, desc="Creating final video") + self.log("Combining all elements into final video") + path = self.combine() + + self.progress(0.95, desc="Finalizing") + self.log(f"Video generation complete. 
File saved at: {path}") + + # Return the result + return { + 'video_path': path, + 'title': self.metadata['title'], + 'description': self.metadata['description'], + 'subject': self.subject, + 'script': self.script, + 'logs': self.logs + } + + except Exception as e: + error_msg = f"Error during video generation: {str(e)}" + self.log(error(error_msg)) + raise Exception(error_msg) + +# Data for dynamic dropdowns +def get_text_generator_models(generator): + """Get available models for the selected text generator.""" + models = { + "gemini": [ + "gemini-2.0-flash", + "gemini-2.0-flash-lite", + "gemini-1.5-flash", + "gemini-1.5-flash-8b", + "gemini-1.5-pro" ], - outputs={ - "video": video_output, - "title": title_output, - "description": description_output, - "script": script_output, - "logs": log_output - } - ) + "g4f": [ + "gpt-3.5-turbo", + "gpt-4", + "gpt-4o", + "llama-3-70b-chat", + "claude-3-opus-20240229", + "claude-3-sonnet-20240229", + "claude-3-haiku-20240307" + ], + "openai": [ + "gpt-3.5-turbo", + "gpt-4-turbo", + "gpt-4o" + ] + } + return models.get(generator, ["default"]) -# Launch the app -if __name__ == "__main__": - demo.launch() --
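+
+# Example (sketch of the lookup contract above):
+#   >>> get_text_generator_models("gemini")[0]
+#   'gemini-2.0-flash'
+#   >>> get_text_generator_models("unknown")
+#   ['default']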