AZILS committed (verified)
Commit 554a992 · 1 Parent(s): 889ab84

Update app.py

Files changed (1)
  1. app.py +699 -488
app.py CHANGED
@@ -31,14 +31,13 @@ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
31
  STATIC_DIR = os.path.join(BASE_DIR, "static")
32
  MUSIC_DIR = os.path.join(STATIC_DIR, "music")
33
  FONTS_DIR = os.path.join(STATIC_DIR, "fonts")
34
- # Use temp directory for faster file operations
35
- CACHE_DIR = os.path.join(tempfile.gettempdir(), "yt_shorts_generator")
36
 
37
  # Create necessary directories
38
  os.makedirs(STATIC_DIR, exist_ok=True)
39
  os.makedirs(MUSIC_DIR, exist_ok=True)
40
  os.makedirs(FONTS_DIR, exist_ok=True)
41
- os.makedirs(CACHE_DIR, exist_ok=True)
42
 
43
  # Helper functions for logging
44
  def info(message):
@@ -425,184 +424,166 @@ class YouTube:
425
  """Generate an image using the selected image generation model."""
426
  self.log(f"Generating image for prompt: {prompt[:50]}...")
427
 
428
- # Use simpler file naming for speed
429
- image_path = os.path.join(CACHE_DIR, f"img_{len(self.images)}_{int(time.time())}.png")
430
 
431
- try:
432
- if self.image_gen == "prodia":
433
- self.log("Using Prodia provider for image generation")
434
- s = requests.Session()
435
- headers = {
436
- "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
437
- }
438
-
439
- # Generate job
440
- self.log("Sending generation request to Prodia API")
441
- resp = s.get(
442
- "https://api.prodia.com/generate",
443
- params={
444
- "new": "true",
445
- "prompt": prompt,
446
- "model": self.image_model,
447
- "negative_prompt": "verybadimagenegative_v1.3",
448
- "steps": "20",
449
- "cfg": "7",
450
- "seed": random.randint(1, 10000),
451
- "sample": "DPM++ 2M Karras",
452
- "aspect_ratio": "square"
453
- },
454
- headers=headers
455
- )
456
-
457
- if resp.status_code != 200:
458
- raise Exception(f"Prodia API error: {resp.text}")
459
-
460
- job_id = resp.json()['job']
461
- self.log(f"Job created with ID: {job_id}")
462
-
463
- # Wait for generation to complete
464
- max_attempts = 30
465
- attempts = 0
466
- while attempts < max_attempts:
467
- attempts += 1
468
- time.sleep(2)
469
- status = s.get(f"https://api.prodia.com/job/{job_id}", headers=headers).json()
470
-
471
- if status["status"] == "succeeded":
472
- self.log("Image generation successful, downloading result")
473
- img_data = s.get(f"https://images.prodia.xyz/{job_id}.png?download=1", headers=headers).content
474
- with open(image_path, "wb") as f:
475
- f.write(img_data)
476
- self.images.append(image_path)
477
- self.log(success(f"Image saved to: {image_path}"))
478
- return image_path
479
-
480
- elif status["status"] == "failed":
481
- raise Exception(f"Prodia job failed: {status.get('error', 'Unknown error')}")
482
-
483
- # Still processing
484
- self.log(f"Still processing, attempt {attempts}/{max_attempts}...")
485
-
486
- raise Exception("Prodia job timed out")
487
 
488
- elif self.image_gen == "hercai":
489
- self.log("Using Hercai provider for image generation")
490
- url = f"https://hercai.onrender.com/{self.image_model}/text2image?prompt={prompt}"
491
- r = requests.get(url)
492
-
493
- if r.status_code != 200:
494
- raise Exception(f"Hercai API error: {r.text}")
495
 
496
- parsed = r.json()
497
- if "url" in parsed and parsed["url"]:
498
- self.log("Image URL received from Hercai")
499
- image_url = parsed["url"]
500
- img_data = requests.get(image_url).content
501
  with open(image_path, "wb") as f:
502
  f.write(img_data)
503
  self.images.append(image_path)
504
  self.log(success(f"Image saved to: {image_path}"))
505
  return image_path
506
- else:
507
- raise Exception("No image URL in Hercai response")
508
-
509
- elif self.image_gen == "g4f":
510
- self.log("Using G4F provider for image generation")
511
- from g4f.client import Client
512
- client = Client()
513
- response = client.images.generate(
514
- model=self.image_model,
515
- prompt=prompt,
516
- response_format="url"
517
- )
518
-
519
- if response and response.data and len(response.data) > 0:
520
- image_url = response.data[0].url
521
- image_response = requests.get(image_url)
522
-
523
- if image_response.status_code == 200:
524
- with open(image_path, "wb") as f:
525
- f.write(image_response.content)
526
- self.images.append(image_path)
527
- self.log(success(f"Image saved to: {image_path}"))
528
- return image_path
529
- else:
530
- raise Exception(f"Failed to download image from {image_url}")
531
- else:
532
- raise Exception("No image URL received from G4F")
533
-
534
- elif self.image_gen == "segmind":
535
- self.log("Using Segmind provider for image generation")
536
- api_key = os.environ.get("SEGMIND_API_KEY", "")
537
- if not api_key:
538
- raise ValueError("Segmind API key is not set. Please provide a valid API key.")
539
 
540
- headers = {
541
- "x-api-key": api_key,
542
- "Content-Type": "application/json"
543
- }
544
 
545
- response = requests.post(
546
- "https://api.segmind.com/v1/sdxl-turbo",
547
- json={
548
- "prompt": prompt,
549
- "negative_prompt": "blurry, low quality, distorted face, text, watermark",
550
- "samples": 1,
551
- "size": "1024x1024",
552
- "guidance_scale": 1.0
553
- },
554
- headers=headers
555
- )
556
-
557
- if response.status_code == 200:
558
- with open(image_path, "wb") as f:
559
- f.write(response.content)
560
- self.images.append(image_path)
561
- self.log(success(f"Image saved to: {image_path}"))
562
- return image_path
563
- else:
564
- raise Exception(f"Segmind request failed: {response.status_code} {response.text}")
565
 
566
- elif self.image_gen == "pollinations":
567
- self.log("Using Pollinations provider for image generation")
568
- response = requests.get(f"https://image.pollinations.ai/prompt/{prompt}{random.randint(1,10000)}")
569
 
570
- if response.status_code == 200:
571
- self.log("Image received from Pollinations")
572
  with open(image_path, "wb") as f:
573
- f.write(response.content)
574
  self.images.append(image_path)
575
  self.log(success(f"Image saved to: {image_path}"))
576
  return image_path
577
  else:
578
- raise Exception(f"Pollinations request failed with status code: {response.status_code}")
579
-
580
  else:
581
- # Create a fallback colored placeholder image instead of throwing an error
582
- self.log(f"Unknown provider '{self.image_gen}'. Generating placeholder image.")
583
- img = Image.new('RGB', (800, 800), color=(random.randint(0, 255),
584
- random.randint(0, 255),
585
- random.randint(0, 255)))
586
- img.save(image_path)
587
  self.images.append(image_path)
588
- self.log(warning(f"Created placeholder image at: {image_path}"))
589
  return image_path
590
-
591
- except Exception as e:
592
- error_msg = f"Image generation failed: {str(e)}"
593
- self.log(error(error_msg))
594
-
595
- # Create a fallback image instead of failing completely
596
- try:
597
- img = Image.new('RGB', (800, 800), color=(200, 200, 200))
598
- image_path = os.path.join(CACHE_DIR, f"error_img_{len(self.images)}_{int(time.time())}.png")
599
- img.save(image_path)
 
600
  self.images.append(image_path)
601
- self.log(warning(f"Created error placeholder image at: {image_path}"))
602
  return image_path
603
- except:
604
- # If all else fails, return None and handle it gracefully
605
- return None
606
 
607
  def generate_speech(self, text, output_format='mp3') -> str:
608
  """Generate speech from text using the selected TTS engine."""
@@ -614,144 +595,122 @@ class YouTube:
614
 
615
  self.log(f"Using TTS Engine: {self.tts_engine}, Voice: {self.tts_voice}")
616
 
617
- # Use simpler file naming for speed
618
- audio_path = os.path.join(CACHE_DIR, f"speech_{int(time.time())}.{output_format}")
619
 
620
- try:
621
- if self.tts_engine == "elevenlabs":
622
- self.log("Using ElevenLabs provider for speech generation")
623
- elevenlabs_api_key = os.environ.get("ELEVENLABS_API_KEY", "")
624
- if not elevenlabs_api_key:
625
- raise ValueError("ElevenLabs API key is not set. Please provide a valid API key.")
626
-
627
- headers = {
628
- "Accept": "audio/mpeg",
629
- "Content-Type": "application/json",
630
- "xi-api-key": elevenlabs_api_key
631
- }
632
-
633
- # Simplified payload to prevent "unusual activity" errors
634
- payload = {
635
- "text": text,
636
- "model_id": "eleven_monolingual_v1", # Use more stable model
637
- "voice_settings": {
638
- "stability": 0.5,
639
- "similarity_boost": 0.5
640
- }
641
- }
642
-
643
- # Map voice names to ElevenLabs voice IDs
644
- voice_id_mapping = {
645
- "Sarah": "21m00Tcm4TlvDq8ikWAM",
646
- "Brian": "hxppwzoRmvxK7YkDrjhQ",
647
- "Lily": "p7TAj7L6QVq1fE6XGyjR",
648
- "Monika Sogam": "Fc3XhIu9tfgOPOsU1hMr",
649
- "George": "o7lPjDgzlF8ZAeSpqmaN",
650
- "River": "f0k5evLkhJxrIRJXQJvy",
651
- "Matilda": "XrExE9yKIg1WjnnlVkGX",
652
- "Will": "pvKWM1B1sNRNTlEYYAEZ",
653
- "Jessica": "A5EAMYWMCSsLNL1wYxOv",
654
- "default": "21m00Tcm4TlvDq8ikWAM" # Default to Sarah
655
- }
656
-
657
- # Get the voice ID from mapping or use the voice name as ID if not found
658
- voice_id = voice_id_mapping.get(self.tts_voice, self.tts_voice)
659
-
660
- self.log(f"Using ElevenLabs voice: {self.tts_voice} (ID: {voice_id})")
661
-
662
- response = requests.post(
663
- url=f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}",
664
- json=payload,
665
- headers=headers
666
- )
667
-
668
- if response.status_code == 200:
669
- with open(audio_path, 'wb') as f:
670
- f.write(response.content)
671
- self.log(success(f"Speech generated successfully using ElevenLabs at {audio_path}"))
672
- else:
673
- try:
674
- error_data = response.json()
675
- error_message = error_data.get('detail', {}).get('message', response.text)
676
- error_status = error_data.get('status', 'error')
677
- raise Exception(f"ElevenLabs API error ({response.status_code}, {error_status}): {error_message}")
678
- except ValueError:
679
- # If JSON parsing fails, use the raw response
680
- raise Exception(f"ElevenLabs API error ({response.status_code}): {response.text}")
681
-
682
- elif self.tts_engine == "gtts":
683
- self.log("Using Google TTS provider for speech generation")
684
- from gtts import gTTS
685
- tts = gTTS(text=text, lang=self.language[:2].lower(), slow=False)
686
- tts.save(audio_path)
687
-
688
- elif self.tts_engine == "openai":
689
- self.log("Using OpenAI provider for speech generation")
690
- openai_api_key = os.environ.get("OPENAI_API_KEY", "")
691
- if not openai_api_key:
692
- raise ValueError("OpenAI API key is not set. Please provide a valid API key.")
693
-
694
- from openai import OpenAI
695
- client = OpenAI(api_key=openai_api_key)
696
-
697
- voice = self.tts_voice if self.tts_voice else "alloy"
698
- response = client.audio.speech.create(
699
- model="tts-1",
700
- voice=voice,
701
- input=text
702
- )
703
- response.stream_to_file(audio_path)
704
-
705
- elif self.tts_engine == "edge":
706
- self.log("Using Edge TTS provider for speech generation")
707
- import edge_tts
708
- import asyncio
709
-
710
- voice = self.tts_voice if self.tts_voice else "en-US-AriaNeural"
711
-
712
- async def generate():
713
- communicate = edge_tts.Communicate(text, voice)
714
- await communicate.save(audio_path)
715
-
716
- asyncio.run(generate())
717
 
718
  else:
719
- # Default to edge TTS if other methods aren't available
720
- self.log(f"Using default Edge TTS as fallback")
721
- import edge_tts
722
- import asyncio
723
-
724
- voice = "en-US-AriaNeural"
725
-
726
- async def generate():
727
- communicate = edge_tts.Communicate(text, voice)
728
- await communicate.save(audio_path)
729
-
730
- asyncio.run(generate())
731
 
732
- self.log(success(f"Speech generated and saved to: {audio_path}"))
733
- self.tts_path = audio_path
734
- return audio_path
 
735
 
736
- except Exception as e:
737
- error_msg = f"Speech generation failed: {str(e)}"
738
- self.log(error(error_msg))
739
 
740
- # Create a silent audio file as fallback
741
- try:
742
- from pydub import AudioSegment
743
- from pydub.generators import Sine
744
-
745
- # Generate 30 seconds of silence
746
- silence = AudioSegment.silent(duration=30000)
747
- silence.export(audio_path, format=output_format)
748
-
749
- self.log(warning(f"Created silent audio fallback at: {audio_path}"))
750
- self.tts_path = audio_path
751
- return audio_path
752
- except:
753
- self.log(error("Failed to create silent audio fallback"))
754
- return None
755
 
756
  def generate_subtitles(self, audio_path: str) -> dict:
757
  """Generate subtitles from audio using AssemblyAI."""
@@ -854,17 +813,24 @@ class YouTube:
854
 
855
  self.log(success(f"Generated {len(subtitles)} subtitle lines"))
856
 
857
  # Return the subtitle data and settings
858
  return {
859
  "wordlevel": wordlevel_info,
860
  "linelevel": subtitles,
 
861
  "settings": {
862
  "font": FONT,
863
  "fontsize": FONTSIZE,
864
  "color": COLOR,
865
  "bg_color": BG_COLOR,
866
  "position": self.subtitle_position,
867
- "highlighting_enabled": self.highlighting_enabled
 
868
  }
869
  }
870
 
@@ -872,9 +838,99 @@ class YouTube:
872
  error_msg = f"Error generating subtitles: {str(e)}"
873
  self.log(error(error_msg))
874
  raise Exception(error_msg)
875
 
876
  def create_subtitle_clip(self, subtitle_data, frame_size):
877
  """Create subtitle clips for a line of text with word-level highlighting."""
878
  settings = subtitle_data["settings"]
879
  font_name = settings["font"]
880
  fontsize = settings["fontsize"]
@@ -882,69 +938,39 @@ class YouTube:
882
  bg_color = settings["bg_color"]
883
  highlighting_enabled = settings["highlighting_enabled"]
884
 
885
- def create_text_clip(text, font_size, color, bg_color=None):
886
- try:
887
- # Try to use the specified font, fallback to default
888
- try:
889
- # Check if font is a path or just a name
890
- font_path = os.path.join(FONTS_DIR, f"{font_name}.ttf")
891
- if os.path.exists(font_path):
892
- pil_font = ImageFont.truetype(font_path, font_size)
893
- else:
894
- self.log(warning(f"Font {font_name} not found, using default"))
895
- pil_font = ImageFont.load_default()
896
- except Exception as e:
897
- self.log(warning(f"Error loading font: {str(e)}"))
898
- pil_font = ImageFont.load_default()
899
-
900
- # Get text size
901
- text_width, text_height = pil_font.getbbox(text)[2:4]
902
-
903
- # Add padding
904
- padding = 10
905
- img_width = text_width + padding * 2
906
- img_height = text_height + padding * 2
907
-
908
- # Create image with background color or transparent
909
- if bg_color:
910
- if bg_color.startswith('#'):
911
- bg_color_rgb = tuple(int(bg_color.lstrip('#')[i:i+2], 16) for i in (0, 2, 4))
912
- else:
913
- bg_color_rgb = (0, 0, 255) # Default blue
914
- img = Image.new('RGB', (img_width, img_height), color=bg_color_rgb)
915
- else:
916
- img = Image.new('RGBA', (img_width, img_height), color=(0, 0, 0, 0))
917
-
918
- # Draw text
919
- draw = ImageDraw.Draw(img)
920
- if color.startswith('#'):
921
- text_color_rgb = tuple(int(color.lstrip('#')[i:i+2], 16) for i in (0, 2, 4))
922
- else:
923
- text_color_rgb = (255, 255, 255) # Default white
924
-
925
- draw.text((padding, padding), text, font=pil_font, fill=text_color_rgb)
926
-
927
- # Convert to numpy array for MoviePy
928
- img_array = np.array(img)
929
- clip = ImageClip(img_array)
930
- return clip, img_width, img_height
931
-
932
- except Exception as e:
933
- self.log(warning(f"Error creating text clip: {str(e)}"))
934
- # Create a simple colored rectangle as fallback
935
- img = Image.new('RGB', (100, 50), color=(100, 100, 100))
936
- img_array = np.array(img)
937
- clip = ImageClip(img_array)
938
- return clip, 100, 50
939
 
940
  subtitle_clips = []
941
 
942
- for line in subtitle_data["linelevel"]:
943
- x_pos = 0
944
- y_pos = 0
945
- word_positions = []
946
 
947
- # Calculate vertical position based on subtitle position setting
948
  if settings["position"] == "top":
949
  y_buffer = frame_size[1] * 0.1 # 10% from top
950
  elif settings["position"] == "middle":
@@ -952,70 +978,213 @@ class YouTube:
952
  else: # bottom
953
  y_buffer = frame_size[1] * 0.7 # 70% from top
954
 
955
- x_buffer = frame_size[0] * 0.1 # 10% from left
956
  space_width = 20
957
 
958
- # Create clips for each word in the line
959
- for word_data in line["words"]:
960
- word = word_data["word"]
961
- start_time = word_data["start"]
962
- end_time = word_data["end"]
963
- duration = end_time - start_time
964
-
965
- # Create word clip
966
- word_clip, word_width, word_height = create_text_clip(word, fontsize, color)
967
-
968
- # Check if word fits on current line
969
- if x_pos + word_width + space_width > frame_size[0] - 2 * x_buffer:
970
- x_pos = 0
971
- y_pos += word_height + 20
972
-
973
- # Store word position info
974
- word_positions.append({
975
- "word": word,
976
- "x_pos": x_pos + x_buffer,
977
- "y_pos": y_pos + y_buffer,
978
- "width": word_width,
979
- "height": word_height,
980
- "start": start_time,
981
- "end": end_time
982
- })
983
-
984
- # Set position and timing for word clip
985
- word_clip = word_clip.set_position((x_pos + x_buffer, y_pos + y_buffer))
986
- word_clip = word_clip.set_start(line["start"]).set_duration(line["end"] - line["start"])
987
- subtitle_clips.append(word_clip)
988
-
989
- # Add space after word
990
- space_clip, _, _ = create_text_clip(" ", fontsize, color)
991
- space_clip = space_clip.set_position((x_pos + word_width + x_buffer, y_pos + y_buffer))
992
- space_clip = space_clip.set_start(line["start"]).set_duration(line["end"] - line["start"])
993
- subtitle_clips.append(space_clip)
994
 
995
- x_pos += word_width + space_width
996
-
997
- # Add highlighted words if enabled
998
- if highlighting_enabled and bg_color:
999
- for word_pos in word_positions:
1000
- highlight_clip, _, _ = create_text_clip(
1001
- word_pos["word"],
1002
- fontsize,
1003
- color,
1004
- bg_color
1005
- )
1006
- highlight_clip = highlight_clip.set_position((word_pos["x_pos"], word_pos["y_pos"]))
1007
- highlight_clip = highlight_clip.set_start(word_pos["start"]).set_duration(word_pos["end"] - word_pos["start"])
1008
- subtitle_clips.append(highlight_clip)
1009
-
1010
- return subtitle_clips
1011
 
1012
  def combine(self) -> str:
1013
  """Combine images, audio, and subtitles into a final video."""
1014
  self.progress(0.8, desc="Creating final video")
1015
  self.log("Combining images and audio into final video")
1016
  try:
1017
- # Use simple file naming for faster processing
1018
- output_path = os.path.join(CACHE_DIR, f"output_{int(time.time())}.mp4")
1019
 
1020
  # Check for required files
1021
  if not self.images:
@@ -1032,64 +1201,75 @@ class YouTube:
1032
  num_images = len(self.images)
1033
  req_dur = max_duration / num_images
1034
 
1035
- # Create video clips from images
 
1036
  clips = []
1037
  tot_dur = 0
1038
 
1039
- # Loop through images, repeating if necessary to fill audio duration
1040
- while tot_dur < max_duration:
1041
- for image_path in self.images:
1042
- # Check if image exists and is valid
1043
- if not os.path.exists(image_path):
1044
- self.log(warning(f"Image not found: {image_path}, skipping"))
1045
- continue
1046
 
1047
- try:
1048
- clip = ImageClip(image_path)
1049
- clip = clip.set_duration(req_dur)
1050
- clip = clip.set_fps(30)
1051
-
1052
- # Handle aspect ratio (vertical video for shorts)
1053
- aspect_ratio = 9/16 # Standard vertical video ratio
1054
- if clip.w / clip.h < aspect_ratio:
1055
- # Image is too tall, crop height
1056
- clip = crop(
1057
- clip,
1058
- width=clip.w,
1059
- height=round(clip.w / aspect_ratio),
1060
- x_center=clip.w / 2,
1061
- y_center=clip.h / 2
1062
- )
1063
- else:
1064
- # Image is too wide, crop width
1065
- clip = crop(
1066
- clip,
1067
- width=round(aspect_ratio * clip.h),
1068
- height=clip.h,
1069
- x_center=clip.w / 2,
1070
- y_center=clip.h / 2
1071
- )
1072
-
1073
- # Resize to standard size for shorts
1074
- clip = clip.resize((1080, 1920))
1075
- clips.append(clip)
1076
- tot_dur += clip.duration
1077
-
1078
- # If we've exceeded the duration, break
1079
- if tot_dur >= max_duration:
1080
- break
1081
- except Exception as e:
1082
- self.log(warning(f"Error processing image {image_path}: {str(e)}"))
1083
 
1084
  # Create video from clips
1085
  self.log(f"Creating video from {len(clips)} clips")
1086
  final_clip = concatenate_videoclips(clips)
1087
  final_clip = final_clip.set_fps(30)
1088
 
1089
- # Add subtitles if enabled
 
1090
  if self.subtitles_enabled and hasattr(self, 'subtitle_data'):
1091
- subtitle_clips = self.create_subtitle_clip(self.subtitle_data, (1080, 1920))
1092
- final_clip = CompositeVideoClip([final_clip] + subtitle_clips)
 
 
1093
 
1094
  # Add background music if available
1095
  music_path = None
@@ -1121,7 +1301,7 @@ class YouTube:
1121
  # Set final audio
1122
  final_clip = final_clip.set_audio(final_audio)
1123
 
1124
- # Write final video - use faster encoding settings
1125
  self.log("Writing final video file")
1126
  final_clip.write_videofile(
1127
  output_path,
@@ -1129,7 +1309,7 @@ class YouTube:
1129
  codec="libx264",
1130
  audio_codec="aac",
1131
  threads=4,
1132
- # Remove preset parameter for faster encoding
1133
  )
1134
 
1135
  self.log(success(f"Video saved to: {output_path}"))
@@ -1138,34 +1318,33 @@ class YouTube:
1138
  except Exception as e:
1139
  error_msg = f"Error combining video: {str(e)}"
1140
  self.log(error(error_msg))
1141
-
1142
- # Create a minimal fallback video if possible
1143
- try:
1144
- # Try to create a simple video with just the first image and audio
1145
- fallback_path = os.path.join(CACHE_DIR, f"fallback_{int(time.time())}.mp4")
1146
-
1147
- if self.images and os.path.exists(self.images[0]) and hasattr(self, 'tts_path') and os.path.exists(self.tts_path):
1148
- img_clip = ImageClip(self.images[0]).set_duration(10)
1149
- img_clip = img_clip.resize((1080, 1920))
1150
- audio_clip = AudioFileClip(self.tts_path).subclip(0, min(10, AudioFileClip(self.tts_path).duration))
1151
- video_clip = img_clip.set_audio(audio_clip)
1152
- video_clip.write_videofile(fallback_path, threads=2, codec='libx264', audio_codec='aac')
1153
-
1154
- self.log(warning(f"Created fallback video at: {fallback_path}"))
1155
- return fallback_path
1156
- else:
1157
- raise Exception("Cannot create fallback video: missing images or audio")
1158
- except Exception as fallback_error:
1159
- self.log(error(f"Failed to create fallback video: {str(fallback_error)}"))
1160
- return None
1161
 
1162
  def generate_video(self) -> dict:
1163
  """Generate complete video with all components."""
1164
  try:
1165
  self.log("Starting video generation process")
1166
 
1167
- # Create a simple generation directory - avoid complex numbering schemes
1168
- self.generation_folder = os.path.join(CACHE_DIR, f"gen_{int(time.time())}")
1169
  os.makedirs(self.generation_folder, exist_ok=True)
1170
  self.log(f"Created generation folder: {self.generation_folder}")
1171
 
@@ -1206,8 +1385,46 @@ class YouTube:
1206
  self.progress(0.7, desc="Generating subtitles")
1207
  if self.subtitles_enabled and hasattr(self, 'tts_path') and os.path.exists(self.tts_path):
1208
  self.subtitle_data = self.generate_subtitles(self.tts_path)
1209
 
1210
- # Step 8: Combine all elements into final video
1211
  self.progress(0.8, desc="Creating final video")
1212
  self.log("Combining all elements into final video")
1213
  path = self.combine()
@@ -1229,13 +1446,7 @@ class YouTube:
1229
  except Exception as e:
1230
  error_msg = f"Error during video generation: {str(e)}"
1231
  self.log(error(error_msg))
1232
-
1233
- # Return basic data even on error
1234
- return {
1235
- 'video_path': getattr(self, 'video_path', None),
1236
- 'error': str(e),
1237
- 'logs': self.logs
1238
- }
1239
 
1240
  # Data for dynamic dropdowns
1241
  def get_text_generator_models(generator):
@@ -1377,12 +1588,12 @@ def create_interface():
1377
  text_gen = gr.Dropdown(
1378
  choices=["g4f", "gemini", "openai"],
1379
  label="Text Generator",
1380
- value="g4f"
1381
  )
1382
  text_model = gr.Dropdown(
1383
  choices=get_text_generator_models("g4f"),
1384
  label="Text Model",
1385
- value="gpt-4"
1386
  )
1387
 
1388
  with gr.TabItem("Image"):
@@ -1621,7 +1832,7 @@ if __name__ == "__main__":
1621
  os.makedirs(STATIC_DIR, exist_ok=True)
1622
  os.makedirs(MUSIC_DIR, exist_ok=True)
1623
  os.makedirs(FONTS_DIR, exist_ok=True)
1624
- os.makedirs(CACHE_DIR, exist_ok=True)
1625
 
1626
  # Launch the app
1627
  demo = create_interface()
 
31
  STATIC_DIR = os.path.join(BASE_DIR, "static")
32
  MUSIC_DIR = os.path.join(STATIC_DIR, "music")
33
  FONTS_DIR = os.path.join(STATIC_DIR, "fonts")
34
+ STORAGE_DIR = os.path.join(BASE_DIR, "storage")
 
35
 
36
  # Create necessary directories
37
  os.makedirs(STATIC_DIR, exist_ok=True)
38
  os.makedirs(MUSIC_DIR, exist_ok=True)
39
  os.makedirs(FONTS_DIR, exist_ok=True)
40
+ os.makedirs(STORAGE_DIR, exist_ok=True)
41
 
42
  # Helper functions for logging
43
  def info(message):
 
424
  """Generate an image using the selected image generation model."""
425
  self.log(f"Generating image for prompt: {prompt[:50]}...")
426
 
427
+ # Always save images directly to the generation folder when it exists
428
+ if hasattr(self, 'generation_folder') and os.path.exists(self.generation_folder):
429
+ image_path = os.path.join(self.generation_folder, f"img_{uuid.uuid4()}_{int(time.time())}.png")
430
+ else:
431
+ # Use STORAGE_DIR if no generation folder
432
+ image_path = os.path.join(STORAGE_DIR, f"img_{uuid.uuid4()}_{int(time.time())}.png")
433
 
434
+ if self.image_gen == "prodia":
435
+ self.log("Using Prodia provider for image generation")
436
+ s = requests.Session()
437
+ headers = {
438
+ "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
439
+ }
440
 
441
+ # Generate job
442
+ self.log("Sending generation request to Prodia API")
443
+ resp = s.get(
444
+ "https://api.prodia.com/generate",
445
+ params={
446
+ "new": "true",
447
+ "prompt": prompt,
448
+ "model": self.image_model,
449
+ "negative_prompt": "verybadimagenegative_v1.3",
450
+ "steps": "20",
451
+ "cfg": "7",
452
+ "seed": random.randint(1, 10000),
453
+ "sample": "DPM++ 2M Karras",
454
+ "aspect_ratio": "square"
455
+ },
456
+ headers=headers
457
+ )
458
+
459
+ if resp.status_code != 200:
460
+ raise Exception(f"Prodia API error: {resp.text}")
461
+
462
+ job_id = resp.json()['job']
463
+ self.log(f"Job created with ID: {job_id}")
464
+
465
+ # Wait for generation to complete
466
+ max_attempts = 30
467
+ attempts = 0
468
+ while attempts < max_attempts:
469
+ attempts += 1
470
+ time.sleep(2)
471
+ status = s.get(f"https://api.prodia.com/job/{job_id}", headers=headers).json()
472
 
473
+ if status["status"] == "succeeded":
474
+ self.log("Image generation successful, downloading result")
475
+ img_data = s.get(f"https://images.prodia.xyz/{job_id}.png?download=1", headers=headers).content
476
  with open(image_path, "wb") as f:
477
  f.write(img_data)
478
  self.images.append(image_path)
479
  self.log(success(f"Image saved to: {image_path}"))
480
  return image_path
481
 
482
+ elif status["status"] == "failed":
483
+ raise Exception(f"Prodia job failed: {status.get('error', 'Unknown error')}")
484
 
485
+ # Still processing
486
+ self.log(f"Still processing, attempt {attempts}/{max_attempts}...")
487
+
488
+ raise Exception("Prodia job timed out")
489
+
490
+ elif self.image_gen == "hercai":
491
+ self.log("Using Hercai provider for image generation")
492
+ url = f"https://hercai.onrender.com/{self.image_model}/text2image?prompt={prompt}"
493
+ r = requests.get(url)
494
+
495
+ if r.status_code != 200:
496
+ raise Exception(f"Hercai API error: {r.text}")
497
+
498
+ parsed = r.json()
499
+ if "url" in parsed and parsed["url"]:
500
+ self.log("Image URL received from Hercai")
501
+ image_url = parsed["url"]
502
+ img_data = requests.get(image_url).content
503
+ with open(image_path, "wb") as f:
504
+ f.write(img_data)
505
+ self.images.append(image_path)
506
+ self.log(success(f"Image saved to: {image_path}"))
507
+ return image_path
508
+ else:
509
+ raise Exception("No image URL in Hercai response")
510
+
511
+ elif self.image_gen == "g4f":
512
+ self.log("Using G4F provider for image generation")
513
+ from g4f.client import Client
514
+ client = Client()
515
+ response = client.images.generate(
516
+ model=self.image_model,
517
+ prompt=prompt,
518
+ response_format="url"
519
+ )
520
 
521
+ if response and response.data and len(response.data) > 0:
522
+ image_url = response.data[0].url
523
+ image_response = requests.get(image_url)
524
 
525
+ if image_response.status_code == 200:
 
526
  with open(image_path, "wb") as f:
527
+ f.write(image_response.content)
528
  self.images.append(image_path)
529
  self.log(success(f"Image saved to: {image_path}"))
530
  return image_path
531
  else:
532
+ raise Exception(f"Failed to download image from {image_url}")
 
533
  else:
534
+ raise Exception("No image URL received from G4F")
535
+
536
+ elif self.image_gen == "segmind":
537
+ self.log("Using Segmind provider for image generation")
538
+ api_key = os.environ.get("SEGMIND_API_KEY", "")
539
+ if not api_key:
540
+ raise ValueError("Segmind API key is not set. Please provide a valid API key.")
541
+
542
+ headers = {
543
+ "x-api-key": api_key,
544
+ "Content-Type": "application/json"
545
+ }
546
+
547
+ response = requests.post(
548
+ "https://api.segmind.com/v1/sdxl-turbo",
549
+ json={
550
+ "prompt": prompt,
551
+ "negative_prompt": "blurry, low quality, distorted face, text, watermark",
552
+ "samples": 1,
553
+ "size": "1024x1024",
554
+ "guidance_scale": 1.0
555
+ },
556
+ headers=headers
557
+ )
558
+
559
+ if response.status_code == 200:
560
+ with open(image_path, "wb") as f:
561
+ f.write(response.content)
562
  self.images.append(image_path)
563
+ self.log(success(f"Image saved to: {image_path}"))
564
  return image_path
565
+ else:
566
+ raise Exception(f"Segmind request failed: {response.status_code} {response.text}")
567
+
568
+ elif self.image_gen == "pollinations":
569
+ self.log("Using Pollinations provider for image generation")
570
+ response = requests.get(f"https://image.pollinations.ai/prompt/{prompt}{random.randint(1,10000)}")
571
+
572
+ if response.status_code == 200:
573
+ self.log("Image received from Pollinations")
574
+ with open(image_path, "wb") as f:
575
+ f.write(response.content)
576
  self.images.append(image_path)
577
+ self.log(success(f"Image saved to: {image_path}"))
578
  return image_path
579
+ else:
580
+ raise Exception(f"Pollinations request failed with status code: {response.status_code}")
581
+
582
+ else:
583
+ # No fallback, raise an exception for unsupported image generator
584
+ error_msg = f"Unsupported image generator: {self.image_gen}"
585
+ self.log(error(error_msg))
586
+ raise ValueError(error_msg)
587
 
588
  def generate_speech(self, text, output_format='mp3') -> str:
589
  """Generate speech from text using the selected TTS engine."""
 
595
 
596
  self.log(f"Using TTS Engine: {self.tts_engine}, Voice: {self.tts_voice}")
597
 
598
+ # Always save to the generation folder when available
599
+ if hasattr(self, 'generation_folder') and os.path.exists(self.generation_folder):
600
+ audio_path = os.path.join(self.generation_folder, f"speech_{uuid.uuid4()}_{int(time.time())}.{output_format}")
601
+ else:
602
+ # Use STORAGE_DIR if no generation folder
603
+ audio_path = os.path.join(STORAGE_DIR, f"speech_{uuid.uuid4()}_{int(time.time())}.{output_format}")
604
 
605
+ if self.tts_engine == "elevenlabs":
606
+ self.log("Using ElevenLabs provider for speech generation")
607
+ elevenlabs_api_key = os.environ.get("ELEVENLABS_API_KEY", "")
608
+ if not elevenlabs_api_key:
609
+ raise ValueError("ElevenLabs API key is not set. Please provide a valid API key.")
610
+
611
+ headers = {
612
+ "Accept": "audio/mpeg",
613
+ "Content-Type": "application/json",
614
+ "xi-api-key": elevenlabs_api_key
615
+ }
616
+
617
+ payload = {
618
+ "text": text,
619
+ "model_id": "eleven_turbo_v2", # Using latest and most capable model
620
+ "voice_settings": {
621
+ "stability": 0.5,
622
+ "similarity_boost": 0.5,
623
+ "style": 0.0,
624
+ "use_speaker_boost": True
625
+ },
626
+ "output_format": "mp3_44100_128", # Higher quality audio (44.1kHz, 128kbps)
627
+ "optimize_streaming_latency": 0 # Optimize for quality over latency
628
+ }
629
+
630
+ # Map voice names to ElevenLabs voice IDs
631
+ voice_id_mapping = {
632
+ "Sarah": "21m00Tcm4TlvDq8ikWAM",
633
+ "Brian": "hxppwzoRmvxK7YkDrjhQ",
634
+ "Lily": "p7TAj7L6QVq1fE6XGyjR",
635
+ "Monika Sogam": "Fc3XhIu9tfgOPOsU1hMr",
636
+ "George": "o7lPjDgzlF8ZAeSpqmaN",
637
+ "River": "f0k5evLkhJxrIRJXQJvy",
638
+ "Matilda": "XrExE9yKIg1WjnnlVkGX",
639
+ "Will": "pvKWM1B1sNRNTlEYYAEZ",
640
+ "Jessica": "A5EAMYWMCSsLNL1wYxOv",
641
+ "default": "21m00Tcm4TlvDq8ikWAM" # Default to Sarah
642
+ }
643
+
644
+ # Get the voice ID from mapping or use the voice name as ID if not found
645
+ voice_id = voice_id_mapping.get(self.tts_voice, self.tts_voice)
646
+
647
+ self.log(f"Using ElevenLabs voice: {self.tts_voice} (ID: {voice_id})")
648
 
649
+ response = requests.post(
650
+ url=f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}",
651
+ json=payload,
652
+ headers=headers
653
+ )
654
+
655
+ if response.status_code == 200:
656
+ with open(audio_path, 'wb') as f:
657
+ f.write(response.content)
658
+ self.log(success(f"Speech generated successfully using ElevenLabs at {audio_path}"))
659
  else:
660
+ try:
661
+ error_data = response.json()
662
+ error_message = error_data.get('detail', {}).get('message', response.text)
663
+ error_status = error_data.get('status', 'error')
664
+ raise Exception(f"ElevenLabs API error ({response.status_code}, {error_status}): {error_message}")
665
+ except ValueError:
666
+ # If JSON parsing fails, use the raw response
667
+ raise Exception(f"ElevenLabs API error ({response.status_code}): {response.text}")
668
+
669
+ elif self.tts_engine == "gtts":
670
+ self.log("Using Google TTS provider for speech generation")
671
+ from gtts import gTTS
672
+ tts = gTTS(text=text, lang=self.language[:2].lower(), slow=False)
673
+ tts.save(audio_path)
674
+
675
+ elif self.tts_engine == "openai":
676
+ self.log("Using OpenAI provider for speech generation")
677
+ openai_api_key = os.environ.get("OPENAI_API_KEY", "")
678
+ if not openai_api_key:
679
+ raise ValueError("OpenAI API key is not set. Please provide a valid API key.")
680
+
681
+ from openai import OpenAI
682
+ client = OpenAI(api_key=openai_api_key)
683
+
684
+ voice = self.tts_voice if self.tts_voice else "alloy"
685
+ response = client.audio.speech.create(
686
+ model="tts-1",
687
+ voice=voice,
688
+ input=text
689
+ )
690
+ response.stream_to_file(audio_path)
691
 
692
+ elif self.tts_engine == "edge":
693
+ self.log("Using Edge TTS provider for speech generation")
694
+ import edge_tts
695
+ import asyncio
696
 
697
+ voice = self.tts_voice if self.tts_voice else "en-US-AriaNeural"
698
 
699
+ async def generate():
700
+ communicate = edge_tts.Communicate(text, voice)
701
+ await communicate.save(audio_path)
702
+
703
+ asyncio.run(generate())
704
+
705
+ else:
706
+ # No fallback, raise an exception for unsupported TTS engine
707
+ error_msg = f"Unsupported TTS engine: {self.tts_engine}"
708
+ self.log(error(error_msg))
709
+ raise ValueError(error_msg)
710
+
711
+ self.log(success(f"Speech generated and saved to: {audio_path}"))
712
+ self.tts_path = audio_path
713
+ return audio_path
714
 
715
  def generate_subtitles(self, audio_path: str) -> dict:
716
  """Generate subtitles from audio using AssemblyAI."""
 
813
 
814
  self.log(success(f"Generated {len(subtitles)} subtitle lines"))
815
 
816
+ # Pre-wrap subtitle lines for more efficient rendering
817
+ self.log("Pre-calculating subtitle line wrapping...")
818
+ wrapped_subtitles = self._pre_wrap_subtitle_lines(subtitles, FRAME_SIZE, FONT, FONTSIZE)
819
+ self.log(success(f"Pre-wrapped {len(wrapped_subtitles)} subtitle lines"))
820
+
821
  # Return the subtitle data and settings
822
  return {
823
  "wordlevel": wordlevel_info,
824
  "linelevel": subtitles,
825
+ "wrappedlines": wrapped_subtitles,
826
  "settings": {
827
  "font": FONT,
828
  "fontsize": FONTSIZE,
829
  "color": COLOR,
830
  "bg_color": BG_COLOR,
831
  "position": self.subtitle_position,
832
+ "highlighting_enabled": self.highlighting_enabled,
833
+ "subtitles_enabled": self.subtitles_enabled
834
  }
835
  }
836
 
 
838
  error_msg = f"Error generating subtitles: {str(e)}"
839
  self.log(error(error_msg))
840
  raise Exception(error_msg)
841
+
842
+ def _pre_wrap_subtitle_lines(self, subtitles, frame_size, font_name, font_size):
843
+ """Pre-calculate line wrapping for subtitles based on video dimensions."""
844
+ self.log("Pre-calculating subtitle line wrapping")
845
+
846
+ # Load the font once
847
+ try:
848
+ font_path = os.path.join(FONTS_DIR, f"{font_name}.ttf")
849
+ if os.path.exists(font_path):
850
+ pil_font = ImageFont.truetype(font_path, font_size)
851
+ else:
852
+ self.log(warning(f"Font {font_name} not found, using default"))
853
+ pil_font = ImageFont.load_default()
854
+ except Exception as e:
855
+ self.log(warning(f"Error loading font: {str(e)}"))
856
+ pil_font = ImageFont.load_default()
857
+
858
+ # Calculate max width for text (80% of frame width)
859
+ max_width = frame_size[0] * 0.8
860
+ x_buffer = frame_size[0] * 0.1 # 10% buffer on each side
861
+ space_width = 20 # Approximate space width
862
+
863
+ wrapped_subtitles = []
864
+
865
+ for line in subtitles:
866
+ # Process the line into visual lines with exact positions
867
+ visual_lines = []
868
+ current_line = []
869
+ current_x = 0
870
+ line_number = 0
871
+
872
+ # Break points for natural text wrapping
873
+ break_points = {'.', ',', '!', '?', ';', ':', '-', '—'}
874
+
875
+ for word_data in line["words"]:
876
+ word = word_data["word"]
877
+ # Get word width including space
878
+ try:
879
+ word_width = pil_font.getbbox(word)[2] + space_width
880
+ except:
881
+ # Fallback if getbbox fails
882
+ word_width = len(word) * (font_size // 2) + space_width
883
+
884
+ # Check if word contains a break point
885
+ has_break = any(char in break_points for char in word)
886
+
887
+ # If this word would overflow or has a break point, start a new visual line
888
+ if (current_x + word_width > max_width and current_line) or (has_break and current_line and current_x > max_width * 0.7):
889
+ # Store this completed visual line
890
+ visual_line_text = " ".join(w["word"] for w in current_line)
891
+ visual_lines.append({
892
+ "line_number": line_number,
893
+ "text": visual_line_text,
894
+ "words": current_line.copy()
895
+ })
896
+ current_line = []
897
+ current_x = 0
898
+ line_number += 1
899
+
900
+ # Add word position information
901
+ positioned_word = word_data.copy()
902
+ positioned_word["x_offset"] = current_x
903
+ positioned_word["y_line"] = line_number
904
+ positioned_word["width"] = word_width
905
+
906
+ current_line.append(positioned_word)
907
+ current_x += word_width
908
+
909
+ # Add the last line if it exists
910
+ if current_line:
911
+ visual_line_text = " ".join(w["word"] for w in current_line)
912
+ visual_lines.append({
913
+ "line_number": line_number,
914
+ "text": visual_line_text,
915
+ "words": current_line
916
+ })
917
+
918
+ # Return the wrapped line with visual formatting
919
+ wrapped_subtitles.append({
920
+ "original_text": line["text"],
921
+ "start": line["start"],
922
+ "end": line["end"],
923
+ "visual_lines": visual_lines
924
+ })
925
+
926
+ return wrapped_subtitles
927
 
928
  def create_subtitle_clip(self, subtitle_data, frame_size):
929
  """Create subtitle clips for a line of text with word-level highlighting."""
930
+ # Early return if subtitles are disabled
931
+ if not self.subtitles_enabled:
932
+ return []
933
+
934
  settings = subtitle_data["settings"]
935
  font_name = settings["font"]
936
  fontsize = settings["fontsize"]
 
938
  bg_color = settings["bg_color"]
939
  highlighting_enabled = settings["highlighting_enabled"]
940
 
941
+ # Pre-calculate text and background colors once
942
+ if color.startswith('#'):
943
+ text_color_rgb = tuple(int(color.lstrip('#')[i:i+2], 16) for i in (0, 2, 4))
944
+ else:
945
+ text_color_rgb = (255, 255, 255) # Default white
946
+
947
+ if bg_color and bg_color.startswith('#'):
948
+ bg_color_rgb = tuple(int(bg_color.lstrip('#')[i:i+2], 16) for i in (0, 2, 4))
949
+ else:
950
+ bg_color_rgb = (0, 0, 255) # Default blue
951
 
952
+ # Load font only once
953
+ try:
954
+ font_path = os.path.join(FONTS_DIR, f"{font_name}.ttf")
955
+ if os.path.exists(font_path):
956
+ pil_font = ImageFont.truetype(font_path, fontsize)
957
+ else:
958
+ self.log(warning(f"Font {font_name} not found, using default"))
959
+ pil_font = ImageFont.load_default()
960
+ except Exception as e:
961
+ self.log(warning(f"Error loading font: {str(e)}"))
962
+ pil_font = ImageFont.load_default()
963
+
964
+ # Pre-calculate common values
965
+ padding = 10
966
  subtitle_clips = []
967
 
968
+ # Check if we have pre-wrapped lines (faster method)
969
+ if "wrappedlines" in subtitle_data and subtitle_data["wrappedlines"]:
970
+ self.log("Using pre-wrapped subtitle lines for faster rendering")
971
+ wrapped_subtitles = subtitle_data["wrappedlines"]
972
 
973
+ # Calculate vertical position offset based on subtitle position setting
974
  if settings["position"] == "top":
975
  y_buffer = frame_size[1] * 0.1 # 10% from top
976
  elif settings["position"] == "middle":
 
978
  else: # bottom
979
  y_buffer = frame_size[1] * 0.7 # 70% from top
980
 
981
+ # Create optimized text clip function that reuses font and color calculations
982
+ def create_text_clip(text, bg_color=None):
983
+ try:
984
+ # Get text size
985
+ text_width, text_height = pil_font.getbbox(text)[2:4]
986
+
987
+ # Add padding
988
+ img_width = text_width + padding * 2
989
+ img_height = text_height + padding * 2
990
+
991
+ # Create image with background color or transparent
992
+ if bg_color:
993
+ img = Image.new('RGB', (img_width, img_height), color=bg_color_rgb)
994
+ else:
995
+ img = Image.new('RGBA', (img_width, img_height), color=(0, 0, 0, 0))
996
+
997
+ # Draw text
998
+ draw = ImageDraw.Draw(img)
999
+ draw.text((padding, padding), text, font=pil_font, fill=text_color_rgb)
1000
+
1001
+ # Convert to numpy array for MoviePy
1002
+ img_array = np.array(img)
1003
+ clip = ImageClip(img_array)
1004
+ return clip, img_width, img_height
1005
+
1006
+ except Exception as e:
1007
+ self.log(warning(f"Error creating text clip: {str(e)}"))
1008
+ # Create a simple colored rectangle as fallback
1009
+ img = Image.new('RGB', (100, 50), color=(100, 100, 100))
1010
+ img_array = np.array(img)
1011
+ clip = ImageClip(img_array)
1012
+ return clip, 100, 50
1013
+
1014
+ # Process each pre-wrapped line
1015
+ for wrapped_line in wrapped_subtitles:
1016
+ line_start = wrapped_line["start"]
1017
+ line_end = wrapped_line["end"]
1018
+ line_duration = line_end - line_start
1019
+
1020
+ # Process each visual line separately
1021
+ for visual_line in wrapped_line["visual_lines"]:
1022
+ line_number = visual_line["line_number"]
1023
+ line_text = visual_line["text"]
1024
+
1025
+ # Calculate vertical position including line number offset
1026
+ line_y = y_buffer + (line_number * (fontsize + 20))
1027
+
1028
+ # Create the line clip
1029
+ line_clip, line_width, _ = create_text_clip(line_text)
1030
+ line_clip = line_clip.set_position(('center', line_y))
1031
+ line_clip = line_clip.set_start(line_start).set_duration(line_duration)
1032
+ subtitle_clips.append(line_clip)
1033
+
1034
+ # Add word highlights if enabled
1035
+ if highlighting_enabled and bg_color:
1036
+ # Calculate center offset for word positioning
1037
+ center_offset = (frame_size[0] - line_width) / 2
1038
+
1039
+ for word_data in visual_line["words"]:
1040
+ word = word_data["word"]
1041
+ word_start = word_data["start"]
1042
+ word_end = word_data["end"]
1043
+ x_offset = word_data["x_offset"]
1044
+
1045
+ # Create highlight clip
1046
+ highlight_clip, _, _ = create_text_clip(word, bg_color)
1047
+ highlight_clip = highlight_clip.set_position((center_offset + x_offset, line_y))
1048
+ highlight_clip = highlight_clip.set_start(word_start).set_duration(word_end - word_start)
1049
+ subtitle_clips.append(highlight_clip)
1050
+
1051
+ return subtitle_clips
1052
+
1053
+ # Fallback to old method if pre-wrapped lines aren't available
1054
+ else:
1055
+ self.log("Using standard subtitle rendering method")
1056
+
1057
+ # Legacy code for compatibility (should not normally be used)
1058
+ # (existing code from current create_subtitle_clip method)
1059
  space_width = 20
1060
 
1061
+ # Process each line
1062
+ for line in subtitle_data["linelevel"]:
1063
+ # Calculate vertical position once per line
1064
+ if settings["position"] == "top":
1065
+ y_buffer = frame_size[1] * 0.1 # 10% from top
1066
+ elif settings["position"] == "middle":
1067
+ y_buffer = frame_size[1] * 0.4 # 40% from top
1068
+ else: # bottom
1069
+ y_buffer = frame_size[1] * 0.7 # 70% from top
1070
+
1071
+ x_buffer = frame_size[0] * 0.1 # 10% from left
1072
+
1073
+ # Process line in batches where possible
1074
+ x_pos = 0
1075
+ y_pos = 0
1076
+ word_positions = []
1077
+ line_duration = line["end"] - line["start"]
1078
+
1079
+ # Pre-calculate word metrics to avoid redundant calculations
1080
+ word_metrics = []
1081
+ for word_data in line["words"]:
1082
+ word = word_data["word"]
1083
+ # Get word width including space
1084
+ try:
1085
+ word_width = pil_font.getbbox(word)[2] + space_width
1086
+ except:
1087
+ # Fallback if getbbox fails
1088
+ word_width = len(word) * (fontsize // 2) + space_width
1089
+
1090
+ word_metrics.append({
1091
+ "word": word,
1092
+ "width": word_width,
1093
+ "height": fontsize,
1094
+ "start": word_data["start"],
1095
+ "end": word_data["end"]
1096
+ })
1097
+
1098
+ # Create optimized text clip function
1099
+ def create_text_clip(text, bg_color=None):
1100
+ try:
1101
+ # Get text size
1102
+ text_width, text_height = pil_font.getbbox(text)[2:4]
1103
+
1104
+ # Add padding
1105
+ img_width = text_width + padding * 2
1106
+ img_height = text_height + padding * 2
1107
+
1108
+ # Create image with background color or transparent
1109
+ if bg_color:
1110
+ img = Image.new('RGB', (img_width, img_height), color=bg_color_rgb)
1111
+ else:
1112
+ img = Image.new('RGBA', (img_width, img_height), color=(0, 0, 0, 0))
1113
+
1114
+ # Draw text
1115
+ draw = ImageDraw.Draw(img)
1116
+ draw.text((padding, padding), text, font=pil_font, fill=text_color_rgb)
1117
+
1118
+ # Convert to numpy array for MoviePy
1119
+ img_array = np.array(img)
1120
+ clip = ImageClip(img_array)
1121
+ return clip, img_width, img_height
1122
+
1123
+ except Exception as e:
1124
+ self.log(warning(f"Error creating text clip: {str(e)}"))
1125
+ # Create a simple colored rectangle as fallback
1126
+ img = Image.new('RGB', (100, 50), color=(100, 100, 100))
1127
+ img_array = np.array(img)
1128
+ clip = ImageClip(img_array)
1129
+ return clip, 100, 50
1130
+
1131
+ # First, create and position all the regular words at once
1132
+ for i, metric in enumerate(word_metrics):
1133
+ word = metric["word"]
1134
+ word_width = metric["width"]
1135
+ word_height = metric["height"]
1136
+
1137
+ # Check if word fits on current line
1138
+ if x_pos + word_width > frame_size[0] - 2 * x_buffer:
1139
+ x_pos = 0
1140
+ y_pos += word_height + 20
1141
+
1142
+ # Store position info for highlighting
1143
+ word_positions.append({
1144
+ "word": word,
1145
+ "x_pos": x_pos + x_buffer,
1146
+ "y_pos": y_pos + y_buffer,
1147
+ "width": word_width,
1148
+ "height": word_height,
1149
+ "start": metric["start"],
1150
+ "end": metric["end"]
1151
+ })
1152
+
1153
+ # Create the word clip
1154
+ word_clip, _, _ = create_text_clip(word)
1155
+ word_clip = word_clip.set_position((x_pos + x_buffer, y_pos + y_buffer))
1156
+ word_clip = word_clip.set_start(line["start"]).set_duration(line_duration)
1157
+ subtitle_clips.append(word_clip)
1158
+
1159
+ # Add space after word (except for last word)
1160
+ if i < len(word_metrics) - 1:
1161
+ space_clip, _, _ = create_text_clip(" ")
1162
+ space_clip = space_clip.set_position((x_pos + word_width + x_buffer - space_width, y_pos + y_buffer))
1163
+ space_clip = space_clip.set_start(line["start"]).set_duration(line_duration)
1164
+ subtitle_clips.append(space_clip)
1165
+
1166
+ x_pos += word_width
1167
 
1168
+ # Only add highlighted words if highlighting is enabled
1169
+ if highlighting_enabled and bg_color:
1170
+ for word_pos in word_positions:
1171
+ highlight_clip, _, _ = create_text_clip(word_pos["word"], bg_color)
1172
+ highlight_clip = highlight_clip.set_position((word_pos["x_pos"], word_pos["y_pos"]))
1173
+ highlight_clip = highlight_clip.set_start(word_pos["start"]).set_duration(word_pos["end"] - word_pos["start"])
1174
+ subtitle_clips.append(highlight_clip)
1175
+
1176
+ return subtitle_clips
1177
 
1178
  def combine(self) -> str:
1179
  """Combine images, audio, and subtitles into a final video."""
1180
  self.progress(0.8, desc="Creating final video")
1181
  self.log("Combining images and audio into final video")
1182
  try:
1183
+ # Always save to the generation folder when available
1184
+ if hasattr(self, 'generation_folder') and os.path.exists(self.generation_folder):
1185
+ output_path = os.path.join(self.generation_folder, f"output_{int(time.time())}.mp4")
1186
+ else:
1187
+ output_path = os.path.join(STORAGE_DIR, f"output_{int(time.time())}.mp4")
1188
 
1189
  # Check for required files
1190
  if not self.images:
 
1201
  num_images = len(self.images)
1202
  req_dur = max_duration / num_images
1203
 
1204
+ # Create video clips from images more efficiently
1205
+ self.log("Processing images for video")
1206
  clips = []
1207
  tot_dur = 0
1208
 
1209
+ # Pre-compute standard size and aspect ratio
1210
+ target_size = (1080, 1920)
1211
+ aspect_ratio = 9/16
1212
+
1213
+ # Process all images at once
1214
+ for image_path in self.images:
1215
+ # Check if image exists and is valid
1216
+ if not os.path.exists(image_path):
1217
+ self.log(warning(f"Image not found: {image_path}, skipping"))
1218
+ continue
1219
+
1220
+ # Calculate remaining duration
1221
+ duration = min(req_dur, max_duration - tot_dur)
1222
+ if duration <= 0:
1223
+ break
1224
 
1225
+ try:
1226
+ clip = ImageClip(image_path)
1227
+ clip = clip.set_duration(duration)
1228
+ clip = clip.set_fps(30)
1229
+
1230
+ # Handle aspect ratio (vertical video for shorts)
1231
+ if clip.w / clip.h < aspect_ratio:
1232
+ # Image is too tall, crop height
1233
+ clip = crop(
1234
+ clip,
1235
+ width=clip.w,
1236
+ height=round(clip.w / aspect_ratio),
1237
+ x_center=clip.w / 2,
1238
+ y_center=clip.h / 2
1239
+ )
1240
+ else:
1241
+ # Image is too wide, crop width
1242
+ clip = crop(
1243
+ clip,
1244
+ width=round(aspect_ratio * clip.h),
1245
+ height=clip.h,
1246
+ x_center=clip.w / 2,
1247
+ y_center=clip.h / 2
1248
+ )
1249
+
1250
+ # Resize to standard size for shorts
1251
+ clip = clip.resize(target_size)
1252
+ clips.append(clip)
1253
+ tot_dur += duration
1254
+
1255
+ # If we've exceeded the duration, break
1256
+ if tot_dur >= max_duration:
1257
+ break
1258
+ except Exception as e:
1259
+ self.log(warning(f"Error processing image {image_path}: {str(e)}"))
 
1260
 
1261
  # Create video from clips
1262
  self.log(f"Creating video from {len(clips)} clips")
1263
  final_clip = concatenate_videoclips(clips)
1264
  final_clip = final_clip.set_fps(30)
1265
 
1266
+ # Add subtitles if enabled - skip entirely if disabled
1267
+ subtitle_clips = []
1268
  if self.subtitles_enabled and hasattr(self, 'subtitle_data'):
1269
+ self.log("Generating subtitle clips")
1270
+ subtitle_clips = self.create_subtitle_clip(self.subtitle_data, target_size)
1271
+ if subtitle_clips:
1272
+ final_clip = CompositeVideoClip([final_clip] + subtitle_clips)
1273
 
1274
  # Add background music if available
1275
  music_path = None
 
1301
  # Set final audio
1302
  final_clip = final_clip.set_audio(final_audio)
1303
 
1304
+ # Write final video - use faster preset
1305
  self.log("Writing final video file")
1306
  final_clip.write_videofile(
1307
  output_path,
 
1309
  codec="libx264",
1310
  audio_codec="aac",
1311
  threads=4,
1312
+ preset="ultrafast" # Changed from "medium" to "ultrafast" for faster rendering
1313
  )
1314
 
1315
  self.log(success(f"Video saved to: {output_path}"))
 
1318
  except Exception as e:
1319
  error_msg = f"Error combining video: {str(e)}"
1320
  self.log(error(error_msg))
1321
+ raise Exception(error_msg)
1322
 
1323
  def generate_video(self) -> dict:
1324
  """Generate complete video with all components."""
1325
  try:
1326
  self.log("Starting video generation process")
1327
 
1328
+ # Create a unique folder with sequential numbering
1329
+ folder_num = 1
1330
+ # Check existing folders to find the latest number
1331
+ if os.path.exists(STORAGE_DIR):
1332
+ existing_folders = [d for d in os.listdir(STORAGE_DIR) if os.path.isdir(os.path.join(STORAGE_DIR, d))]
1333
+ numbered_folders = []
1334
+ for folder in existing_folders:
1335
+ try:
1336
+ # Extract folder number from format "N_UUID"
1337
+ if "_" in folder:
1338
+ num = int(folder.split("_")[0])
1339
+ numbered_folders.append(num)
1340
+ except (ValueError, IndexError):
1341
+ continue
1342
+
1343
+ if numbered_folders:
1344
+ folder_num = max(numbered_folders) + 1
1345
+
1346
+ folder_id = f"{folder_num}_{str(uuid.uuid4())}"
1347
+ self.generation_folder = os.path.join(STORAGE_DIR, folder_id)
1348
  os.makedirs(self.generation_folder, exist_ok=True)
1349
  self.log(f"Created generation folder: {self.generation_folder}")
1350
 
 
1385
  self.progress(0.7, desc="Generating subtitles")
1386
  if self.subtitles_enabled and hasattr(self, 'tts_path') and os.path.exists(self.tts_path):
1387
  self.subtitle_data = self.generate_subtitles(self.tts_path)
1388
+ # Save subtitles to generation folder
1389
+ if self.subtitle_data:
1390
+ try:
1391
+ # Save word-level subtitles
1392
+ if 'wordlevel' in self.subtitle_data:
1393
+ word_subtitles_path = os.path.join(self.generation_folder, "word_subtitles.json")
1394
+ with open(word_subtitles_path, 'w') as f:
1395
+ json.dump(self.subtitle_data['wordlevel'], f, indent=2)
1396
+ self.log(f"Saved word-level subtitles to: {word_subtitles_path}")
1397
+
1398
+ # Save line-level subtitles
1399
+ if 'linelevel' in self.subtitle_data:
1400
+ line_subtitles_path = os.path.join(self.generation_folder, "line_subtitles.json")
1401
+ with open(line_subtitles_path, 'w') as f:
1402
+ json.dump(self.subtitle_data['linelevel'], f, indent=2)
1403
+ self.log(f"Saved line-level subtitles to: {line_subtitles_path}")
1404
+ except Exception as e:
1405
+ self.log(warning(f"Error saving subtitles to generation folder: {str(e)}"))
1406
+
1407
+ # Step 8: Save content.txt with all metadata and generation info
1408
+ self.progress(0.75, desc="Saving generation data")
1409
+ try:
1410
+ content_path = os.path.join(self.generation_folder, "content.txt")
1411
+ with open(content_path, 'w', encoding='utf-8') as f:
1412
+ f.write(f"NICHE: {self.niche}\n\n")
1413
+ f.write(f"LANGUAGE: {self.language}\n\n")
1414
+ f.write(f"GENERATED TOPIC: {self.subject}\n\n")
1415
+ f.write(f"GENERATED SCRIPT:\n{self.script}\n\n")
1416
+ f.write(f"GENERATED PROMPTS:\n")
1417
+ for i, prompt in enumerate(self.image_prompts, 1):
1418
+ f.write(f"{i}. {prompt}\n")
1419
+ f.write("\n")
1420
+ f.write(f"GENERATED METADATA:\n")
1421
+ for key, value in self.metadata.items():
1422
+ f.write(f"{key}: {value}\n")
1423
+ self.log(f"Saved content.txt to: {content_path}")
1424
+ except Exception as e:
1425
+ self.log(warning(f"Error saving content.txt: {str(e)}"))
1426
 
1427
+ # Step 9: Combine all elements into final video
1428
  self.progress(0.8, desc="Creating final video")
1429
  self.log("Combining all elements into final video")
1430
  path = self.combine()
 
1446
  except Exception as e:
1447
  error_msg = f"Error during video generation: {str(e)}"
1448
  self.log(error(error_msg))
1449
+ raise Exception(error_msg)
 
1450
 
1451
  # Data for dynamic dropdowns
1452
  def get_text_generator_models(generator):
 
1588
  text_gen = gr.Dropdown(
1589
  choices=["g4f", "gemini", "openai"],
1590
  label="Text Generator",
1591
+ value="gemini"
1592
  )
1593
  text_model = gr.Dropdown(
1594
  choices=get_text_generator_models("g4f"),
1595
  label="Text Model",
1596
+ value="gemini-2.0-flash"
1597
  )
1598
 
1599
  with gr.TabItem("Image"):
 
1832
  os.makedirs(STATIC_DIR, exist_ok=True)
1833
  os.makedirs(MUSIC_DIR, exist_ok=True)
1834
  os.makedirs(FONTS_DIR, exist_ok=True)
1835
+ os.makedirs(STORAGE_DIR, exist_ok=True)
1836
 
1837
  # Launch the app
1838
  demo = create_interface()