Nishur committed on
Commit
dc43c7f
·
verified ·
1 Parent(s): 68acba2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +404 -559
app.py CHANGED
@@ -1,616 +1,461 @@
1
  import gradio as gr
2
  import os
3
- import tempfile
4
  import subprocess
5
- import assemblyai as aai
 
6
  from deep_translator import GoogleTranslator
7
  import pysrt
8
- import logging
9
- import sys
10
  import shutil
11
- from pathlib import Path
12
  import time
13
  from tqdm import tqdm
14
- import torch
15
- from TTS.api import TTS
16
 
17
  # Set up logging
18
  logging.basicConfig(level=logging.INFO,
19
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
20
- stream=sys.stdout)
21
  logger = logging.getLogger(__name__)
22
 
23
  # Configuration
24
- aai.settings.api_key = os.getenv("ASSEMBLYAI_API_KEY")
25
  LANGUAGES = {
26
- "English": "en",
27
- "Spanish": "es",
28
- "French": "fr",
29
- "German": "de",
30
- "Japanese": "ja",
31
- "Hindi": "hi"
32
  }
33
 
34
- # TTS model mapping for different languages
35
- TTS_MODELS = {
36
- "en": "tts_models/en/ljspeech/tacotron2-DDC_ph",
37
- "es": "tts_models/es/css10/vits",
38
- "fr": "tts_models/fr/css10/vits",
39
- "de": "tts_models/de/thorsten/tacotron2-DDC",
40
- "ja": "tts_models/ja/kokoro/tacotron2-DDC",
41
- "hi": "tts_models/hi/kb/tacotron2-DDC"
42
  }
43
 
44
- # Create a permanent output directory
45
- OUTPUT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "outputs")
46
  os.makedirs(OUTPUT_DIR, exist_ok=True)
47
 
48
  # Initialize TTS
49
- def init_tts():
50
- device = "cuda" if torch.cuda.is_available() else "cpu"
51
- tts_models = {}
52
- for lang_code, model_name in TTS_MODELS.items():
53
- try:
54
- tts = TTS(model_name=model_name, progress_bar=False).to(device)
55
- tts_models[lang_code] = tts
56
- logger.info(f"Loaded TTS model for {lang_code}: {model_name}")
57
- except Exception as e:
58
- logger.warning(f"Failed to load TTS model for {lang_code}: {str(e)}")
59
- return tts_models
60
 
61
- tts_models = init_tts()
 
62
 
63
- def extract_audio(video_path):
64
- """Extract audio from video file using ffmpeg"""
65
- try:
66
- logger.info(f"Extracting audio from video: {video_path}")
67
- audio_path = os.path.join(OUTPUT_DIR, "audio.wav")
68
-
69
- # Use ffmpeg to extract audio
70
- cmd = [
71
- 'ffmpeg',
72
- '-i', video_path,
73
- '-vn', # No video
74
- '-acodec', 'pcm_s16le', # PCM format
75
- '-ar', '44100', # Sample rate
76
- '-ac', '2', # Stereo
77
- '-y', # Overwrite output file
78
- audio_path
79
- ]
80
-
81
- logger.info(f"Running command: {' '.join(cmd)}")
82
- process = subprocess.run(cmd, capture_output=True, text=True)
83
-
84
- if process.returncode != 0:
85
- logger.error(f"Audio extraction failed: {process.stderr}")
86
- raise Exception(f"Audio extraction failed: {process.stderr}")
87
-
88
- return audio_path
89
- except Exception as e:
90
- logger.error(f"Audio extraction failed: {str(e)}", exc_info=True)
91
- raise Exception(f"Audio extraction failed: {str(e)}")
92
 
93
- def generate_subtitles(audio_path):
94
- """Generate subtitles using AssemblyAI"""
95
- try:
96
- logger.info(f"Transcribing audio with AssemblyAI: {audio_path}")
97
- transcriber = aai.Transcriber()
98
- transcript = transcriber.transcribe(audio_path)
99
-
100
- srt_path = os.path.join(OUTPUT_DIR, "subtitles.srt")
101
- logger.info(f"Saving subtitles to: {srt_path}")
102
-
103
- with open(srt_path, "w", encoding="utf-8") as f:
104
- f.write(transcript.export_subtitles_srt())
105
-
106
- return srt_path
107
- except Exception as e:
108
- logger.error(f"Subtitle generation failed: {str(e)}", exc_info=True)
109
- raise Exception(f"Subtitle generation failed: {str(e)}")
110
 
111
- def translate_subtitles(srt_path, target_langs):
112
- """Translate subtitles to target languages"""
113
- try:
114
- logger.info(f"Loading subtitles from: {srt_path}")
115
- subs = pysrt.open(srt_path, encoding="utf-8")
116
- results = {}
117
-
118
- for lang_code in target_langs:
119
- logger.info(f"Translating to language code: {lang_code}")
120
- translated_subs = subs[:]
121
- translator = GoogleTranslator(source="auto", target=lang_code)
122
-
123
- for i, sub in enumerate(translated_subs):
124
- try:
125
- sub.text = translator.translate(sub.text)
126
- if i % 10 == 0: # Log progress every 10 subtitles
127
- logger.info(f"Translated {i+1}/{len(translated_subs)} subtitles to {lang_code}")
128
- except Exception as e:
129
- logger.warning(f"Failed to translate subtitle: {sub.text}. Error: {str(e)}")
130
- # Keep original text if translation fails
131
-
132
- output_path = os.path.join(OUTPUT_DIR, f"subtitles_{lang_code}.srt")
133
- logger.info(f"Saving translated subtitles to: {output_path}")
134
- translated_subs.save(output_path, encoding='utf-8')
135
- results[lang_code] = output_path
136
-
137
- return results
138
- except Exception as e:
139
- logger.error(f"Translation failed: {str(e)}", exc_info=True)
140
- raise Exception(f"Translation failed: {str(e)}")
141
 
142
- def generate_translated_audio(srt_path, target_lang):
143
- """Generate translated audio using Coqui TTS"""
144
- try:
145
- logger.info(f"Generating translated audio for {target_lang}")
146
- subs = pysrt.open(srt_path, encoding="utf-8")
147
- translated_text = [sub.text for sub in subs]
148
-
149
- # Create temporary directory for audio chunks
150
- temp_dir = os.path.join(OUTPUT_DIR, f"temp_audio_{target_lang}")
151
- os.makedirs(temp_dir, exist_ok=True)
152
-
153
- # Generate TTS for each subtitle
154
- audio_files = []
155
- timings = []
156
-
157
- # Get the appropriate TTS model
158
- tts = tts_models.get(target_lang)
159
- if tts is None:
160
- raise Exception(f"No TTS model available for language: {target_lang}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
 
162
- for i, sub in enumerate(tqdm(subs, desc=f"Generating {target_lang} speech")):
163
- text = sub.text.strip()
164
- if not text:
165
- continue
166
-
167
- # Get timing information
168
- start_time = (sub.start.hours * 3600 +
169
- sub.start.minutes * 60 +
170
- sub.start.seconds +
171
- sub.start.milliseconds / 1000)
172
-
173
- end_time = (sub.end.hours * 3600 +
174
- sub.end.minutes * 60 +
175
- sub.end.seconds +
176
- sub.end.milliseconds / 1000)
177
-
178
- duration = end_time - start_time
179
-
180
- # Generate TTS audio
181
- audio_file = os.path.join(temp_dir, f"chunk_{i:04d}.wav")
182
-
183
  try:
184
- # For multi-speaker models, we might need to specify speaker
185
- tts.tts_to_file(text=text, file_path=audio_file)
186
-
187
- if os.path.exists(audio_file) and os.path.getsize(audio_file) > 0:
188
- audio_files.append(audio_file)
189
- timings.append((start_time, end_time, duration, audio_file))
190
- else:
191
- logger.warning(f"Generated audio file is empty or does not exist: {audio_file}")
192
-
193
  except Exception as e:
194
- logger.warning(f"Failed to generate TTS for: {text}. Error: {str(e)}")
195
-
196
- # Check if we actually generated any audio files
197
- if not audio_files:
198
- logger.warning(f"No audio files were generated for {target_lang}")
199
- # Create a silent audio file as fallback
200
- silent_audio = os.path.join(OUTPUT_DIR, f"translated_audio_{target_lang}.wav")
201
- silent_cmd = [
202
- 'ffmpeg',
203
- '-f', 'lavfi',
204
- '-i', f'anullsrc=r=44100:cl=stereo',
205
- '-t', '180', # 3 minutes default
206
- '-q:a', '0',
207
- '-y',
208
- silent_audio
209
- ]
210
- subprocess.run(silent_cmd, capture_output=True)
211
- return silent_audio
212
-
213
- # Create a silent audio track the same length as the original video
214
- silence_file = os.path.join(temp_dir, "silence.wav")
215
- try:
216
- video_duration_cmd = [
217
- 'ffprobe',
218
- '-v', 'error',
219
- '-show_entries', 'format=duration',
220
- '-of', 'default=noprint_wrappers=1:nokey=1',
221
- os.path.join(OUTPUT_DIR, "base_video.mp4")
222
- ]
223
-
224
- duration_result = subprocess.run(video_duration_cmd, capture_output=True, text=True)
225
- video_duration = float(duration_result.stdout.strip())
226
- except Exception as e:
227
- logger.warning(f"Could not determine video duration: {str(e)}. Using default of 180 seconds.")
228
- video_duration = 180.0
229
-
230
- # Create silent audio track
231
- silent_cmd = [
232
- 'ffmpeg',
233
- '-f', 'lavfi',
234
- '-i', f'anullsrc=r=44100:cl=stereo',
235
- '-t', str(video_duration),
236
- '-q:a', '0',
237
- '-y',
238
- silence_file
239
- ]
240
- subprocess.run(silent_cmd, capture_output=True)
241
-
242
- # Create a file with the audio mixing commands
243
- filter_complex = []
244
- input_count = 1 # Starting with 1 because 0 is the silence track
245
-
246
- # Start with silent track
247
- filter_parts = ["[0:a]"]
248
-
249
- # Add each audio segment
250
- for start_time, end_time, duration, audio_file in timings:
251
- filter_parts.append(f"[{input_count}:a]adelay={int(start_time*1000)}|{int(start_time*1000)}")
252
- input_count += 1
253
-
254
- # Mix all audio tracks
255
- filter_parts.append(f"amix=inputs={input_count}:dropout_transition=0:normalize=0[aout]")
256
- filter_complex = ";".join(filter_parts)
257
-
258
- # Build the ffmpeg command with all audio chunks
259
- cmd = ['ffmpeg', '-y']
260
-
261
- # Add silent base track
262
- cmd.extend(['-i', silence_file])
263
-
264
- # Add all audio chunks
265
- for audio_file in audio_files:
266
- cmd.extend(['-i', audio_file])
267
-
268
- # Add filter complex and output
269
- output_audio = os.path.join(OUTPUT_DIR, f"translated_audio_{target_lang}.wav")
270
- cmd.extend([
271
- '-filter_complex', filter_complex,
272
- '-map', '[aout]',
273
- output_audio
274
- ])
275
-
276
- # Run the command
277
- logger.info(f"Combining audio segments: {' '.join(cmd)}")
278
- process = subprocess.run(cmd, capture_output=True)
279
-
280
- if process.returncode != 0:
281
- logger.error(f"Audio combination failed: {process.stderr}")
282
- # Create a fallback silent audio as last resort
283
- silent_audio = os.path.join(OUTPUT_DIR, f"translated_audio_{target_lang}.wav")
284
- silent_cmd = [
285
- 'ffmpeg',
286
- '-f', 'lavfi',
287
- '-i', f'anullsrc=r=44100:cl=stereo',
288
- '-t', str(video_duration),
289
- '-q:a', '0',
290
- '-y',
291
- silent_audio
292
- ]
293
- subprocess.run(silent_cmd, capture_output=True)
294
- output_audio = silent_audio
295
-
296
- # Verify the output file exists
297
- if not os.path.exists(output_audio):
298
- logger.error(f"Output audio file does not exist: {output_audio}")
299
- # Create emergency fallback
300
- silent_audio = os.path.join(OUTPUT_DIR, f"translated_audio_{target_lang}.wav")
301
- silent_cmd = [
302
- 'ffmpeg',
303
- '-f', 'lavfi',
304
- '-i', f'anullsrc=r=44100:cl=stereo',
305
- '-t', '180',
306
- '-q:a', '0',
307
- '-y',
308
- silent_audio
309
- ]
310
- subprocess.run(silent_cmd, capture_output=True)
311
- output_audio = silent_audio
312
-
313
- # Clean up temporary files
314
- try:
315
- shutil.rmtree(temp_dir)
316
- except Exception as e:
317
- logger.warning(f"Failed to clean up temp directory: {str(e)}")
318
 
319
- return output_audio
320
- except Exception as e:
321
- logger.error(f"Audio translation failed: {str(e)}", exc_info=True)
322
- # Create an emergency fallback silent audio
323
- try:
324
- silent_audio = os.path.join(OUTPUT_DIR, f"translated_audio_{target_lang}.wav")
325
- silent_cmd = [
326
- 'ffmpeg',
327
- '-f', 'lavfi',
328
- '-i', f'anullsrc=r=44100:cl=stereo',
329
- '-t', '180',
330
- '-q:a', '0',
331
- '-y',
332
- silent_audio
333
- ]
334
- subprocess.run(silent_cmd, capture_output=True)
335
- return silent_audio
336
- except:
337
- raise Exception(f"Audio translation failed: {str(e)}")
338
 
339
- def combine_video_audio_subtitles(video_path, audio_path, srt_path, output_path):
340
- """Combine video with translated audio and subtitles"""
341
- try:
342
- logger.info(f"Combining video, audio, and subtitles")
343
-
344
- # Verify that all input files exist
345
- if not os.path.exists(video_path):
346
- raise Exception(f"Video file does not exist: {video_path}")
347
- if not os.path.exists(audio_path):
348
- raise Exception(f"Audio file does not exist: {audio_path}")
349
- if not os.path.exists(srt_path):
350
- raise Exception(f"Subtitle file does not exist: {srt_path}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
351
 
352
- logger.info(f"Input files verified: Video: {os.path.getsize(video_path)} bytes, Audio: {os.path.getsize(audio_path)} bytes, Subtitles: {os.path.getsize(srt_path)} bytes")
 
353
 
354
- # Create a safe version of the subtitle path
355
- safe_srt_path = srt_path.replace(" ", "\\ ").replace(":", "\\:")
356
-
357
- # Command to combine video with translated audio and subtitles
358
  try:
359
- # Attempt method 1: Using subtitles filter
360
- cmd = [
361
- 'ffmpeg',
362
- '-i', video_path, # Input video
363
- '-i', audio_path, # Input translated audio
364
- '-map', '0:v', # Use video from first input
365
- '-map', '1:a', # Use audio from second input
366
- '-vf', f"subtitles={safe_srt_path}:force_style='FontSize=24,PrimaryColour=&H00FFFFFF,OutlineColour=&H00000000,BorderStyle=3'", # Burn subtitles
367
- '-c:v', 'libx264', # Video codec
368
- '-c:a', 'aac', # Audio codec
369
- '-shortest', # End when shortest input ends
370
- '-y', # Overwrite output file
371
- output_path
372
- ]
373
-
374
- logger.info(f"Running command: {' '.join(cmd)}")
375
- process = subprocess.run(cmd, capture_output=True, text=True)
376
-
377
- if process.returncode != 0:
378
- logger.warning(f"First method failed: {process.stderr}")
379
- raise Exception("First method failed")
380
-
381
  except Exception as e:
382
- logger.warning(f"First method failed: {str(e)}")
383
-
384
- try:
385
- # Attempt method 2: Using hardcoded subtitles approach
386
- temp_srt_dir = os.path.join(OUTPUT_DIR, "temp_srt")
387
- os.makedirs(temp_srt_dir, exist_ok=True)
388
-
389
- # Copy the SRT file to the temp directory
390
- temp_srt_path = os.path.join(temp_srt_dir, "temp.srt")
391
- shutil.copy(srt_path, temp_srt_path)
392
-
393
- cmd = [
394
- 'ffmpeg',
395
- '-i', video_path,
396
- '-i', audio_path,
397
- '-map', '0:v',
398
- '-map', '1:a',
399
- '-vf', f"subtitles={temp_srt_path}",
400
- '-c:v', 'libx264',
401
- '-c:a', 'aac',
402
- '-shortest',
403
- '-y',
404
- output_path
405
- ]
406
-
407
- logger.info(f"Running second method: {' '.join(cmd)}")
408
- process = subprocess.run(cmd, capture_output=True, text=True)
409
-
410
- if process.returncode != 0:
411
- logger.warning(f"Second method failed: {process.stderr}")
412
- raise Exception("Second method failed")
413
-
414
- # Clean up temp directory
415
- shutil.rmtree(temp_srt_dir)
416
-
417
- except Exception as e:
418
- logger.warning(f"Second method failed: {str(e)}")
419
-
420
- # Attempt method 3: No subtitles as last resort
421
- cmd = [
422
- 'ffmpeg',
423
- '-i', video_path,
424
- '-i', audio_path,
425
- '-map', '0:v',
426
- '-map', '1:a',
427
- '-c:v', 'libx264',
428
- '-c:a', 'aac',
429
- '-shortest',
430
- '-y',
431
- output_path
432
- ]
433
-
434
- logger.info(f"Running fallback method (no subtitles): {' '.join(cmd)}")
435
- process = subprocess.run(cmd, capture_output=True, text=True)
436
-
437
- if process.returncode != 0:
438
- logger.error(f"All methods failed: {process.stderr}")
439
- raise Exception(f"Failed to combine video and audio: {process.stderr}")
440
- else:
441
- logger.warning("Created video without subtitles as fallback")
442
-
443
- # Verify the output file exists and has a reasonable size
444
- if not os.path.exists(output_path):
445
- raise Exception(f"Output file does not exist: {output_path}")
446
-
447
- if os.path.getsize(output_path) < 1000:
448
- raise Exception(f"Output file is too small: {os.path.getsize(output_path)} bytes")
449
-
450
- logger.info(f"Successfully created output file: {output_path} ({os.path.getsize(output_path)} bytes)")
451
- return output_path
452
- except Exception as e:
453
- logger.error(f"Combining failed: {str(e)}", exc_info=True)
454
- raise Exception(f"Combining failed: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
455
 
456
- def process_video(video_file, source_lang, target_langs, progress=gr.Progress()):
457
- """Process video with translation of both subtitles and audio"""
 
 
 
 
 
 
 
458
  try:
459
- progress(0.05, "Starting processing...")
460
- logger.info(f"Processing video: {video_file}")
461
 
462
- # Make sure we have ffmpeg installed
463
- try:
464
- subprocess.run(['ffmpeg', '-version'], capture_output=True, check=True)
465
- logger.info("ffmpeg is installed and working")
466
- except (subprocess.SubprocessError, FileNotFoundError):
467
- error_msg = "ffmpeg is not installed or not in PATH. Please install ffmpeg."
468
- logger.error(error_msg)
469
- return None, error_msg
470
-
471
- # Extract audio
472
  progress(0.1, "Extracting audio...")
473
  audio_path = extract_audio(video_file)
474
 
475
- # Generate subtitles
476
- progress(0.25, "Generating subtitles...")
477
- srt_path = generate_subtitles(audio_path)
 
478
 
479
- # Translate subtitles
480
- progress(0.4, "Translating subtitles...")
481
- target_lang_codes = [LANGUAGES[lang] for lang in target_langs]
482
- translated_subs = translate_subtitles(srt_path, target_lang_codes)
 
 
483
 
484
- # Create a copy of the video file in our output directory
 
 
 
 
485
  base_video = os.path.join(OUTPUT_DIR, "base_video.mp4")
486
  shutil.copy(video_file, base_video)
487
 
488
- # Process each target language
489
- output_videos = []
490
-
491
- for i, (lang_code, sub_path) in enumerate(translated_subs.items()):
492
- lang_name = next(name for name, code in LANGUAGES.items() if code == lang_code)
493
- progress(0.5 + (i * 0.5 / len(translated_subs)), f"Processing {lang_name}...")
494
 
495
- try:
496
- # Generate translated audio
497
- logger.info(f"Generating translated audio for {lang_code}")
498
- translated_audio = generate_translated_audio(sub_path, lang_code)
499
-
500
- # Verify audio file exists
501
- if not os.path.exists(translated_audio):
502
- logger.error(f"Translated audio file does not exist: {translated_audio}")
503
- continue
504
-
505
- # Combine video, translated audio, and subtitles
506
- output_path = os.path.join(OUTPUT_DIR, f"output_{lang_code}.mp4")
507
- logger.info(f"Creating final video with {lang_code} audio and subtitles")
508
-
509
- output_video = combine_video_audio_subtitles(
510
- base_video,
511
- translated_audio,
512
- sub_path,
513
- output_path
514
- )
515
-
516
- # Verify the output file exists and has content
517
- if os.path.exists(output_video) and os.path.getsize(output_video) > 1000:
518
- logger.info(f"Successfully created output file: {output_video}")
519
- output_videos.append(output_video)
520
- else:
521
- logger.warning(f"Output file is missing or too small: {output_video}")
522
- except Exception as e:
523
- logger.error(f"Failed to process {lang_code}: {str(e)}")
524
-
525
- # If all output videos failed, return the original
526
- if not output_videos:
527
- logger.warning("All translations failed, returning original video")
528
- return base_video, "Failed to translate video, returning original"
529
 
530
- progress(1.0, "Done!")
531
- message = f"Processing complete. Created {len(output_videos)} translated videos."
532
- logger.info(message)
533
- return output_videos[0], message
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
534
 
 
 
 
535
  except Exception as e:
536
  logger.error(f"Processing failed: {str(e)}", exc_info=True)
537
- return None, f"Processing failed: {str(e)}"
538
 
539
- with gr.Blocks() as demo:
540
- gr.Markdown("# Complete Video Translation System")
541
- gr.Markdown("Translates both subtitles and audio to target languages")
542
-
543
- with gr.Row():
544
- with gr.Column(scale=1):
545
- video_input = gr.Video(label="Upload Video")
546
- source_lang = gr.Dropdown(
547
- label="Source Language",
548
- choices=list(LANGUAGES.keys()),
549
- value="English"
550
- )
551
- target_langs = gr.CheckboxGroup(
552
- label="Target Languages (Both Audio & Subtitles)",
553
- choices=list(LANGUAGES.keys()),
554
- value=["Spanish"]
555
- )
556
- submit_btn = gr.Button("Translate", variant="primary")
557
-
558
- with gr.Column(scale=2):
559
- output_video = gr.Video(label="Translated Video")
560
- status_text = gr.Textbox(label="Status", interactive=False)
561
- output_info = gr.Markdown("Output videos will be saved in the 'outputs' directory")
562
 
563
- submit_btn.click(
564
- process_video,
565
- inputs=[video_input, source_lang, target_langs],
566
- outputs=[output_video, status_text]
567
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
568
 
569
  if __name__ == "__main__":
570
- # Check dependencies at startup
571
- missing_deps = []
572
-
573
- # Check ffmpeg
574
- try:
575
- version_info = subprocess.run(['ffmpeg', '-version'], capture_output=True, text=True)
576
- ffmpeg_version = version_info.stdout.split('\n')[0]
577
- logger.info(f"ffmpeg version: {ffmpeg_version}")
578
- except:
579
- logger.warning("ffmpeg not found - required for video processing")
580
- missing_deps.append("ffmpeg")
581
-
582
- # Check Python dependencies
583
- try:
584
- import assemblyai
585
- logger.info("AssemblyAI package found")
586
- except ImportError:
587
- logger.warning("AssemblyAI package not found - required for transcription")
588
- missing_deps.append("assemblyai")
589
-
590
- try:
591
- import TTS
592
- logger.info("Coqui TTS package found")
593
- except ImportError:
594
- logger.warning("Coqui TTS package not found - required for text-to-speech")
595
- missing_deps.append("TTS")
596
-
597
  try:
598
- import deep_translator
599
- logger.info("deep_translator package found")
600
- except ImportError:
601
- logger.warning("deep_translator package not found - required for translation")
602
- missing_deps.append("deep_translator")
603
-
604
- # Print installation instructions if dependencies are missing
605
- if missing_deps:
606
- logger.warning("Missing dependencies detected. Please install:")
607
- if "ffmpeg" in missing_deps:
608
- logger.warning("- ffmpeg: https://ffmpeg.org/download.html")
609
-
610
- python_deps = [dep for dep in missing_deps if dep != "ffmpeg"]
611
- if python_deps:
612
- deps_str = " ".join(python_deps)
613
- logger.warning(f"- Python packages: pip install {deps_str}")
614
-
615
- # Start the app
616
- demo.launch()
 
1
import copy
import logging
import os
import shutil
import subprocess
import time
from typing import Dict, List, Optional

import gradio as gr
import pysrt
import torch
import webvtt
import whisper  # Free speech-to-text
from deep_translator import GoogleTranslator
from tqdm import tqdm
from TTS.api import TTS
15
 
16
  # Set up logging
17
  logging.basicConfig(level=logging.INFO,
18
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
 
19
  logger = logging.getLogger(__name__)
20
 
21
  # Configuration
 
22
  LANGUAGES = {
23
+ "English": {"code": "en", "speakers": ["default"], "whisper": "en"},
24
+ "Spanish": {"code": "es", "speakers": ["default"], "whisper": "es"},
25
+ "French": {"code": "fr", "speakers": ["default"], "whisper": "fr"},
26
+ "German": {"code": "de", "speakers": ["thorsten", "eva_k"], "whisper": "de"},
27
+ "Japanese": {"code": "ja", "speakers": ["default"], "whisper": "ja"},
28
+ "Hindi": {"code": "hi", "speakers": ["default"], "whisper": "hi"}
29
  }
30
 
31
+ SUBTITLE_STYLES = {
32
+ "Default": "",
33
+ "White Text": "color: white;",
34
+ "Yellow Text": "color: yellow;",
35
+ "Large Text": "font-size: 24px;",
36
+ "Bold Text": "font-weight: bold;",
37
+ "Black Background": "background-color: black; padding: 5px;"
 
38
  }
39
 
40
+ # Create output directory
41
+ OUTPUT_DIR = "outputs"
42
  os.makedirs(OUTPUT_DIR, exist_ok=True)
43
 
44
# Initialize TTS
device = "cuda" if torch.cuda.is_available() else "cpu"

# Model name per language code. Models are loaded one at a time below so a
# single failing download/load only disables that language (the entry is
# simply missing and tts_models.get(...) returns None) instead of crashing
# the whole app at import time, as the previous unguarded dict literal did.
_TTS_MODEL_NAMES = {
    "en": "tts_models/en/ljspeech/tacotron2-DDC",
    "es": "tts_models/es/css10/vits",
    "fr": "tts_models/fr/css10/vits",
    "de": "tts_models/de/thorsten/tacotron2-DDC",
    "ja": "tts_models/ja/kokoro/tacotron2-DDC",
    "hi": "tts_models/hi/kb/tacotron2-DDC",
}

tts_models = {}
for _lang_code, _model_name in _TTS_MODEL_NAMES.items():
    try:
        tts_models[_lang_code] = TTS(_model_name).to(device)
    except Exception as e:
        # Best-effort: log and continue so the remaining languages still work.
        logger.warning("Failed to load TTS model for %s (%s): %s", _lang_code, _model_name, e)
 
 
54
 
55
# Initialize Whisper (load when needed)
whisper_model = None

def get_whisper_model():
    """Return the shared Whisper model, loading the "small" checkpoint lazily on first use."""
    global whisper_model
    if whisper_model is not None:
        return whisper_model
    whisper_model = whisper.load_model("small")
    return whisper_model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
def extract_audio(video_path: str) -> str:
    """Extract a mono 16 kHz PCM WAV track from *video_path* with ffmpeg.

    Returns the path of the extracted WAV file inside OUTPUT_DIR.
    Raises RuntimeError with ffmpeg's stderr when extraction fails —
    the previous bare `check=True` (without captured output) lost the
    actual ffmpeg error message.
    """
    audio_path = os.path.join(OUTPUT_DIR, "audio.wav")
    cmd = [
        'ffmpeg', '-i', video_path, '-vn',
        '-acodec', 'pcm_s16le', '-ar', '16000',
        '-ac', '1', '-y', audio_path
    ]
    # Capture output so ffmpeg chatter stays out of the console and the
    # real failure reason can be surfaced to the caller.
    process = subprocess.run(cmd, capture_output=True, text=True)
    if process.returncode != 0:
        raise RuntimeError(f"Audio extraction failed: {process.stderr}")
    return audio_path
 
 
 
 
 
 
 
74
 
75
def transcribe_with_whisper(audio_path: str, language: Optional[str] = None) -> dict:
    """Transcribe *audio_path* with the shared Whisper model.

    Returns the full Whisper result dict (contains at least "text",
    "segments" and "language") — the previous `-> str` annotation did not
    match the value actually returned.  `language=None` lets Whisper
    auto-detect the spoken language.
    """
    model = get_whisper_model()
    # word_timestamps=True yields per-word timing used for subtitle cues.
    return model.transcribe(audio_path, language=language, word_timestamps=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
def generate_srt_from_whisper(audio_path: str, language: str) -> str:
    """Transcribe *audio_path* and write the segments out as an SRT file.

    Returns the path of the generated subtitle file in OUTPUT_DIR.
    """
    transcription = transcribe_with_whisper(audio_path, language)

    srt_file = pysrt.SubRipFile()
    for index, segment in enumerate(transcription["segments"], start=1):
        item = pysrt.SubRipItem(
            index=index,
            start=pysrt.SubRipTime(seconds=segment["start"]),
            end=pysrt.SubRipTime(seconds=segment["end"]),
            text=segment["text"],
        )
        srt_file.append(item)

    srt_path = os.path.join(OUTPUT_DIR, "subtitles.srt")
    srt_file.save(srt_path, encoding='utf-8')
    return srt_path
97
+
98
def detect_language(audio_path: str) -> str:
    """Detect the spoken language of *audio_path* via Whisper.

    Returns the matching LANGUAGES display name, falling back to
    "English" when the detected code is not configured.
    """
    detected_code = transcribe_with_whisper(audio_path)["language"]
    matches = (name for name, cfg in LANGUAGES.items() if cfg["whisper"] == detected_code)
    return next(matches, "English")
106
+
107
def translate_subtitles(srt_path: str, target_langs: List[str]) -> Dict[str, str]:
    """Translate the SRT file at *srt_path* into each language in *target_langs*.

    Returns a mapping of language code -> path of the translated SRT file.

    Fix: the previous `subs[:]` shallow copy shared the SubRipItem objects
    between languages, so the second target language translated text that
    had already been translated into the first, and the source list was
    mutated.  Each language now works on a deep copy.
    """
    subs = pysrt.open(srt_path)
    results: Dict[str, str] = {}

    for lang_name in target_langs:
        lang_code = LANGUAGES[lang_name]["code"]
        # Deep copy so translating one language never mutates the original
        # cues or another language's output.
        translated_subs = copy.deepcopy(subs)
        translator = GoogleTranslator(source='auto', target=lang_code)

        for sub in translated_subs:
            try:
                sub.text = translator.translate(sub.text)
            except Exception as e:
                # Best-effort: keep the untranslated cue instead of dropping it.
                logger.warning(f"Translation failed: {str(e)}")
                continue

        output_path = os.path.join(OUTPUT_DIR, f"subtitles_{lang_code}.srt")
        translated_subs.save(output_path, encoding='utf-8')
        results[lang_code] = output_path

    return results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
def generate_webvtt_subtitles(srt_path: str, style: str = "") -> str:
    """Convert an SRT file to WebVTT, optionally embedding a ::cue style block.

    The language code is recovered from the `subtitles_<code>.srt` filename
    convention; the .vtt file is written next to it in OUTPUT_DIR.
    """
    subs = pysrt.open(srt_path)
    base_name = os.path.basename(srt_path)
    lang_code = base_name.split('_')[-1].replace('.srt', '')
    vtt_path = os.path.join(OUTPUT_DIR, f"subtitles_{lang_code}.vtt")

    chunks = ["WEBVTT\n\n"]
    if style:
        chunks.append(f"STYLE\n::cue {{\n{style}\n}}\n\n")
    for sub in subs:
        # SubRipTime -> datetime.time -> HH:MM:SS.mmm (trim microseconds to ms).
        start = sub.start.to_time().strftime('%H:%M:%S.%f')[:-3]
        end = sub.end.to_time().strftime('%H:%M:%S.%f')[:-3]
        chunks.append(f"{start} --> {end}\n{sub.text}\n\n")

    with open(vtt_path, 'w', encoding='utf-8') as f:
        f.writelines(chunks)

    return vtt_path
148
+
149
def generate_translated_audio(
    srt_path: str,
    target_lang: str,
    speaker: str = "default"
) -> str:
    """Synthesize TTS audio for each subtitle cue and mix it onto a silent base track.

    Returns the path of the mixed WAV file for *target_lang*.
    Raises Exception when no TTS model exists for the language or when no
    audio chunk could be generated at all.

    Fixes over the previous version:
    - `tts_models.get()` result is checked; a missing model used to crash
      later with AttributeError.
    - the filter_complex string was malformed (self-referencing labels);
      it is now a valid adelay-per-chunk + single amix graph.
    - `f'-i {f}'` was passed as ONE argv token and the whole command was
      joined and run with shell=True — broken for paths with spaces and a
      shell-injection risk.  The command is now a proper argument list.
    """
    tts = tts_models.get(target_lang)
    if tts is None:
        raise Exception(f"No TTS model available for language: {target_lang}")

    subs = pysrt.open(srt_path)
    temp_dir = os.path.join(OUTPUT_DIR, f"temp_audio_{target_lang}")
    os.makedirs(temp_dir, exist_ok=True)

    audio_files = []
    timings = []  # (start time in seconds, chunk path)

    for i, sub in enumerate(tqdm(subs, desc=f"Generating {target_lang} audio")):
        text = sub.text.strip()
        if not text:
            continue

        start_time = sub.start.ordinal / 1000  # ordinal is milliseconds
        audio_file = os.path.join(temp_dir, f"chunk_{i:04d}.wav")

        try:
            # Only pass a speaker for multi-speaker models.
            kwargs = {"speaker": speaker} if speaker != "default" and hasattr(tts, 'synthesizer') else {}
            tts.tts_to_file(text=text, file_path=audio_file, **kwargs)
            audio_files.append(audio_file)
            timings.append((start_time, audio_file))
        except Exception as e:
            # Best-effort: skip the failing cue, keep the rest.
            logger.warning(f"TTS failed: {str(e)}")

    if not audio_files:
        raise Exception("No audio generated")

    # Silent base track as long as the video so delayed chunks fit inside it.
    video_duration = get_video_duration(os.path.join(OUTPUT_DIR, "base_video.mp4"))
    silence_file = os.path.join(temp_dir, "silence.wav")
    subprocess.run([
        'ffmpeg', '-f', 'lavfi', '-i', 'anullsrc=r=44100:cl=stereo',
        '-t', str(video_duration), '-y', silence_file
    ], check=True)

    # Delay each chunk to its cue start, then mix silence + all chunks once.
    delay_filters = [
        f"[{idx + 1}:a]adelay={int(start * 1000)}|{int(start * 1000)}[d{idx}]"
        for idx, (start, _) in enumerate(timings)
    ]
    mix_inputs = "[0:a]" + "".join(f"[d{idx}]" for idx in range(len(timings)))
    filter_complex = ";".join(delay_filters + [
        f"{mix_inputs}amix=inputs={len(timings) + 1}:dropout_transition=0:normalize=0[aout]"
    ])

    output_audio = os.path.join(OUTPUT_DIR, f"translated_audio_{target_lang}.wav")
    cmd = ['ffmpeg', '-y', '-i', silence_file]
    for audio_file in audio_files:
        cmd.extend(['-i', audio_file])  # one argv entry per token, no shell
    cmd.extend(['-filter_complex', filter_complex, '-map', '[aout]', output_audio])
    subprocess.run(cmd, check=True)

    shutil.rmtree(temp_dir)
    return output_audio
206
+
207
def get_video_duration(video_path: str) -> float:
    """Return the duration of *video_path* in seconds, probed with ffprobe.

    Falls back to 180.0 seconds when ffprobe is not installed, fails, or
    reports nothing usable — the previous version raised an unhandled
    FileNotFoundError when the ffprobe binary was missing.
    """
    try:
        result = subprocess.run([
            'ffprobe', '-v', 'error', '-show_entries', 'format=duration',
            '-of', 'default=noprint_wrappers=1:nokey=1', video_path
        ], capture_output=True, text=True)
        # Empty stdout (probe failed) falls through to the 180-second default.
        return float(result.stdout.strip() or 180)
    except (OSError, ValueError) as e:
        logging.getLogger(__name__).warning(f"Could not determine video duration: {str(e)}")
        return 180.0
214
+
215
def create_html_player(
    video_path: str,
    subtitle_paths: Dict[str, str],
    style: str = ""
) -> str:
    """Write a standalone HTML5 player page next to the outputs.

    Args:
        video_path: path of the MP4 to embed; only its basename is used, so
            the page assumes the video sits in the same directory.
        subtitle_paths: mapping of language code -> .vtt file path; each
            becomes a <track> element (basenames again — same-directory
            assumption) plus a download link.
        style: optional CSS declarations injected as a video::cue rule.

    Returns:
        Path of the generated player.html inside OUTPUT_DIR.
    """
    html_path = os.path.join(OUTPUT_DIR, "player.html")
    video_name = os.path.basename(video_path)

    # One <track> per language; English is marked as the default track.
    subtitle_tracks = "\n".join(
        f'<track kind="subtitles" src="{os.path.basename(path)}" '
        f'srclang="{lang}" label="{lang.capitalize()}" '
        f'{"default" if lang == "en" else ""}>'
        for lang, path in subtitle_paths.items()
    )

    style_block = f"video::cue {{ {style} }}" if style else ""

    html_content = f"""<!DOCTYPE html>
    <html>
    <head>
    <title>Video Player</title>
    <style>
    body {{ font-family: Arial, sans-serif; margin: 20px; }}
    .container {{ max-width: 800px; margin: 0 auto; }}
    video {{ width: 100%; background: #000; }}
    .downloads {{ margin-top: 20px; }}
    {style_block}
    </style>
    </head>
    <body>
    <div class="container">
    <h2>Video Player with Subtitles</h2>
    <video controls>
    <source src="{video_name}" type="video/mp4">
    {subtitle_tracks}
    </video>

    <div class="downloads">
    <h3>Download Subtitles:</h3>
    {"".join(
    f'<a href="{os.path.basename(path)}" download>'
    f'{lang.upper()} Subtitles (.vtt)</a><br>'
    for lang, path in subtitle_paths.items()
    )}
    </div>
    </div>
    </body>
    </html>"""

    with open(html_path, 'w', encoding='utf-8') as f:
        f.write(html_content)

    return html_path
269
 
270
def process_video(
    video_file: str,
    source_lang: str,
    target_langs: List[str],
    subtitle_style: str,
    speaker_settings: Dict[str, str],
    progress: gr.Progress = gr.Progress()
) -> tuple:
    """Run the full translation pipeline on one uploaded video.

    Pipeline: extract audio -> (auto-)detect source language -> transcribe to
    SRT -> translate subtitles -> per target language, synthesize dubbed audio,
    emit styled WebVTT, and mux the dub into a copy of the video -> finally
    write an HTML player bundling everything.

    Args:
        video_file: Path to the uploaded source video.
        source_lang: Display name of the source language, or "Auto-detect".
        target_langs: Display names of the requested target languages.
        subtitle_style: Key into SUBTITLE_STYLES selecting cue CSS.
        speaker_settings: Mapping of language code -> preferred TTS speaker.
        progress: Gradio progress reporter (injected by the UI).

    Returns:
        A 2-tuple ``(output_files, status_message)``; ``output_files`` is
        ``None`` on failure.  (The previous annotation claimed ``List[str]``,
        but the function has always returned a tuple — corrected here.)
    """
    try:
        progress(0.05, "Initializing...")

        # 1. Extract audio
        progress(0.1, "Extracting audio...")
        audio_path = extract_audio(video_file)

        # 2. Detect language if needed
        if source_lang == "Auto-detect":
            source_lang = detect_language(audio_path)
            progress(0.15, f"Detected language: {source_lang}")

        # 3. Generate subtitles via Whisper, using the model's language code.
        progress(0.2, "Generating subtitles...")
        srt_path = generate_srt_from_whisper(
            audio_path,
            LANGUAGES[source_lang]["whisper"]
        )

        # 4. Translate subtitles
        progress(0.3, "Translating subtitles...")
        translated_subs = translate_subtitles(srt_path, target_langs)

        # 5. Save original video into OUTPUT_DIR so the HTML player can
        # reference it by basename.
        base_video = os.path.join(OUTPUT_DIR, "base_video.mp4")
        shutil.copy(video_file, base_video)

        # 6. Process each target language
        translated_vtts = {}
        for i, lang_name in enumerate(target_langs, 1):
            lang_code = LANGUAGES[lang_name]["code"]
            # Progress spans 0.4 -> 0.9 across the target languages.
            progress(0.4 + (i * 0.5 / len(target_langs)), f"Processing {lang_name}...")

            # Generate dubbed audio for this language.
            translated_audio = generate_translated_audio(
                translated_subs[lang_code],
                lang_code,
                speaker_settings.get(lang_code, "default")
            )

            # Generate styled WebVTT subtitles.
            vtt_path = generate_webvtt_subtitles(
                translated_subs[lang_code],
                SUBTITLE_STYLES.get(subtitle_style, "")
            )
            translated_vtts[lang_code] = vtt_path

            # Create translated video version: copy the video stream, swap
            # in the synthesized audio track re-encoded as AAC.
            output_video = os.path.join(OUTPUT_DIR, f"output_{lang_code}.mp4")
            subprocess.run([
                'ffmpeg', '-i', base_video, '-i', translated_audio,
                '-map', '0:v', '-map', '1:a', '-c:v', 'copy', '-c:a', 'aac',
                '-y', output_video
            ], check=True)

        # 7. Create HTML player
        progress(0.9, "Creating HTML player...")
        html_path = create_html_player(
            base_video,
            translated_vtts,
            SUBTITLE_STYLES.get(subtitle_style, "")
        )

        # Collect every artifact for the Gradio Files component.
        output_files = [html_path, base_video] + \
            list(translated_vtts.values()) + \
            [os.path.join(OUTPUT_DIR, f"output_{LANGUAGES[lang]['code']}.mp4")
             for lang in target_langs]

        progress(1.0, "Done!")
        return output_files, "Processing completed successfully!"

    except Exception as e:
        # Top-level pipeline boundary: log with traceback, surface a short
        # message to the UI instead of crashing the app.
        logger.error(f"Processing failed: {str(e)}", exc_info=True)
        return None, f"Error: {str(e)}"
354
 
355
def get_speaker_settings(*args) -> Dict[str, str]:
    """Map positional speaker selections onto their language codes.

    Inputs arrive in LANGUAGES order; falsy values (empty placeholder
    textboxes, unset dropdowns) are skipped.  Extra languages beyond the
    supplied args are ignored, matching the previous bounds check.
    """
    return {
        LANGUAGES[lang_name]["code"]: choice
        for lang_name, choice in zip(LANGUAGES, args)
        if choice
    }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
362
 
363
def create_interface():
    """Build the Gradio Blocks UI and wire its events.

    Fixes over the previous revision:
      * ``gr.Dropdown.update(...)`` (removed in Gradio 4) replaced with the
        component-agnostic ``gr.update(...)``.
      * The submit handler previously received
        ``gr.State(lambda: get_speaker_settings(*speaker_inputs))`` — a
        closure over the *component objects*, so the user's live speaker
        choices never reached ``process_video``.  The speaker components are
        now real event inputs, combined into a dict inside a wrapper.
    """
    with gr.Blocks(title="Video Translator") as demo:
        gr.Markdown("# Free Video Translation System")
        gr.Markdown("Translate videos with subtitles and audio dubbing using free/open-source tools")

        with gr.Row():
            with gr.Column(scale=1):
                video_input = gr.Video(label="Upload Video")

                with gr.Accordion("Source Settings", open=True):
                    source_lang = gr.Dropdown(
                        label="Source Language",
                        choices=["Auto-detect"] + list(LANGUAGES.keys()),
                        value="Auto-detect"
                    )

                with gr.Accordion("Target Languages", open=True):
                    target_langs = gr.CheckboxGroup(
                        label="Select target languages",
                        choices=list(LANGUAGES.keys()),
                        value=["English", "Spanish"]
                    )

                with gr.Accordion("Subtitle Styling", open=False):
                    subtitle_style = gr.Dropdown(
                        label="Subtitle Appearance",
                        choices=list(SUBTITLE_STYLES.keys()),
                        value="Default"
                    )

                with gr.Accordion("Voice Settings", open=False):
                    # One hidden control per language so positional indices
                    # line up with LANGUAGES ordering: a dropdown when the
                    # model offers several voices, else a placeholder textbox.
                    speaker_inputs = []
                    for lang_name in LANGUAGES.keys():
                        speakers = LANGUAGES[lang_name]["speakers"]
                        if len(speakers) > 1:
                            speaker_inputs.append(
                                gr.Dropdown(
                                    label=f"{lang_name} Speaker",
                                    choices=speakers,
                                    value=speakers[0],
                                    visible=False
                                )
                            )
                        else:
                            speaker_inputs.append(gr.Textbox(visible=False))

                submit_btn = gr.Button("Translate Video", variant="primary")

            with gr.Column(scale=2):
                output_files = gr.Files(label="Download Files")
                status = gr.Textbox(label="Status")

        gr.Markdown("""
        **Instructions:**
        1. Upload a video file
        2. Select source and target languages
        3. Customize subtitles and voices
        4. Click Translate
        5. Download the HTML player and open in browser
        """)

        def update_speaker_ui(selected_langs):
            # Show a speaker picker only for selected languages that actually
            # offer a choice of voices.
            updates = []
            for lang_name in LANGUAGES.keys():
                visible = lang_name in selected_langs and len(LANGUAGES[lang_name]["speakers"]) > 1
                updates.append(gr.update(visible=visible))
            return updates

        target_langs.change(
            update_speaker_ui,
            inputs=target_langs,
            outputs=speaker_inputs
        )

        def run_translation(video, src_lang, tgt_langs, style, *speaker_values):
            # Collect the live speaker selections into the mapping that
            # process_video expects.
            settings = get_speaker_settings(*speaker_values)
            return process_video(video, src_lang, tgt_langs, style, settings)

        submit_btn.click(
            run_translation,
            inputs=[video_input, source_lang, target_langs, subtitle_style] + speaker_inputs,
            outputs=[output_files, status]
        )

    return demo
451
 
452
if __name__ == "__main__":
    # Probe external requirements up front so the user gets an actionable
    # message instead of a mid-pipeline crash.
    try:
        # capture_output keeps ffmpeg's version banner off the console;
        # check=True still raises if the binary is missing or broken.
        subprocess.run(["ffmpeg", "-version"], check=True, capture_output=True)
        import torch
        import whisper
        demo = create_interface()
        demo.launch()
    except Exception as e:
        print(f"Error: {str(e)}")
        print("Please install all requirements: pip install -r requirements.txt")