Spaces:

Nishur
/

video_translator

Running

App Files Files Community

Nishur committed on Apr 8

Commit

100847c

verified ·

1 Parent(s): 969bd9d

Update app.py

Browse files

Files changed (1) hide show

app.py +236 -44

app.py CHANGED Viewed

@@ -9,6 +9,9 @@ import logging
 import sys
 import shutil
 from pathlib import Path
 # Set up logging
 logging.basicConfig(level=logging.INFO,
@@ -27,6 +30,16 @@ LANGUAGES = {
     "Hindi": "hi"
 }
 # Create a permanent output directory
 OUTPUT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "outputs")
 os.makedirs(OUTPUT_DIR, exist_ok=True)
@@ -110,20 +123,141 @@ def translate_subtitles(srt_path, target_langs):
         logger.error(f"Translation failed: {str(e)}", exc_info=True)
         raise Exception(f"Translation failed: {str(e)}")
-def burn_subtitles_ffmpeg(video_path, srt_path, output_path):
-    """Burn subtitles directly into the video using ffmpeg"""
     try:
-        logger.info(f"Burning subtitles into video using ffmpeg")
         # Escape special characters in paths for ffmpeg filters
         escaped_srt_path = srt_path.replace(":", "\\:").replace("'", "\\'").replace(" ", "\\ ")
-        # Command to burn subtitles into video
         cmd = [
             'ffmpeg',
             '-i', video_path,           # Input video
             '-vf', f"subtitles={escaped_srt_path}:force_style='FontSize=24,PrimaryColour=&H00FFFFFF,OutlineColour=&H00000000,BorderStyle=3'",  # Burn subtitles
-            '-c:a', 'copy',             # Copy audio codec
             '-y',                       # Overwrite output file
             output_path
         ]
@@ -132,16 +266,21 @@ def burn_subtitles_ffmpeg(video_path, srt_path, output_path):
         process = subprocess.run(cmd, capture_output=True, text=True)
         if process.returncode != 0:
-            logger.error(f"Subtitle burning failed: {process.stderr}")
             # Try alternative method
-            logger.info("Trying alternative method for subtitle burning")
             cmd = [
                 'ffmpeg',
-                '-i', video_path,           # Input video
-                '-vf', f"subtitles='{srt_path}'",  # Simpler subtitle filter
-                '-c:a', 'copy',             # Copy audio codec
-                '-y',                       # Overwrite output file
                 output_path
             ]
@@ -149,18 +288,18 @@ def burn_subtitles_ffmpeg(video_path, srt_path, output_path):
             process = subprocess.run(cmd, capture_output=True, text=True)
             if process.returncode != 0:
-                logger.error(f"Alternative subtitle burning failed: {process.stderr}")
-                raise Exception(f"Failed to burn subtitles: {process.stderr}")
         return output_path
     except Exception as e:
-        logger.error(f"Subtitle burning failed: {str(e)}", exc_info=True)
-        raise Exception(f"Subtitle burning failed: {str(e)}")
 def process_video(video_file, source_lang, target_langs, progress=gr.Progress()):
-    """Process video with translation"""
     try:
-        progress(0.1, "Starting processing...")
         logger.info(f"Processing video: {video_file}")
         # Make sure we have ffmpeg installed
@@ -173,32 +312,44 @@ def process_video(video_file, source_lang, target_langs, progress=gr.Progress())
             return None, error_msg
         # Extract audio
-        progress(0.2, "Extracting audio...")
         audio_path = extract_audio(video_file)
         # Generate subtitles
-        progress(0.4, "Generating subtitles...")
         srt_path = generate_subtitles(audio_path)
         # Translate subtitles
-        progress(0.6, "Translating subtitles...")
         target_lang_codes = [LANGUAGES[lang] for lang in target_langs]
         translated_subs = translate_subtitles(srt_path, target_lang_codes)
-        # Add subtitles to video for each language
-        progress(0.8, "Creating output videos...")
-        output_videos = []
-        # Create a copy of the video file in our output directory first
         base_video = os.path.join(OUTPUT_DIR, "base_video.mp4")
         shutil.copy(video_file, base_video)
-        for lang_code, sub_path in translated_subs.items():
-            output_path = os.path.join(OUTPUT_DIR, f"output_{lang_code}.mp4")
-            logger.info(f"Adding {lang_code} subtitles to video: {output_path}")
             try:
-                output_video = burn_subtitles_ffmpeg(base_video, sub_path, output_path)
                 # Verify the output file exists and has content
                 if os.path.exists(output_video) and os.path.getsize(output_video) > 1000:
@@ -207,25 +358,28 @@ def process_video(video_file, source_lang, target_langs, progress=gr.Progress())
                 else:
                     logger.warning(f"Output file is missing or too small: {output_video}")
             except Exception as e:
-                logger.error(f"Failed to create video with {lang_code} subtitles: {str(e)}")
         # If all output videos failed, return the original
         if not output_videos:
-            logger.warning("All subtitle additions failed, returning original video")
-            return base_video, "Failed to add subtitles to video, returning original"
         progress(1.0, "Done!")
-        return output_videos[0], f"Processing complete. Video saved to: {output_videos[0]}"
     except Exception as e:
         logger.error(f"Processing failed: {str(e)}", exc_info=True)
         return None, f"Processing failed: {str(e)}"
 with gr.Blocks() as demo:
-    gr.Markdown("# Video Translation System")
     with gr.Row():
-        with gr.Column():
             video_input = gr.Video(label="Upload Video")
             source_lang = gr.Dropdown(
                 label="Source Language",
@@ -233,15 +387,16 @@ with gr.Blocks() as demo:
                 value="English"
             )
             target_langs = gr.CheckboxGroup(
-                label="Target Languages",
                 choices=list(LANGUAGES.keys()),
                 value=["Spanish"]
             )
-            submit_btn = gr.Button("Translate")
-        with gr.Column():
             output_video = gr.Video(label="Translated Video")
             status_text = gr.Textbox(label="Status", interactive=False)
     submit_btn.click(
         process_video,
@@ -250,13 +405,50 @@ with gr.Blocks() as demo:
     )
 if __name__ == "__main__":
-    # Display ffmpeg version at startup
     try:
         version_info = subprocess.run(['ffmpeg', '-version'], capture_output=True, text=True)
-        # Fix for the backslash in f-string issue
-        first_line = version_info.stdout.split('\n')[0]
-        logger.info(f"ffmpeg version info: {first_line}")
     except:
-        logger.warning("Could not determine ffmpeg version")
     demo.launch()

 import sys
 import shutil
 from pathlib import Path
+import time
+from tqdm import tqdm
+from gtts import gTTS
 # Set up logging
 logging.basicConfig(level=logging.INFO,
     "Hindi": "hi"
 }
+# TTS voice mapping for different languages
+TTS_VOICES = {
+    "en": "en-US",
+    "es": "es-ES",
+    "fr": "fr-FR",
+    "de": "de-DE",
+    "ja": "ja-JP",
+    "hi": "hi-IN"
+}
 # Create a permanent output directory
 OUTPUT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "outputs")
 os.makedirs(OUTPUT_DIR, exist_ok=True)
         logger.error(f"Translation failed: {str(e)}", exc_info=True)
         raise Exception(f"Translation failed: {str(e)}")
def _subtitle_time_seconds(stamp):
    """Convert a subtitle timestamp (hours/minutes/seconds/milliseconds) to seconds."""
    return (stamp.hours * 3600
            + stamp.minutes * 60
            + stamp.seconds
            + stamp.milliseconds / 1000)


def _build_mix_filter(delays_ms):
    """Build the ffmpeg filter graph that overlays delayed clips on input 0.

    Input 0 is the base (silent) track; inputs 1..N are the clips, in the
    same order as *delays_ms*. Every ``adelay`` output gets an explicit
    label and all labels are fed to a single ``amix`` producing ``[aout]``.
    """
    if not delays_ms:
        # Nothing to overlay: pass the base track straight through.
        return "[0:a]anull[aout]"

    chains = []
    mix_inputs = ["[0:a]"]
    for index, delay in enumerate(delays_ms, start=1):
        label = f"[d{index}]"
        chains.append(f"[{index}:a]adelay={delay}|{delay}{label}")
        mix_inputs.append(label)
    chains.append(
        f"{''.join(mix_inputs)}amix=inputs={len(mix_inputs)}"
        ":dropout_transition=0:normalize=0[aout]"
    )
    return ";".join(chains)


def _run_checked(cmd, description):
    """Run *cmd* and raise RuntimeError carrying its stderr if it exits non-zero."""
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(f"{description} failed: {result.stderr.strip()}")
    return result


def generate_translated_audio(srt_path, target_lang):
    """Generate translated audio using text-to-speech.

    Each non-empty subtitle in *srt_path* is synthesised with gTTS, delayed
    to the subtitle's start time, and mixed over a silent track whose length
    is the duration of ``OUTPUT_DIR/base_video.mp4`` (``process_video``
    copies the uploaded video there before calling this function).

    Args:
        srt_path: Path to the (already translated) SRT file.
        target_lang: Language code passed to gTTS and used in file names.

    Returns:
        Path to the mixed WAV file,
        ``OUTPUT_DIR/translated_audio_<target_lang>.wav``.

    Raises:
        Exception: If reading the subtitles, probing the video, or any
            ffmpeg/ffprobe step fails. A failure to synthesise a single
            subtitle is only logged and that subtitle is skipped.
    """
    temp_dir = os.path.join(OUTPUT_DIR, f"temp_audio_{target_lang}")
    try:
        logger.info(f"Generating translated audio for {target_lang}")
        subs = pysrt.open(srt_path, encoding="utf-8")

        # Temporary directory for the per-subtitle audio chunks
        os.makedirs(temp_dir, exist_ok=True)

        # (start delay in ms, chunk path) for every chunk that was synthesised
        clips = []
        for i, sub in enumerate(tqdm(subs, desc=f"Generating {target_lang} speech")):
            text = sub.text.strip()
            if not text:
                continue
            audio_file = os.path.join(temp_dir, f"chunk_{i:04d}.mp3")
            try:
                gTTS(text=text, lang=target_lang, slow=False).save(audio_file)
            except Exception as e:
                # Best effort: one bad subtitle must not abort the whole track.
                logger.warning(f"Failed to generate TTS for: {text}. Error: {str(e)}")
                continue
            clips.append((int(_subtitle_time_seconds(sub.start) * 1000), audio_file))

        # Length of the silent base track = length of the original video
        probe = _run_checked(
            [
                'ffprobe',
                '-v', 'error',
                '-show_entries', 'format=duration',
                '-of', 'default=noprint_wrappers=1:nokey=1',
                os.path.join(OUTPUT_DIR, "base_video.mp4"),
            ],
            "Video duration probe",
        )
        try:
            video_duration = float(probe.stdout.strip())
        except ValueError as e:
            raise RuntimeError(
                f"Could not parse video duration from ffprobe output: {probe.stdout!r}"
            ) from e

        # Create silent audio track
        silence_file = os.path.join(temp_dir, "silence.wav")
        _run_checked(
            [
                'ffmpeg',
                '-f', 'lavfi',
                '-i', 'anullsrc=r=44100:cl=stereo',
                '-t', str(video_duration),
                '-q:a', '0',
                '-y',
                silence_file,
            ],
            "Silent track generation",
        )

        # Input 0 is the silence track, inputs 1..N are the chunks; the
        # filter graph indexes them in exactly this order.
        cmd = ['ffmpeg', '-y', '-i', silence_file]
        for _, audio_file in clips:
            cmd.extend(['-i', audio_file])

        output_audio = os.path.join(OUTPUT_DIR, f"translated_audio_{target_lang}.wav")
        cmd.extend([
            '-filter_complex', _build_mix_filter([delay for delay, _ in clips]),
            '-map', '[aout]',
            output_audio,
        ])

        logger.info(f"Combining audio segments: {' '.join(cmd)}")
        _run_checked(cmd, "Audio mixing")

        return output_audio
    except Exception as e:
        logger.error(f"Audio translation failed: {str(e)}", exc_info=True)
        raise Exception(f"Audio translation failed: {str(e)}") from e
    finally:
        # Clean up temporary files on success and on failure alike
        shutil.rmtree(temp_dir, ignore_errors=True)
+def combine_video_audio_subtitles(video_path, audio_path, srt_path, output_path):
+    """Combine video with translated audio and subtitles"""
     try:
+        logger.info(f"Combining video, audio, and subtitles")
         # Escape special characters in paths for ffmpeg filters
         escaped_srt_path = srt_path.replace(":", "\\:").replace("'", "\\'").replace(" ", "\\ ")
+        # Command to combine video with translated audio and subtitles
         cmd = [
             'ffmpeg',
             '-i', video_path,           # Input video
+            '-i', audio_path,           # Input translated audio
+            '-map', '0:v',              # Use video from first input
+            '-map', '1:a',              # Use audio from second input
             '-vf', f"subtitles={escaped_srt_path}:force_style='FontSize=24,PrimaryColour=&H00FFFFFF,OutlineColour=&H00000000,BorderStyle=3'",  # Burn subtitles
+            '-c:v', 'libx264',          # Video codec
+            '-c:a', 'aac',              # Audio codec
+            '-shortest',                # End when shortest input ends
             '-y',                       # Overwrite output file
             output_path
         ]
         process = subprocess.run(cmd, capture_output=True, text=True)
         if process.returncode != 0:
+            logger.error(f"Combining failed: {process.stderr}")
             # Try alternative method
+            logger.info("Trying alternative method")
             cmd = [
                 'ffmpeg',
+                '-i', video_path,
+                '-i', audio_path,
+                '-map', '0:v',
+                '-map', '1:a',
+                '-vf', f"subtitles='{srt_path}'",
+                '-c:v', 'libx264',
+                '-c:a', 'aac',
+                '-shortest',
+                '-y',
                 output_path
             ]
             process = subprocess.run(cmd, capture_output=True, text=True)
             if process.returncode != 0:
+                logger.error(f"Alternative method failed: {process.stderr}")
+                raise Exception(f"Failed to combine video, audio, and subtitles: {process.stderr}")
         return output_path
     except Exception as e:
+        logger.error(f"Combining failed: {str(e)}", exc_info=True)
+        raise Exception(f"Combining failed: {str(e)}")
 def process_video(video_file, source_lang, target_langs, progress=gr.Progress()):
+    """Process video with translation of both subtitles and audio"""
     try:
+        progress(0.05, "Starting processing...")
         logger.info(f"Processing video: {video_file}")
         # Make sure we have ffmpeg installed
             return None, error_msg
         # Extract audio
+        progress(0.1, "Extracting audio...")
         audio_path = extract_audio(video_file)
         # Generate subtitles
+        progress(0.25, "Generating subtitles...")
         srt_path = generate_subtitles(audio_path)
         # Translate subtitles
+        progress(0.4, "Translating subtitles...")
         target_lang_codes = [LANGUAGES[lang] for lang in target_langs]
         translated_subs = translate_subtitles(srt_path, target_lang_codes)
+        # Create a copy of the video file in our output directory
         base_video = os.path.join(OUTPUT_DIR, "base_video.mp4")
         shutil.copy(video_file, base_video)
+        # Process each target language
+        output_videos = []
+        for i, (lang_code, sub_path) in enumerate(translated_subs.items()):
+            lang_name = next(name for name, code in LANGUAGES.items() if code == lang_code)
+            progress(0.5 + (i * 0.5 / len(translated_subs)), f"Processing {lang_name}...")
             try:
+                # Generate translated audio
+                logger.info(f"Generating translated audio for {lang_code}")
+                translated_audio = generate_translated_audio(sub_path, lang_code)
+                # Combine video, translated audio, and subtitles
+                output_path = os.path.join(OUTPUT_DIR, f"output_{lang_code}.mp4")
+                logger.info(f"Creating final video with {lang_code} audio and subtitles")
+                output_video = combine_video_audio_subtitles(
+                    base_video,
+                    translated_audio,
+                    sub_path,
+                    output_path
+                )
                 # Verify the output file exists and has content
                 if os.path.exists(output_video) and os.path.getsize(output_video) > 1000:
                 else:
                     logger.warning(f"Output file is missing or too small: {output_video}")
             except Exception as e:
+                logger.error(f"Failed to process {lang_code}: {str(e)}")
         # If all output videos failed, return the original
         if not output_videos:
+            logger.warning("All translations failed, returning original video")
+            return base_video, "Failed to translate video, returning original"
         progress(1.0, "Done!")
+        message = f"Processing complete. Created {len(output_videos)} translated videos."
+        logger.info(message)
+        return output_videos[0], message
     except Exception as e:
         logger.error(f"Processing failed: {str(e)}", exc_info=True)
         return None, f"Processing failed: {str(e)}"
 with gr.Blocks() as demo:
+    gr.Markdown("# Complete Video Translation System")
+    gr.Markdown("Translates both subtitles and audio to target languages")
     with gr.Row():
+        with gr.Column(scale=1):
             video_input = gr.Video(label="Upload Video")
             source_lang = gr.Dropdown(
                 label="Source Language",
                 value="English"
             )
             target_langs = gr.CheckboxGroup(
+                label="Target Languages (Both Audio & Subtitles)",
                 choices=list(LANGUAGES.keys()),
                 value=["Spanish"]
             )
+            submit_btn = gr.Button("Translate", variant="primary")
+        with gr.Column(scale=2):
             output_video = gr.Video(label="Translated Video")
             status_text = gr.Textbox(label="Status", interactive=False)
+            output_info = gr.Markdown("Output videos will be saved in the 'outputs' directory")
     submit_btn.click(
         process_video,
     )
 if __name__ == "__main__":
+    # Check dependencies at startup
+    missing_deps = []
+    # Check ffmpeg
     try:
         version_info = subprocess.run(['ffmpeg', '-version'], capture_output=True, text=True)
+        ffmpeg_version = version_info.stdout.split('\n')[0]
+        logger.info(f"ffmpeg version: {ffmpeg_version}")
     except:
+        logger.warning("ffmpeg not found - required for video processing")
+        missing_deps.append("ffmpeg")
+    # Check Python dependencies
+    try:
+        import assemblyai
+        logger.info("AssemblyAI package found")
+    except ImportError:
+        logger.warning("AssemblyAI package not found - required for transcription")
+        missing_deps.append("assemblyai")
+    try:
+        import gtts
+        logger.info("gTTS package found")
+    except ImportError:
+        logger.warning("gTTS package not found - required for text-to-speech")
+        missing_deps.append("gtts")
+    try:
+        import deep_translator
+        logger.info("deep_translator package found")
+    except ImportError:
+        logger.warning("deep_translator package not found - required for translation")
+        missing_deps.append("deep_translator")
+    # Print installation instructions if dependencies are missing
+    if missing_deps:
+        logger.warning("Missing dependencies detected. Please install:")
+        if "ffmpeg" in missing_deps:
+            logger.warning("- ffmpeg: https://ffmpeg.org/download.html")
+        python_deps = [dep for dep in missing_deps if dep != "ffmpeg"]
+        if python_deps:
+            deps_str = " ".join(python_deps)
+            logger.warning(f"- Python packages: pip install {deps_str}")
+    # Start the app
     demo.launch()