Spaces:

Athspi-ai
/

AutoSubGen

Running

App Files Files Community

Athspi committed on Mar 10

Commit

818e336

verified ·

1 Parent(s): e6d59c3

Update app.py

Browse files

Files changed (1) hide show

app.py +136 -131

app.py CHANGED Viewed

@@ -1,9 +1,11 @@
 import os
 import google.generativeai as genai
 from moviepy.video.io.VideoFileClip import VideoFileClip
 import tempfile
 import logging
 import gradio as gr
 # Suppress moviepy logs
 logging.getLogger("moviepy").setLevel(logging.ERROR)
@@ -12,159 +14,162 @@ logging.getLogger("moviepy").setLevel(logging.ERROR)
 genai.configure(api_key=os.environ["GEMINI_API_KEY"])
 # Create the Gemini model
-generation_config = {
-    "temperature": 1,
-    "top_p": 0.95,
-    "top_k": 40,
-    "max_output_tokens": 8192,
-    "response_mime_type": "text/plain",
-}
-model = genai.GenerativeModel(
-    model_name="gemini-2.0-flash-exp",
-    generation_config=generation_config,
-)
-# List of all supported languages
 SUPPORTED_LANGUAGES = [
-    "Auto Detect", "English", "Chinese", "German", "Spanish", "Russian", "Korean",
-    "French", "Japanese", "Portuguese", "Turkish", "Polish", "Catalan", "Dutch",
-    "Arabic", "Swedish", "Italian", "Indonesian", "Hindi", "Finnish", "Vietnamese",
-    "Hebrew", "Ukrainian", "Greek", "Malay", "Czech", "Romanian", "Danish",
-    "Hungarian", "Tamil", "Norwegian", "Thai", "Urdu", "Croatian", "Bulgarian",
-    "Lithuanian", "Latin", "Maori", "Malayalam", "Welsh", "Slovak", "Telugu",
-    "Persian", "Latvian", "Bengali", "Serbian", "Azerbaijani", "Slovenian",
-    "Kannada", "Estonian", "Macedonian", "Breton", "Basque", "Icelandic",
-    "Armenian", "Nepali", "Mongolian", "Bosnian", "Kazakh", "Albanian",
-    "Swahili", "Galician", "Marathi", "Punjabi", "Sinhala", "Khmer", "Shona",
-    "Yoruba", "Somali", "Afrikaans", "Occitan", "Georgian", "Belarusian",
-    "Tajik", "Sindhi", "Gujarati", "Amharic", "Yiddish", "Lao", "Uzbek",
-    "Faroese", "Haitian Creole", "Pashto", "Turkmen", "Nynorsk", "Maltese",
-    "Sanskrit", "Luxembourgish", "Burmese", "Tibetan", "Tagalog", "Malagasy",
-    "Assamese", "Tatar", "Hawaiian", "Lingala", "Hausa", "Bashkir", "Javanese",
-    "Sundanese"
 ]
-def extract_audio_from_video(video_file):
-    """Extract audio from a video file and save it as a WAV file."""
-    video = VideoFileClip(video_file)
-    audio_file = os.path.join(tempfile.gettempdir(), "extracted_audio.wav")
-    video.audio.write_audiofile(audio_file, fps=16000, logger=None)  # Suppress logs
-    return audio_file
-def transcribe_audio_with_gemini(audio_file):
-    """Transcribe audio using Gemini."""
-    with open(audio_file, "rb") as f:
         audio_data = f.read()
-    # Create proper audio blob
-    audio_blob = {
-        'mime_type': 'audio/wav',
-        'data': audio_data
-    }
-    # Transcribe audio
-    convo = model.start_chat()
-    convo.send_message("You are a professional transcriber. Transcribe this audio accurately and verbatim in the original language. Respond only with the transcription.")
-    response = convo.send_message(audio_blob)
-    return response.text.strip()
-def generate_subtitles(transcription):
-    """Generate SRT subtitles from transcription."""
-    # Split transcription into lines (assuming each line is a sentence)
-    lines = transcription.split("\n")
-    # Generate SRT format subtitles
-    srt_subtitles = ""
-    for i, line in enumerate(lines, start=1):
-        start_time = i * 5  # Placeholder: 5 seconds per line
-        end_time = start_time + 5
-        start_time_srt = format_timestamp(start_time)
-        end_time_srt = format_timestamp(end_time)
-        srt_subtitles += f"{i}\n{start_time_srt} --> {end_time_srt}\n{line}\n\n"
-    return srt_subtitles
-def format_timestamp(seconds):
-    """Convert seconds to SRT timestamp format (HH:MM:SS,mmm)."""
-    hours = int(seconds // 3600)
-    minutes = int((seconds % 3600) // 60)
-    seconds = seconds % 60
-    milliseconds = int((seconds - int(seconds)) * 1000)
-    return f"{hours:02}:{minutes:02}:{int(seconds):02},{milliseconds:03}"
-def translate_srt(srt_text, target_language):
-    """Translate an SRT file while preserving timestamps."""
-    prompt = f"Translate the following SRT subtitles into {target_language}. Preserve the SRT format (timestamps and structure). Translate only the text after the timestamp. Do not add explanations or extra text.\n\n{srt_text}"
     response = model.generate_content(prompt)
     return response.text
-def process_video(video_file, language="Auto Detect", translate_to=None):
-    """Process a video file to generate and translate subtitles."""
-    # Extract audio from the video
-    audio_file = extract_audio_from_video(video_file)
-    # Transcribe audio using Gemini
-    transcription = transcribe_audio_with_gemini(audio_file)
-    # Generate subtitles
-    subtitles = generate_subtitles(transcription)
-    # Save original subtitles to an SRT file
-    original_srt_file = os.path.join(tempfile.gettempdir(), "original_subtitles.srt")
-    with open(original_srt_file, "w", encoding="utf-8") as f:
-        f.write(subtitles)
-    # Translate subtitles if a target language is provided
-    translated_srt_file = None
-    if translate_to and translate_to != "None":
-        translated_subtitles = translate_srt(subtitles, translate_to)
-        translated_srt_file = os.path.join(tempfile.gettempdir(), "translated_subtitles.srt")
-        with open(translated_srt_file, "w", encoding="utf-8") as f:
-            f.write(translated_subtitles)
-    # Clean up extracted audio file
-    os.remove(audio_file)
-    return original_srt_file, translated_srt_file, "Detected Language: Auto"
-# Define the Gradio interface
-with gr.Blocks(title="AutoSubGen - AI Video Subtitle Generator") as demo:
-    # Header
-    with gr.Column():
-        gr.Markdown("# 🎥 AutoSubGen")
-        gr.Markdown("### AI-Powered Video Subtitle Generator")
-        gr.Markdown("Automatically generate and translate subtitles for your videos in **SRT format**. Supports **100+ languages** and **auto-detection**.")
-    # Main content
-    with gr.Tab("Generate Subtitles"):
-        gr.Markdown("### Upload a video file to generate subtitles.")
-        with gr.Row():
-            video_input = gr.Video(label="Upload Video File", scale=2)
-            language_dropdown = gr.Dropdown(
                 choices=SUPPORTED_LANGUAGES,
-                label="Select Language",
-                value="Auto Detect",
-                scale=1
             )
-            translate_to_dropdown = gr.Dropdown(
-                choices=["None"] + SUPPORTED_LANGUAGES[1:],  # Exclude "Auto Detect"
                 label="Translate To",
-                value="None",
-                scale=1
             )
-        generate_button = gr.Button("Generate Subtitles", variant="primary")
-        with gr.Row():
-            original_subtitle_output = gr.File(label="Download Original Subtitles (SRT)")
-            translated_subtitle_output = gr.File(label="Download Translated Subtitles (SRT)")
-        detected_language_output = gr.Textbox(label="Detected Language")
-    # Link button to function
-    generate_button.click(
         process_video,
-        inputs=[video_input, language_dropdown, translate_to_dropdown],
-        outputs=[original_subtitle_output, translated_subtitle_output, detected_language_output]
     )
-# Launch the Gradio interface with a public link
-demo.launch(share=True)

 import os
+import re
 import google.generativeai as genai
 from moviepy.video.io.VideoFileClip import VideoFileClip
 import tempfile
 import logging
 import gradio as gr
+from datetime import timedelta
 # Suppress moviepy logs
 logging.getLogger("moviepy").setLevel(logging.ERROR)
 genai.configure(api_key=os.environ["GEMINI_API_KEY"])
 # Create the Gemini model
+# One module-level model instance is used by both gemini_transcribe() and translate_subtitles().
+model = genai.GenerativeModel("gemini-2.0-flash-exp")
+# Enhanced language support
+# Index 0 ("Auto Detect") is sliced off when the "Translate To" choices are built in the UI.
 SUPPORTED_LANGUAGES = [
+    "Auto Detect", "English", "Spanish", "French", "German", "Italian",
+    "Portuguese", "Russian", "Japanese", "Korean", "Arabic", "Hindi",
+    "Chinese", "Dutch", "Turkish", "Polish", "Vietnamese", "Thai"
 ]
+# Magic Prompts
+# Transcription prompt: asks for each cue as a "[start -> end]" line followed by the cue text,
+# which is the layout create_srt() looks for.
+TRANSCRIPTION_PROMPT = """You are a professional subtitling expert. Analyze this audio and generate precise subtitles with accurate timestamps following these rules:
+1. Identify natural speech segments (3-7 words)
+2. Include exact start/end times in [HH:MM:SS.ms] format
+3. Add speaker identification when multiple voices
+4. Preserve emotional tone and punctuation
+5. Format exactly like:
+[00:00:05.250 -> 00:00:08.100]
+Hello world! This is an example.
+[00:00:08.500 -> 00:00:10.200]
+Second subtitle line.
+Return ONLY the subtitles with timestamps, no explanations."""
+# Translation prompt: filled with str.format(target_language=..., subtitles=...) in translate_subtitles().
+TRANSLATION_PROMPT = """You are a certified translator. Translate these subtitles to {target_language} following these rules:
+1. Keep timestamps EXACTLY as original
+2. Match subtitle length to original timing
+3. Preserve names/technical terms
+4. Use natural colloquial speech
+5. Maintain line breaks and formatting
+ORIGINAL SUBTITLES:
+{subtitles}
+TRANSLATED {target_language} SUBTITLES:"""
+def extract_audio(video_path):
+    """Extract the video's audio track to a temporary 16-bit PCM WAV file.
+    Args:
+        video_path: Path of the video file to read.
+    Returns:
+        Path of the WAV file that was written (44.1 kHz, pcm_s16le). The
+        caller is responsible for deleting it.
+    Raises:
+        ValueError: If the video has no audio track.
+    """
+    video = VideoFileClip(video_path)
+    try:
+        if video.audio is None:
+            raise ValueError("The uploaded video has no audio track to transcribe.")
+        # A unique name keeps concurrent requests from overwriting each other's audio.
+        fd, audio_path = tempfile.mkstemp(suffix=".wav", prefix="high_quality_audio_")
+        os.close(fd)
+        try:
+            video.audio.write_audiofile(audio_path, fps=44100, nbytes=2, codec='pcm_s16le')
+        except Exception:
+            # Do not leave a partial temp file behind when extraction fails.
+            if os.path.exists(audio_path):
+                os.remove(audio_path)
+            raise
+    finally:
+        # Always release the clip's readers, on success and on failure.
+        video.close()
+    return audio_path
+def parse_timestamp(timestamp_str):
+    """Convert a timestamp string to seconds.
+    Accepts "HH:MM:SS(.fff)", "MM:SS(.fff)" or "SS(.fff)"; a comma is also
+    accepted as the decimal separator and surrounding whitespace is ignored.
+    Args:
+        timestamp_str: The timestamp text, e.g. "00:00:05.250".
+    Returns:
+        The timestamp as a float number of seconds.
+    Raises:
+        ValueError: If the text has more than three fields or a field is not numeric.
+    """
+    fields = timestamp_str.strip().replace(',', '.').split(':')
+    if len(fields) > 3:
+        raise ValueError(f"Invalid timestamp: {timestamp_str!r}")
+    seconds = 0.0
+    # Fold left to right so missing leading fields (hours, minutes) count as zero.
+    for field in fields:
+        seconds = seconds * 60 + float(field)
+    return seconds
+def gemini_transcribe(audio_path):
+    """Send the audio file to Gemini and return its timestamped transcription text.
+    The file at audio_path is read fully into memory and submitted together
+    with TRANSCRIPTION_PROMPT as an 'audio/wav' part.
+    """
+    with open(audio_path, "rb") as f:
         audio_data = f.read()
+    audio_part = {'mime_type': 'audio/wav', 'data': audio_data}
+    request_parts = [TRANSCRIPTION_PROMPT, audio_part]
+    return model.generate_content(contents=request_parts).text
+# Matches a cue header such as "[00:00:05.250 -> 00:00:08.100]"; bracketed text
+# without "->" (e.g. "[music]") is left alone.
+_CUE_HEADER_RE = re.compile(r'\[\s*([^\[\]]+?)\s*->\s*([^\[\]]+?)\s*\]')
+def _format_srt_timestamp(seconds):
+    """Render a number of seconds as an SRT timestamp (HH:MM:SS,mmm)."""
+    total_ms = max(0, round(seconds * 1000))
+    hours, remainder = divmod(total_ms, 3_600_000)
+    minutes, remainder = divmod(remainder, 60_000)
+    secs, millis = divmod(remainder, 1000)
+    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"
+def create_srt(subtitles_text):
+    """Convert Gemini's raw "[start -> end]" output to SRT format.
+    Cues are located by their bracketed header, so they are found whether or
+    not blank lines separate them. Cues with an unparseable timestamp or no
+    text are dropped, and the remaining cues are numbered 1..N without gaps.
+    Args:
+        subtitles_text: Raw model output; None or "" yields "".
+    Returns:
+        SRT text: "N", "HH:MM:SS,mmm --> HH:MM:SS,mmm", the cue text, and a
+        blank line between consecutive cues.
+    """
+    subtitles_text = subtitles_text or ""
+    headers = list(_CUE_HEADER_RE.finditer(subtitles_text))
+    cues = []
+    for pos, header in enumerate(headers):
+        # A cue's text runs from the end of its header to the start of the next header.
+        if pos + 1 < len(headers):
+            body_end = headers[pos + 1].start()
+        else:
+            body_end = len(subtitles_text)
+        body = subtitles_text[header.end():body_end]
+        # A blank line would terminate the cue in SRT, so drop empty lines.
+        text_lines = [line.strip() for line in body.splitlines() if line.strip()]
+        if not text_lines:
+            continue
+        try:
+            start_time = parse_timestamp(header.group(1))
+            end_time = parse_timestamp(header.group(2))
+        except ValueError:
+            # Malformed timestamp from the model: skip this cue, keep the rest.
+            continue
+        cues.append((start_time, end_time, "\n".join(text_lines)))
+    return "\n".join(
+        f"{number}\n"
+        f"{_format_srt_timestamp(start_time)} --> {_format_srt_timestamp(end_time)}\n"
+        f"{text}\n"
+        for number, (start_time, end_time, text) in enumerate(cues, 1)
+    )
+def translate_subtitles(subtitles, target_lang):
+    """Ask Gemini to translate the subtitles into target_lang and return the reply text.
+    The request is TRANSLATION_PROMPT with the target language and the
+    subtitle text substituted in; the prompt instructs the model to keep timings.
+    """
+    template_fields = {"target_language": target_lang, "subtitles": subtitles}
+    prompt = TRANSLATION_PROMPT.format(**template_fields)
     response = model.generate_content(prompt)
     return response.text
+def process_video(video_path, source_lang, target_lang):
+    """Run the full pipeline: extract audio, transcribe, build SRT, optionally translate.
+    Args:
+        video_path: Path of the uploaded video file.
+        source_lang: Source-language selection from the UI. It is accepted
+            for the UI wiring but is not used by the pipeline.
+        target_lang: Language to translate into; "None" or an empty value
+            skips translation.
+    Returns:
+        A tuple (original_srt_path, translated_srt_path). The second item is
+        None when no translation was requested.
+    """
+    # Audio extraction
+    audio_path = extract_audio(video_path)
+    try:
+        # Transcription
+        raw_transcription = gemini_transcribe(audio_path)
+    finally:
+        # Cleanup: remove the temporary audio even when transcription fails.
+        if os.path.exists(audio_path):
+            os.remove(audio_path)
+    srt_original = create_srt(raw_transcription)
+    # Save original. Subtitles are often non-ASCII, so do not rely on the
+    # platform's default encoding.
+    original_srt = os.path.join(tempfile.gettempdir(), "original.srt")
+    with open(original_srt, "w", encoding="utf-8") as f:
+        f.write(srt_original)
+    # Translation
+    translated_srt = None
+    if target_lang and target_lang != "None":
+        translated_text = translate_subtitles(srt_original, target_lang)
+        translated_srt = os.path.join(tempfile.gettempdir(), "translated.srt")
+        with open(translated_srt, "w", encoding="utf-8") as f:
+            f.write(translated_text)
+    return original_srt, translated_srt
+# Gradio Interface
+# Two-column layout: inputs and the trigger button on the left, result files and a preview box on the right.
+with gr.Blocks(theme=gr.themes.Default(spacing_size="sm")) as app:
+    gr.Markdown("# 🎬 Professional Subtitle Studio")
+    gr.Markdown("Generate broadcast-quality subtitles with perfect timing")
+    with gr.Row():
+        with gr.Column():
+            video_input = gr.Video(label="Upload Video", sources=["upload"])
+            # NOTE(review): lang_row is created but never entered with "with", so the two
+            # dropdowns below are not nested under it here - confirm whether a
+            # side-by-side row was intended.
+            lang_row = gr.Row()
+            source_lang = gr.Dropdown(
+                label="Source Language",
                 choices=SUPPORTED_LANGUAGES,
+                value="Auto Detect"
             )
+            # "None" is the sentinel process_video checks to skip translation;
+            # "Auto Detect" (index 0) is excluded as a translation target.
+            target_lang = gr.Dropdown(
                 label="Translate To",
+                choices=["None"] + SUPPORTED_LANGUAGES[1:],
+                value="None"
             )
+            process_btn = gr.Button("Generate Subtitles", variant="primary")
+        with gr.Column():
+            original_sub = gr.File(label="Original Subtitles")
+            translated_sub = gr.File(label="Translated Subtitles")
+            # Static placeholder markup: preview_area is not listed in the click() outputs
+            # below, so process_video never fills it.
+            preview_area = gr.HTML("""
+                <div style='border: 2px dashed #666; padding: 20px; border-radius: 8px;'>
+                    <h3 style='margin-top: 0;'>Subtitle Preview</h3>
+                    <div id='preview-content' style='height: 300px; overflow-y: auto;'></div>
+                </div>
+            """)
+    # Wire the button: process_video receives (video, source language, target language)
+    # and its two return values populate the two file components.
+    process_btn.click(
         process_video,
+        inputs=[video_input, source_lang, target_lang],
+        outputs=[original_sub, translated_sub]
     )
+# Only start the server when run as a script; share=True requests a public link.
+if __name__ == "__main__":
+    app.launch(server_port=7860, share=True)