"""Gradio app that transcribes a video's audio with Gemini and exports SRT subtitles.

Pipeline: extract the audio track with moviepy, ask Gemini for timestamped
subtitles, convert them to SubRip (.srt), and optionally ask Gemini to
translate the result into a second .srt file.
"""

import logging
import os
import re
import tempfile
from datetime import timedelta
from typing import Optional, Tuple

import google.generativeai as genai
import gradio as gr
from moviepy.video.io.VideoFileClip import VideoFileClip

logger = logging.getLogger(__name__)

# Suppress moviepy logs
logging.getLogger("moviepy").setLevel(logging.ERROR)

# Configure Gemini API (raises KeyError at import time if the key is not set)
genai.configure(api_key=os.environ["GEMINI_API_KEY"])
model = genai.GenerativeModel("gemini-2.0-pro-exp-02-05")

# Audio larger than this is sent through the Gemini Files API instead of being
# embedded in the request, to stay well below the inline request size limit.
INLINE_AUDIO_LIMIT_BYTES = 10 * 1024 * 1024

# Supported languages
SUPPORTED_LANGUAGES = [
    "Auto Detect",
    "English",
    "Spanish",
    "French",
    "German",
    "Italian",
    "Portuguese",
    "Russian",
    "Japanese",
    "Korean",
    "Arabic",
    "Hindi",
    "Chinese",
    "Dutch",
    "Turkish",
    "Polish",
    "Vietnamese",
    "Thai",
]

# Magic Prompts
TRANSCRIPTION_PROMPT = """You are a professional subtitling expert. Generate precise subtitles with accurate timestamps following these rules:
1. Use [HH:MM:SS.ms -> HH:MM:SS.ms] format
2. Each subtitle 3-7 words
3. Include speaker changes
4. Preserve emotional tone
5. Format example:
[00:00:05.250 -> 00:00:08.100]
Example subtitle text

Return ONLY subtitles with timestamps."""

TRANSLATION_PROMPT = """Translate these subtitles to {target_language} following:
1. Keep timestamps identical
2. Match text length to timing
3. Preserve technical terms
4. Use natural speech patterns

ORIGINAL:
{subtitles}

TRANSLATED:"""

# One timestamp: optional hours, then MM:SS, then an optional fraction that may
# use '.' or ',' as the decimal separator.
_TIMESTAMP_PATTERN = r"(?:\d+:)?\d+:\d+(?:[.,]\d{1,6})?"

# A cue timing line. Accepts both the bracketed "[start -> end]" form requested
# in TRANSCRIPTION_PROMPT and the SubRip "start --> end" form, so that SRT text
# produced by create_srt() (and sent to the translator) can be parsed again.
_CUE_TIMING_RE = re.compile(
    r"\[?\s*(" + _TIMESTAMP_PATTERN + r")\s*-{1,2}>\s*(" + _TIMESTAMP_PATTERN + r")\s*\]?"
)

# Markdown code-fence lines that language models often wrap their answer in.
_CODE_FENCE_RE = re.compile(r"^[ \t]*```.*$", re.MULTILINE)

# Cue separator: a newline followed by one or more blank (whitespace-only) lines.
_BLANK_LINES_RE = re.compile(r"\n(?:[ \t]*\n)+")


def parse_timestamp(timestamp_str: str) -> float:
    """Convert a ``HH:MM:SS.fff`` or ``MM:SS.fff`` timestamp to seconds.

    Surrounding brackets/spaces are ignored and ``,`` is accepted as the
    decimal separator.

    Args:
        timestamp_str: The timestamp text.

    Returns:
        The position in seconds.

    Raises:
        ValueError: If the text does not have two or three ``:``-separated
            numeric fields.
    """
    clean_ts = timestamp_str.strip("[] ").replace(",", ".")
    parts = clean_ts.split(":")
    if len(parts) == 3:  # HH:MM:SS.ss
        hours, minutes, seconds_part = parts
    elif len(parts) == 2:  # MM:SS.ss
        hours = "0"
        minutes, seconds_part = parts
    else:
        raise ValueError(f"Invalid timestamp: {timestamp_str}")
    try:
        return float(hours) * 3600 + float(minutes) * 60 + float(seconds_part)
    except ValueError:
        raise ValueError(f"Invalid timestamp: {timestamp_str}") from None


def _format_srt_timestamp(seconds: float) -> str:
    """Format a position in seconds as a SubRip timestamp ``HH:MM:SS,mmm``."""
    # str(timedelta) yields "H:MM:SS.ffffff", which is not valid SRT, so the
    # fields are built by hand. timedelta is only used to round to whole ms.
    total_ms = round(timedelta(seconds=max(seconds, 0.0)) / timedelta(milliseconds=1))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


def create_srt(subtitles_text: Optional[str]) -> str:
    """Convert timestamped subtitle text into SubRip (.srt) text.

    Input cues are located by their timing line (``[start -> end]`` or
    ``start --> end``); whatever follows a timing line, up to the next timing
    line or blank line, is that cue's text. Anything before the first timing
    line of a blank-line-separated chunk (e.g. an SRT cue number) is ignored,
    as are Markdown code-fence lines and cues without text.

    Args:
        subtitles_text: Raw model output or existing SRT text. ``None`` or an
            empty string yields an empty result.

    Returns:
        SRT text with cues numbered consecutively from 1, or ``""`` when no
        cue could be parsed.
    """
    if not subtitles_text:
        return ""

    normalized = subtitles_text.replace("\r\n", "\n").replace("\r", "\n")
    normalized = _CODE_FENCE_RE.sub("", normalized).strip()

    cues = []
    for chunk in _BLANK_LINES_RE.split(normalized):
        matches = list(_CUE_TIMING_RE.finditer(chunk))
        for position, match in enumerate(matches):
            # A chunk may hold several cues when the model separates them with
            # single newlines; each cue's text ends where the next timing starts.
            if position + 1 < len(matches):
                text_end = matches[position + 1].start()
            else:
                text_end = len(chunk)
            text = chunk[match.end():text_end].strip()
            if not text:
                continue
            try:
                start_time = parse_timestamp(match.group(1))
                end_time = parse_timestamp(match.group(2))
            except ValueError as exc:
                logger.warning(
                    "Skipping invalid entry %r: %s", match.group(0).strip(), exc
                )
                continue
            cues.append((start_time, end_time, text))

    # Number only the cues that were kept so the SRT indices have no gaps.
    return "\n".join(
        f"{number}\n"
        f"{_format_srt_timestamp(start_time)} --> {_format_srt_timestamp(end_time)}\n"
        f"{text}\n"
        for number, (start_time, end_time, text) in enumerate(cues, 1)
    )


def _remove_quietly(path: str) -> None:
    """Delete *path*, ignoring the case where it cannot be removed."""
    try:
        os.remove(path)
    except OSError:
        logger.debug("Could not remove temporary file %s", path, exc_info=True)


def _write_srt(directory: str, filename: str, content: str) -> str:
    """Write *content* as UTF-8 to ``directory/filename`` and return the path."""
    path = os.path.join(directory, filename)
    # Explicit UTF-8: the platform default encoding may not cover non-Latin text.
    with open(path, "w", encoding="utf-8") as srt_file:
        srt_file.write(content)
    return path


def extract_audio(video_path: str) -> str:
    """Extract the audio track of a video to a temporary 16-bit PCM WAV file.

    Args:
        video_path: Path of the video file.

    Returns:
        Path of the newly created WAV file. The caller is responsible for
        deleting it.

    Raises:
        ValueError: If the video has no audio track.
    """
    # A unique file per call, so concurrent requests do not overwrite each other.
    fd, audio_path = tempfile.mkstemp(prefix="hq_audio_", suffix=".wav")
    os.close(fd)
    try:
        video = VideoFileClip(video_path)
        try:
            if video.audio is None:
                raise ValueError("The uploaded video has no audio track")
            video.audio.write_audiofile(
                audio_path, fps=44100, nbytes=2, codec="pcm_s16le"
            )
        finally:
            # Release the file handles / reader processes held by the clip.
            video.close()
    except Exception:
        _remove_quietly(audio_path)
        raise
    return audio_path


def gemini_transcribe(audio_path: str, source_lang: Optional[str] = None) -> str:
    """Transcribe a WAV file into timestamped subtitles with Gemini.

    Args:
        audio_path: Path of the WAV file to transcribe.
        source_lang: Spoken language to hint to the model. ``None``, an empty
            string or ``"Auto Detect"`` sends no hint.

    Returns:
        The model's text response.
    """
    prompt = TRANSCRIPTION_PROMPT
    if source_lang and source_lang != "Auto Detect":
        prompt += (
            f"\n\nThe audio is spoken in {source_lang}. "
            f"Transcribe it in {source_lang}."
        )

    if os.path.getsize(audio_path) > INLINE_AUDIO_LIMIT_BYTES:
        # Too large to embed in the request: upload it and reference the file.
        uploaded = genai.upload_file(audio_path, mime_type="audio/wav")
        try:
            response = model.generate_content([prompt, uploaded])
        finally:
            try:
                genai.delete_file(uploaded.name)
            except Exception:
                logger.warning(
                    "Could not delete uploaded file %s", uploaded.name, exc_info=True
                )
    else:
        with open(audio_path, "rb") as audio_file:
            audio_data = audio_file.read()
        response = model.generate_content(
            [prompt, {"mime_type": "audio/wav", "data": audio_data}]
        )
    return response.text


def translate_subtitles(subtitles: str, target_lang: str) -> str:
    """Ask Gemini to translate subtitle text into *target_lang*.

    Args:
        subtitles: Subtitle text including its timestamps.
        target_lang: Name of the language to translate into.

    Returns:
        The model's text response.
    """
    prompt = TRANSLATION_PROMPT.format(
        target_language=target_lang,
        subtitles=subtitles,
    )
    response = model.generate_content(prompt)
    return response.text


def process_video(
    video_path: Optional[str],
    source_lang: Optional[str],
    target_lang: Optional[str],
) -> Tuple[Optional[str], Optional[str]]:
    """Run the full pipeline: audio extraction, transcription, translation.

    Failures are logged and surfaced to the user as a Gradio warning rather
    than raised. If only the translation step fails, the original subtitles
    are still returned.

    Args:
        video_path: Path of the uploaded video, or ``None`` if nothing was
            uploaded.
        source_lang: Spoken language, or ``"Auto Detect"``.
        target_lang: Language to translate into, or ``"None"`` to skip
            translation.

    Returns:
        ``(original_srt_path, translated_srt_path)``. Either element is
        ``None`` when that file was not produced.
    """
    if not video_path:
        gr.Warning("Please upload a video first.")
        return None, None

    audio_path = None
    try:
        audio_path = extract_audio(video_path)
        raw_transcription = gemini_transcribe(audio_path, source_lang)
        srt_original = create_srt(raw_transcription)
        if not srt_original:
            raise ValueError("No subtitles could be parsed from the transcription")

        # A fresh directory per request keeps the download names stable while
        # preventing concurrent requests from overwriting each other's files.
        output_dir = tempfile.mkdtemp(prefix="subtitles_")
        original_srt = _write_srt(output_dir, "original.srt", srt_original)
    except Exception as exc:
        logger.exception("Processing error")
        gr.Warning(f"Processing error: {exc}")
        return None, None
    finally:
        # Remove the intermediate audio on failure as well as on success.
        if audio_path is not None:
            _remove_quietly(audio_path)

    translated_srt = None
    if target_lang and target_lang != "None":
        try:
            translated_text = translate_subtitles(srt_original, target_lang)
            # Re-parse the translated text to normalize whatever the model returned.
            srt_translated = create_srt(translated_text)
            if not srt_translated:
                raise ValueError("No subtitles could be parsed from the translation")
            translated_srt = _write_srt(output_dir, "translated.srt", srt_translated)
        except Exception as exc:
            logger.exception("Translation failed")
            gr.Warning(f"Translation failed: {exc}")

    return original_srt, translated_srt


# Gradio Interface
with gr.Blocks(theme=gr.themes.Soft(), title="AI Subtitle Studio") as app:
    gr.Markdown("# 🎬 Professional Subtitle Generator")

    with gr.Row():
        video_input = gr.Video(label="Upload Video", sources=["upload"])
        with gr.Column():
            source_lang = gr.Dropdown(
                label="Source Language",
                choices=SUPPORTED_LANGUAGES,
                value="Auto Detect",
            )
            target_lang = gr.Dropdown(
                label="Translate To",
                # "Auto Detect" is not a valid translation target.
                choices=["None"] + SUPPORTED_LANGUAGES[1:],
                value="None",
            )
            process_btn = gr.Button("Generate", variant="primary")

    with gr.Row():
        original_sub = gr.File(label="Original Subtitles")
        translated_sub = gr.File(label="Translated Subtitles")

    process_btn.click(
        process_video,
        inputs=[video_input, source_lang, target_lang],
        outputs=[original_sub, translated_sub],
    )

if __name__ == "__main__":
    # NOTE: share=True requests a public share link for the app.
    app.launch(server_port=7860, share=True)