# Hugging Face Space (status: Running)
import os | |
import re | |
import google.generativeai as genai | |
from moviepy.video.io.VideoFileClip import VideoFileClip | |
import tempfile | |
import logging | |
import gradio as gr | |
from datetime import timedelta | |
# Only let moviepy report errors; its progress/info logging is silenced.
logging.getLogger("moviepy").setLevel(logging.ERROR)

# Gemini client setup. The API key is read from the GEMINI_API_KEY environment
# variable, so a missing variable raises KeyError as soon as the module loads.
genai.configure(api_key=os.environ["GEMINI_API_KEY"])
model = genai.GenerativeModel(model_name="gemini-2.0-pro-exp-02-05")
# Languages offered in the UI dropdowns. The first entry is the auto-detect
# option; the translation dropdown skips it by slicing from index 1.
SUPPORTED_LANGUAGES = [
    "Auto Detect",
    "English",
    "Spanish",
    "French",
    "German",
    "Italian",
    "Portuguese",
    "Russian",
    "Japanese",
    "Korean",
    "Arabic",
    "Hindi",
    "Chinese",
    "Dutch",
    "Turkish",
    "Polish",
    "Vietnamese",
    "Thai",
]
# Prompt templates sent to the Gemini model.

# Instructs the model to emit bracketed "[start -> end]" timing lines, each
# followed by the subtitle text; create_srt() parses this layout.
TRANSCRIPTION_PROMPT = """You are a professional subtitling expert. Generate precise subtitles with accurate timestamps following these rules:
1. Use [HH:MM:SS.ms -> HH:MM:SS.ms] format
2. Each subtitle 3-7 words
3. Include speaker changes
4. Preserve emotional tone
5. Format example:
[00:00:05.250 -> 00:00:08.100]
Example subtitle text
Return ONLY subtitles with timestamps."""

# str.format template: {target_language} and {subtitles} are filled in by
# translate_subtitles().
TRANSLATION_PROMPT = """Translate these subtitles to {target_language} following:
1. Keep timestamps identical
2. Match text length to timing
3. Preserve technical terms
4. Use natural speech patterns
ORIGINAL:
{subtitles}
TRANSLATED:"""
def parse_timestamp(timestamp_str):
    """Convert an ``[HH:]MM:SS[.fff]`` timestamp string into seconds.

    Surrounding square brackets and spaces are stripped, and a comma is
    accepted as the decimal separator.

    Args:
        timestamp_str: Timestamp text such as ``"[00:01:02.500]"`` or
            ``"01:02,5"``.

    Returns:
        The timestamp as a float number of seconds.

    Raises:
        ValueError: If the string does not consist of two or three
            colon-separated fields, or a field is not a valid number.
    """
    fields = timestamp_str.strip("[] ").replace(',', '.').split(':')
    if len(fields) not in (2, 3):
        raise ValueError(f"Invalid timestamp: {timestamp_str}")

    # ``leading`` holds the hours field when present, otherwise it is empty.
    *leading, minutes_field, seconds_field = fields
    total = 0.0
    if leading:
        total += float(leading[0]) * 3600
    total += float(minutes_field) * 60
    total += float(seconds_field)
    return total
def _format_srt_time(total_seconds):
    """Render a duration in seconds as an SRT ``HH:MM:SS,mmm`` timestamp."""
    total_ms = max(0, round(total_seconds * 1000))
    hours, rest = divmod(total_ms, 3600000)
    minutes, rest = divmod(rest, 60000)
    secs, millis = divmod(rest, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


def create_srt(subtitles_text):
    """Convert timestamped subtitle text into SubRip (SRT) text.

    Accepts both the bracketed ``[start -> end]`` layout requested from the
    model and SRT-style ``start --> end`` lines, so text that has already
    been converted (or translated from a converted file) can be re-parsed.

    Args:
        subtitles_text: Text containing timing lines followed by subtitle
            text. Cues are normally separated by blank lines; several cues
            inside one blank-line-delimited chunk are also split apart.

    Returns:
        SRT text with cues numbered consecutively from 1 and timestamps in
        ``HH:MM:SS,mmm`` form. Returns an empty string when the input is
        empty or contains no usable cue.
    """
    if not subtitles_text:
        return ""

    # One or two dashes before '>' covers both "->" and the SRT "-->" arrow;
    # the fractional part is optional and may have any number of digits.
    time_range = re.compile(
        r'\[?\s*((?:\d+:)?\d+:\d+(?:[.,]\d+)?)\s*-{1,2}>\s*'
        r'((?:\d+:)?\d+:\d+(?:[.,]\d+)?)\s*\]?'
    )

    normalized = subtitles_text.replace('\r\n', '\n').strip()
    srt_output = []
    for chunk in re.split(r'\n\s*\n', normalized):
        matches = list(time_range.finditer(chunk))
        for pos, match in enumerate(matches):
            # A cue's text runs up to the next timing line in the same chunk,
            # which keeps cues apart when the blank line between them is missing.
            if pos + 1 < len(matches):
                text_end = matches[pos + 1].start()
            else:
                text_end = len(chunk)
            # Slicing after the match drops any index/timing lines before the
            # text, whether or not the timing was wrapped in brackets.
            text = chunk[match.end():text_end].strip()
            if not text:
                continue
            try:
                start_time = parse_timestamp(match.group(1))
                end_time = parse_timestamp(match.group(2))
            except ValueError as exc:
                logging.warning("Skipping invalid subtitle entry: %s", exc)
                continue
            # Number by emitted cues so skipped entries leave no gaps.
            cue_number = len(srt_output) + 1
            srt_output.append(
                f"{cue_number}\n"
                f"{_format_srt_time(start_time)} --> {_format_srt_time(end_time)}\n"
                f"{text}\n"
            )
    return "\n".join(srt_output)
def extract_audio(video_path):
    """Extract a video's audio track to a temporary 44.1 kHz 16-bit PCM WAV file.

    Args:
        video_path: Path of the video file to read.

    Returns:
        Path of the newly created WAV file. The caller is responsible for
        deleting it.

    Raises:
        ValueError: If the video has no audio track.
    """
    video = VideoFileClip(video_path)
    try:
        if video.audio is None:
            raise ValueError(f"No audio track found in {video_path!r}")

        # A unique file per call keeps concurrent requests from overwriting
        # each other's audio.
        fd, audio_path = tempfile.mkstemp(prefix="hq_audio_", suffix=".wav")
        os.close(fd)
        try:
            video.audio.write_audiofile(
                audio_path, fps=44100, nbytes=2, codec='pcm_s16le'
            )
        except Exception:
            # Do not leave a partial file behind when the export fails.
            os.remove(audio_path)
            raise
    finally:
        # Always release the clip's reader resources.
        video.close()
    return audio_path
def gemini_transcribe(audio_path):
    """Send a WAV file to the Gemini model and return the reply text.

    The file's bytes are passed inline, tagged as ``audio/wav``, together
    with TRANSCRIPTION_PROMPT.

    Args:
        audio_path: Path of the WAV file to transcribe.

    Returns:
        The ``text`` of the model's response.
    """
    with open(audio_path, "rb") as wav_file:
        audio_part = {"mime_type": "audio/wav", "data": wav_file.read()}
    request_parts = [TRANSCRIPTION_PROMPT, audio_part]
    return model.generate_content(request_parts).text
def translate_subtitles(subtitles, target_lang):
    """Ask the Gemini model to translate subtitle text.

    Args:
        subtitles: Subtitle text (with timestamps) to translate.
        target_lang: Name of the language to translate into.

    Returns:
        The ``text`` of the model's response.
    """
    filled_prompt = TRANSLATION_PROMPT.format(
        subtitles=subtitles,
        target_language=target_lang,
    )
    return model.generate_content(filled_prompt).text
def process_video(video_path, source_lang, target_lang):
    """Run the full pipeline: extract audio, transcribe, optionally translate.

    Args:
        video_path: Path of the uploaded video file.
        source_lang: Source language chosen in the UI. It is accepted for
            interface compatibility but not used by the pipeline.
        target_lang: Language to translate into; ``"None"`` (or an empty
            value) skips translation.

    Returns:
        A ``(original_srt_path, translated_srt_path)`` tuple. The second item
        is ``None`` when no translation was requested. Both are ``None`` when
        no video was supplied or processing failed.
    """
    if not video_path:
        return None, None

    audio_path = None
    try:
        audio_path = extract_audio(video_path)
        raw_transcription = gemini_transcribe(audio_path)

        # A private directory per request keeps the friendly file names
        # without colliding with other requests in the shared temp dir.
        output_dir = tempfile.mkdtemp(prefix="subtitles_")

        original_srt = os.path.join(output_dir, "original.srt")
        # Subtitles routinely contain non-ASCII text, so do not depend on the
        # platform's default encoding.
        with open(original_srt, "w", encoding="utf-8") as f:
            f.write(create_srt(raw_transcription))

        translated_srt = None
        if target_lang and target_lang != "None":
            # Translate the raw transcription rather than the generated SRT:
            # its bracketed "[start -> end]" layout is what create_srt parses,
            # so the translated text can be converted the same way.
            translated_text = translate_subtitles(raw_transcription, target_lang)
            translated_srt = os.path.join(output_dir, "translated.srt")
            with open(translated_srt, "w", encoding="utf-8") as f:
                f.write(create_srt(translated_text))

        return original_srt, translated_srt
    except Exception:
        # Top-level boundary for the UI callback: log the traceback and return
        # empty outputs instead of propagating.
        logging.exception("Processing error")
        return None, None
    finally:
        # Remove the temporary audio on failure as well as on success.
        if audio_path is not None:
            try:
                os.remove(audio_path)
            except OSError:
                pass
# Gradio UI: a video upload next to the language selectors, two file outputs
# below, and a button that runs process_video.
with gr.Blocks(title="AI Subtitle Studio", theme=gr.themes.Soft()) as app:
    gr.Markdown("# 🎬 Professional Subtitle Generator")

    with gr.Row():
        video_input = gr.Video(sources=["upload"], label="Upload Video")
        with gr.Column():
            source_lang = gr.Dropdown(
                choices=SUPPORTED_LANGUAGES,
                value="Auto Detect",
                label="Source Language",
            )
            # "None" replaces the auto-detect entry and disables translation.
            target_lang = gr.Dropdown(
                choices=["None", *SUPPORTED_LANGUAGES[1:]],
                value="None",
                label="Translate To",
            )
            process_btn = gr.Button("Generate", variant="primary")

    with gr.Row():
        original_sub = gr.File(label="Original Subtitles")
        translated_sub = gr.File(label="Translated Subtitles")

    process_btn.click(
        process_video,
        inputs=[video_input, source_lang, target_lang],
        outputs=[original_sub, translated_sub],
    )

# Start the server only when the file is executed as a script.
if __name__ == "__main__":
    app.launch(share=True, server_port=7860)