AutoSubGen / app.py
Athspi's picture
Update app.py
e057eaf verified
raw
history blame
5.88 kB
import os
import re
import google.generativeai as genai
from moviepy.video.io.VideoFileClip import VideoFileClip
import tempfile
import logging
import gradio as gr
from datetime import timedelta
# Suppress moviepy logs: only records at ERROR level or above are emitted by
# the "moviepy" logger.
logging.getLogger("moviepy").setLevel(logging.ERROR)
# Configure Gemini API
# The key is read from the GEMINI_API_KEY environment variable when this module
# is imported; a missing variable raises KeyError at that point.
genai.configure(api_key=os.environ["GEMINI_API_KEY"])
# Module-level model handle; gemini_transcribe() and translate_subtitles() both
# call generate_content() on it.
model = genai.GenerativeModel("gemini-2.0-pro-exp-02-05")
# Supported languages
# "Auto Detect" must stay at index 0: the "Translate To" dropdown builds its
# choices from SUPPORTED_LANGUAGES[1:] to leave it out.
SUPPORTED_LANGUAGES = [
    "Auto Detect", "English", "Spanish", "French", "German", "Italian",
    "Portuguese", "Russian", "Japanese", "Korean", "Arabic", "Hindi",
    "Chinese", "Dutch", "Turkish", "Polish", "Vietnamese", "Thai"
]
# Magic Prompts
# Instruction sent to the model together with the audio bytes. It requests
# "[HH:MM:SS.ms -> HH:MM:SS.ms]" timestamp lines followed by the subtitle text,
# which is the layout create_srt() looks for.
TRANSCRIPTION_PROMPT = """You are a professional subtitling expert. Generate precise subtitles with accurate timestamps following these rules:
1. Use [HH:MM:SS.ms -> HH:MM:SS.ms] format
2. Each subtitle 3-7 words
3. Include speaker changes
4. Preserve emotional tone
5. Format example:
[00:00:05.250 -> 00:00:08.100]
Example subtitle text
Return ONLY subtitles with timestamps."""
# Template filled with str.format(); it has exactly two placeholders,
# {target_language} and {subtitles}.
TRANSLATION_PROMPT = """Translate these subtitles to {target_language} following:
1. Keep timestamps identical
2. Match text length to timing
3. Preserve technical terms
4. Use natural speech patterns
ORIGINAL:
{subtitles}
TRANSLATED:"""
def parse_timestamp(timestamp_str):
    """Convert a ``[HH:]MM:SS.fff`` timestamp string into seconds.

    Surrounding square brackets and spaces are ignored, and a comma is
    accepted as the decimal separator.

    Args:
        timestamp_str: Timestamp text with two or three colon-separated
            fields, e.g. ``"00:01:02,500"`` or ``"02:30.5"``.

    Returns:
        The timestamp as a float number of seconds.

    Raises:
        ValueError: If the string does not have two or three fields, or a
            field is not a valid number.
    """
    fields = timestamp_str.strip("[] ").replace(',', '.').split(':')
    if len(fields) not in (2, 3):
        raise ValueError(f"Invalid timestamp: {timestamp_str}")

    # The hours field is optional; star-unpacking leaves it empty for MM:SS.
    *hour_field, minute_field, second_field = fields
    total = float(hour_field[0]) * 3600 if hour_field else 0.0
    total += float(minute_field) * 60
    total += float(second_field)
    return total
# Matches a "start -> end" time range. Square brackets, the hours field and the
# fractional part are optional, and both "->" and the SRT arrow "-->" are
# accepted so that already-converted SRT text can be parsed again.
_TIMESTAMP_RANGE_RE = re.compile(
    r'\[?\s*((?:\d+:)?\d+:\d+(?:[.,]\d+)?)\s*-{1,2}>\s*'
    r'((?:\d+:)?\d+:\d+(?:[.,]\d+)?)\s*\]?'
)


def _format_srt_time(seconds):
    """Render a number of seconds as an SRT ``HH:MM:SS,mmm`` timestamp."""
    total_ms = max(0, round(seconds * 1000))
    hours, total_ms = divmod(total_ms, 3_600_000)
    minutes, total_ms = divmod(total_ms, 60_000)
    secs, millis = divmod(total_ms, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


def create_srt(subtitles_text):
    """Convert timestamped subtitle text into SubRip (SRT) format.

    The input is split into entries on blank lines. Each entry must contain a
    time range such as ``[00:00:05.250 -> 00:00:08.100]`` (or the SRT form
    ``00:00:05,250 --> 00:00:08,100``); everything after the range is taken as
    the subtitle text. Entries without a time range or without text are
    dropped.

    Args:
        subtitles_text: Raw subtitle text, typically a model response.

    Returns:
        The SRT document as a string, with cues numbered consecutively from 1.
        An empty string is returned when nothing could be parsed.
    """
    if not subtitles_text:
        return ""

    normalized = subtitles_text.replace("\r\n", "\n").strip()
    entries = re.split(r'\n{2,}', normalized)
    srt_output = []
    for idx, entry in enumerate(entries, 1):
        time_match = _TIMESTAMP_RANGE_RE.search(entry)
        if not time_match:
            continue
        try:
            start_time = parse_timestamp(time_match.group(1))
            end_time = parse_timestamp(time_match.group(2))
        except ValueError as e:
            print(f"Skipping invalid entry {idx}: {str(e)}")
            continue

        # Take the text after the matched range rather than splitting on "]",
        # so unbracketed input does not leak its index/timestamp lines into
        # the cue text.
        text = entry[time_match.end():].strip()
        if not text:
            continue

        # Number cues by how many were emitted, not by input position, so the
        # sequence has no gaps when entries are skipped.
        cue_number = len(srt_output) + 1
        srt_output.append(
            f"{cue_number}\n"
            f"{_format_srt_time(start_time)} --> {_format_srt_time(end_time)}\n"
            f"{text}\n"
        )
    return "\n".join(srt_output)
def extract_audio(video_path):
    """Extract a video's audio track into a temporary WAV file.

    Args:
        video_path: Path of the video file to read.

    Returns:
        Path of a newly created WAV file (written with fps=44100, 16-bit
        PCM). The caller is responsible for deleting it.

    Raises:
        ValueError: If the video has no audio track.
    """
    video = VideoFileClip(video_path)
    try:
        if video.audio is None:
            raise ValueError("The video has no audio track")

        # A unique file per call keeps concurrent requests from overwriting
        # each other's audio.
        fd, audio_path = tempfile.mkstemp(prefix="hq_audio_", suffix=".wav")
        os.close(fd)
        try:
            video.audio.write_audiofile(
                audio_path, fps=44100, nbytes=2, codec='pcm_s16le'
            )
        except Exception:
            # Do not leave a partial file behind; the original error is what
            # the caller needs to see.
            try:
                os.remove(audio_path)
            except OSError:
                pass
            raise
    finally:
        # Release the reader resources held by the clip.
        video.close()
    return audio_path
def gemini_transcribe(audio_path):
    """Send a WAV file to the Gemini model and return its text response.

    Args:
        audio_path: Path of the WAV file to transcribe.

    Returns:
        The ``text`` of the model response to TRANSCRIPTION_PROMPT plus the
        audio.
    """
    with open(audio_path, "rb") as wav_file:
        wav_bytes = wav_file.read()

    # NOTE(review): the whole file is sent inline with the request; long
    # recordings may run into API request-size limits — confirm.
    audio_part = {"mime_type": "audio/wav", "data": wav_bytes}
    reply = model.generate_content([TRANSCRIPTION_PROMPT, audio_part])
    return reply.text
def translate_subtitles(subtitles, target_lang):
    """Ask the Gemini model to translate subtitle text.

    Args:
        subtitles: Subtitle text (with timestamps) to translate.
        target_lang: Name of the language to translate into.

    Returns:
        The ``text`` of the model response to the filled-in
        TRANSLATION_PROMPT.
    """
    filled_prompt = TRANSLATION_PROMPT.format(
        subtitles=subtitles,
        target_language=target_lang,
    )
    return model.generate_content(filled_prompt).text
def process_video(video_path, source_lang, target_lang):
    """Run the full pipeline: extract audio, transcribe, optionally translate.

    Args:
        video_path: Path of the uploaded video file.
        source_lang: Source-language choice from the UI. It is accepted for
            interface compatibility but is not used by the pipeline.
        target_lang: Language to translate into, or ``"None"`` (or an empty
            value) to skip translation.

    Returns:
        A ``(original_srt_path, translated_srt_path)`` tuple. The second item
        is ``None`` when no translation was requested. ``(None, None)`` is
        returned if any step fails; the error is printed.
    """
    audio_path = None
    try:
        audio_path = extract_audio(video_path)
        raw_transcription = gemini_transcribe(audio_path)
        srt_original = create_srt(raw_transcription)

        # A fresh directory per request keeps the download names stable while
        # preventing concurrent requests from overwriting each other's files.
        output_dir = tempfile.mkdtemp(prefix="subtitles_")

        original_srt = os.path.join(output_dir, "original.srt")
        # Subtitles routinely contain non-ASCII text; do not rely on the
        # platform's default encoding.
        with open(original_srt, "w", encoding="utf-8") as f:
            f.write(srt_original)

        translated_srt = None
        if target_lang and target_lang != "None":
            # Translate the raw "[start -> end]" transcription, not the SRT
            # text: the translation is re-parsed by create_srt(), which is
            # built around the bracketed format requested by the
            # transcription prompt.
            translated_text = translate_subtitles(raw_transcription, target_lang)
            translated_srt = os.path.join(output_dir, "translated.srt")
            with open(translated_srt, "w", encoding="utf-8") as f:
                f.write(create_srt(translated_text))  # Re-parse translated text

        return original_srt, translated_srt
    except Exception as e:
        # Top-level boundary for the UI callback: report and return empty
        # outputs instead of propagating.
        print(f"Processing error: {str(e)}")
        return None, None
    finally:
        # Remove the intermediate audio even when a later step failed.
        if audio_path and os.path.exists(audio_path):
            try:
                os.remove(audio_path)
            except OSError as cleanup_error:
                print(f"Could not remove temporary audio: {cleanup_error}")
# Gradio Interface
# NOTE(review): the nesting of the layout blocks below was reconstructed from a
# copy whose indentation had been stripped — confirm against the intended
# layout. Statement order is unchanged.
with gr.Blocks(theme=gr.themes.Soft(), title="AI Subtitle Studio") as app:
    gr.Markdown("# 🎬 Professional Subtitle Generator")
    # Input row: the video upload next to the language options and the button.
    with gr.Row():
        video_input = gr.Video(label="Upload Video", sources=["upload"])
        with gr.Column():
            source_lang = gr.Dropdown(
                label="Source Language",
                choices=SUPPORTED_LANGUAGES,
                value="Auto Detect"
            )
            # "None" is the sentinel process_video() checks to skip
            # translation; "Auto Detect" (index 0) is not a valid target.
            target_lang = gr.Dropdown(
                label="Translate To",
                choices=["None"] + SUPPORTED_LANGUAGES[1:],
                value="None"
            )
            process_btn = gr.Button("Generate", variant="primary")
    # Output row: the two files returned by process_video().
    with gr.Row():
        original_sub = gr.File(label="Original Subtitles")
        translated_sub = gr.File(label="Translated Subtitles")
    # Inputs and outputs are positional: they map to process_video's three
    # parameters and its two-item return value, in this order.
    process_btn.click(
        process_video,
        inputs=[video_input, source_lang, target_lang],
        outputs=[original_sub, translated_sub]
    )
if __name__ == "__main__":
    # Only launch the server when run as a script, not when imported.
    app.launch(server_port=7860, share=True)