Spaces:
Running
Running
File size: 5,879 Bytes
91f8d48 818e336 e6d59c3 91f8d48 e6d59c3 91f8d48 e6d59c3 818e336 91f8d48 e057eaf 818e336 43fec16 91f8d48 818e336 91f8d48 818e336 43fec16 818e336 43fec16 818e336 43fec16 818e336 43fec16 818e336 43fec16 818e336 43fec16 818e336 43fec16 818e336 43fec16 91f8d48 818e336 43fec16 818e336 43fec16 818e336 43fec16 818e336 e6d59c3 818e336 43fec16 818e336 43fec16 818e336 91f8d48 818e336 43fec16 91f8d48 43fec16 818e336 43fec16 91f8d48 818e336 43fec16 818e336 91f8d48 818e336 91f8d48 818e336 91f8d48 818e336 91f8d48 43fec16 91f8d48 818e336 91f8d48 818e336 91f8d48 818e336 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 |
import os
import re
import google.generativeai as genai
from moviepy.video.io.VideoFileClip import VideoFileClip
import tempfile
import logging
import gradio as gr
from datetime import timedelta
# Suppress moviepy logs
# Only records at ERROR level or above from the "moviepy" logger get through.
logging.getLogger("moviepy").setLevel(logging.ERROR)
# Configure Gemini API
# The key is read from the GEMINI_API_KEY environment variable; indexing
# os.environ raises KeyError at import time when the variable is not set.
genai.configure(api_key=os.environ["GEMINI_API_KEY"])
# Shared model handle used by the transcription and translation helpers.
model = genai.GenerativeModel("gemini-2.0-pro-exp-02-05")
# Languages offered in the UI dropdowns. The first entry is the
# auto-detection choice rather than a concrete language, so consumers that
# need real languages only can slice it off.
SUPPORTED_LANGUAGES = [
    "Auto Detect",
    "English",
    "Spanish",
    "French",
    "German",
    "Italian",
    "Portuguese",
    "Russian",
    "Japanese",
    "Korean",
    "Arabic",
    "Hindi",
    "Chinese",
    "Dutch",
    "Turkish",
    "Polish",
    "Vietnamese",
    "Thai",
]
# Magic Prompts
# Instruction text sent alongside the audio. It asks the model for
# bracketed "[start -> end]" timestamp lines, each followed by subtitle text.
TRANSCRIPTION_PROMPT = """You are a professional subtitling expert. Generate precise subtitles with accurate timestamps following these rules:
1. Use [HH:MM:SS.ms -> HH:MM:SS.ms] format
2. Each subtitle 3-7 words
3. Include speaker changes
4. Preserve emotional tone
5. Format example:
[00:00:05.250 -> 00:00:08.100]
Example subtitle text
Return ONLY subtitles with timestamps."""
# Translation request template. It is filled via str.format using the
# {target_language} and {subtitles} placeholders.
TRANSLATION_PROMPT = """Translate these subtitles to {target_language} following:
1. Keep timestamps identical
2. Match text length to timing
3. Preserve technical terms
4. Use natural speech patterns
ORIGINAL:
{subtitles}
TRANSLATED:"""
def parse_timestamp(timestamp_str):
    """Convert a subtitle timestamp string into seconds.

    Accepts ``HH:MM:SS.fff`` and ``MM:SS.fff``. Surrounding square brackets
    and spaces are ignored, and a decimal comma is treated like a dot.

    Args:
        timestamp_str: Timestamp text such as ``"[00:01:02.500"`` or
            ``"01:02,500"``.

    Returns:
        The position in seconds as a float.

    Raises:
        ValueError: If the string does not consist of two or three
            colon-separated fields, or a field is not numeric.
    """
    cleaned = timestamp_str.strip("[] ").replace(",", ".")
    fields = cleaned.split(":")
    if len(fields) == 3:
        hours, minutes, seconds = fields
    elif len(fields) == 2:
        hours = "0"
        minutes, seconds = fields
    else:
        raise ValueError(f"Invalid timestamp: {timestamp_str}")
    return float(hours) * 3600 + float(minutes) * 60 + float(seconds)


# A single timestamp: optional hours, minutes, seconds, optional fraction.
_TIMESTAMP_PATTERN = r"(?:\d+:)?\d+:\d+(?:[.,]\d{1,6})?"

# A "start -> end" range. Brackets are optional and both "->" and the SRT
# arrow "-->" are accepted so already-converted text can be parsed again.
_TIME_RANGE_RE = re.compile(
    r"\[?\s*(" + _TIMESTAMP_PATTERN + r")\s*-{1,2}>\s*("
    + _TIMESTAMP_PATTERN + r")\s*\]?"
)


def _format_srt_time(seconds):
    """Render a position in seconds as an SRT timestamp (``HH:MM:SS,mmm``)."""
    total_ms = max(0, round(seconds * 1000))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


def create_srt(subtitles_text):
    """Convert timestamped subtitle text into SubRip (SRT) format.

    The input is split into blank-line separated blocks. Every timestamp
    range found in a block starts a cue whose text runs up to the next range
    in that block (or the end of the block), so cues separated by a single
    newline are handled as well. Anything before the first range of a block,
    such as an SRT index line, is ignored.

    Args:
        subtitles_text: Text containing ``[start -> end]`` (or
            ``start --> end``) lines followed by the subtitle text.

    Returns:
        The SRT document as a string, with cues numbered consecutively from
        1. Returns an empty string when no usable cue is found.
    """
    if not subtitles_text:
        return ""

    # Normalise line endings so CRLF input splits the same way as LF input.
    normalized = subtitles_text.replace("\r\n", "\n").replace("\r", "\n")

    cues = []
    blocks = re.split(r"\n\s*\n", normalized.strip())
    for block_no, block in enumerate(blocks, 1):
        matches = list(_TIME_RANGE_RE.finditer(block))
        for pos, match in enumerate(matches):
            if pos + 1 < len(matches):
                text_end = matches[pos + 1].start()
            else:
                text_end = len(block)
            # Take the text after the matched range rather than splitting on
            # "]", which breaks when the brackets are missing.
            text = block[match.end():text_end].strip()
            if not text:
                continue
            try:
                start_time = parse_timestamp(match.group(1))
                end_time = parse_timestamp(match.group(2))
            except ValueError as exc:
                print(f"Skipping invalid entry {block_no}: {exc}")
                continue
            # Number by emitted cues so skipped entries leave no gaps.
            cues.append(
                f"{len(cues) + 1}\n"
                f"{_format_srt_time(start_time)} --> {_format_srt_time(end_time)}\n"
                f"{text}\n"
            )
    return "\n".join(cues)
def extract_audio(video_path):
    """Extract the audio track of a video into a temporary WAV file.

    Args:
        video_path: Path to the source video file.

    Returns:
        Path of a newly created WAV file (44100 Hz, 2 bytes per sample,
        ``pcm_s16le``) in the system temp directory. The caller is
        responsible for deleting it.

    Raises:
        ValueError: If the video has no audio track.
    """
    # A unique name per call keeps concurrent requests from overwriting each
    # other's audio (the app is served to multiple users).
    fd, audio_path = tempfile.mkstemp(prefix="hq_audio_", suffix=".wav")
    os.close(fd)
    try:
        # The context manager closes the clip and its reader resources even
        # when extraction fails.
        with VideoFileClip(video_path) as video:
            if video.audio is None:
                raise ValueError(f"No audio track found in: {video_path}")
            video.audio.write_audiofile(
                audio_path, fps=44100, nbytes=2, codec='pcm_s16le'
            )
    except Exception:
        # Do not leave a partial or empty file behind on failure.
        if os.path.exists(audio_path):
            os.remove(audio_path)
        raise
    return audio_path
def gemini_transcribe(audio_path):
    """Transcribe a WAV file into timestamped subtitle text with Gemini.

    The audio is sent through the Gemini File API rather than embedded in the
    request body: uncompressed 44.1 kHz PCM grows by several megabytes per
    minute and quickly exceeds the size limit for inline request payloads.

    Args:
        audio_path: Path to the WAV file to transcribe.

    Returns:
        The model's text response, expected to follow the format requested
        by ``TRANSCRIPTION_PROMPT``.
    """
    uploaded = genai.upload_file(path=audio_path, mime_type="audio/wav")
    try:
        response = model.generate_content([TRANSCRIPTION_PROMPT, uploaded])
        return response.text
    finally:
        # The upload is only needed for this one request; removal is
        # best-effort and must not mask the transcription result or error.
        try:
            genai.delete_file(uploaded.name)
        except Exception as exc:
            logging.getLogger(__name__).warning(
                "Could not delete uploaded audio %s: %s", uploaded.name, exc
            )
def translate_subtitles(subtitles, target_lang):
    """Translate timestamped subtitle text into another language via Gemini.

    Args:
        subtitles: Subtitle text including its timestamp lines.
        target_lang: Name of the language to translate into.

    Returns:
        The model's text response, or an empty string when there is nothing
        to translate.
    """
    # Skip the model call for empty input: with nothing to translate the
    # model would only return made-up text.
    if not subtitles or not subtitles.strip():
        return ""

    prompt = TRANSLATION_PROMPT.format(
        target_language=target_lang,
        subtitles=subtitles,
    )
    response = model.generate_content(prompt)
    return response.text
def _write_srt_file(contents, prefix):
    """Write SRT text to a uniquely named UTF-8 temp file and return its path."""
    with tempfile.NamedTemporaryFile(
        "w", encoding="utf-8", prefix=prefix, suffix=".srt", delete=False
    ) as handle:
        handle.write(contents)
        return handle.name


def process_video(video_path, source_lang, target_lang):
    """Run the full pipeline: extract audio, transcribe, optionally translate.

    Args:
        video_path: Path of the uploaded video; falsy when nothing was
            uploaded.
        source_lang: Source language selected in the UI. Accepted because the
            UI passes it, but not currently used by the pipeline.
        target_lang: Language to translate into, or ``"None"`` to skip
            translation.

    Returns:
        A ``(original_srt_path, translated_srt_path)`` tuple. The second item
        is ``None`` when no translation was requested; both are ``None`` when
        there is no video or processing fails.
    """
    if not video_path:
        return None, None

    audio_path = None
    try:
        audio_path = extract_audio(video_path)
        raw_transcription = gemini_transcribe(audio_path)
        original_srt = _write_srt_file(create_srt(raw_transcription), "original_")

        translated_srt = None
        if target_lang and target_lang != "None":
            # Translate the raw transcription, not the SRT output: the raw
            # text is in the bracketed format that create_srt is written to
            # parse, so the translated result can be converted reliably.
            translated_text = translate_subtitles(raw_transcription, target_lang)
            translated_srt = _write_srt_file(
                create_srt(translated_text), "translated_"
            )
        return original_srt, translated_srt
    except Exception:
        # Top-level boundary for the UI callback: record the failure with its
        # traceback and hand back empty outputs instead of crashing.
        logging.getLogger(__name__).exception("Processing error")
        return None, None
    finally:
        # Remove the intermediate audio on success and on failure alike.
        if audio_path and os.path.exists(audio_path):
            try:
                os.remove(audio_path)
            except OSError:
                pass
# Gradio Interface
# Layout: a row with the video upload next to a column holding the two
# language dropdowns and the trigger button, then a row with the two
# downloadable subtitle files.
with gr.Blocks(theme=gr.themes.Soft(), title="AI Subtitle Studio") as app:
    gr.Markdown("# 🎬 Professional Subtitle Generator")
    with gr.Row():
        video_input = gr.Video(label="Upload Video", sources=["upload"])
        with gr.Column():
            source_lang = gr.Dropdown(
                label="Source Language",
                choices=SUPPORTED_LANGUAGES,
                value="Auto Detect"
            )
            # "Auto Detect" is not a valid translation target, so it is
            # replaced by a "None" choice that disables translation.
            target_lang = gr.Dropdown(
                label="Translate To",
                choices=["None"] + SUPPORTED_LANGUAGES[1:],
                value="None"
            )
            process_btn = gr.Button("Generate", variant="primary")
    with gr.Row():
        original_sub = gr.File(label="Original Subtitles")
        translated_sub = gr.File(label="Translated Subtitles")
    # The click handler is registered inside the Blocks context so it is
    # attached to this app.
    process_btn.click(
        process_video,
        inputs=[video_input, source_lang, target_lang],
        outputs=[original_sub, translated_sub]
    )
if __name__ == "__main__":
    # Start the web server on port 7860 and request a public share link.
    app.launch(server_port=7860, share=True)