Spaces:

Athspi-ai
/

AutoSubGen

Running

App Files Files Community

AutoSubGen / app.py

Athspi

Update app.py

e057eaf verified 2 months ago

raw

history blame

5.88 kB

	import os
	import re
	import google.generativeai as genai
	from moviepy.video.io.VideoFileClip import VideoFileClip
	import tempfile
	import logging
	import gradio as gr
	from datetime import timedelta

	# Suppress moviepy logs
	# Only records at ERROR level or above from the "moviepy" logger are emitted.
	logging.getLogger("moviepy").setLevel(logging.ERROR)

	# Configure Gemini API
	# The key is read with a plain subscript, so a missing GEMINI_API_KEY
	# environment variable raises KeyError as soon as this module is imported.
	genai.configure(api_key=os.environ["GEMINI_API_KEY"])
	# Single module-level model handle shared by gemini_transcribe() and
	# translate_subtitles().
	model = genai.GenerativeModel("gemini-2.0-pro-exp-02-05")

	# Supported languages
	# Choices offered in the UI dropdowns. The first entry ("Auto Detect") only
	# makes sense as a source language: the "Translate To" dropdown slices it off
	# with SUPPORTED_LANGUAGES[1:] and prepends "None" instead.
	SUPPORTED_LANGUAGES = [
	"Auto Detect", "English", "Spanish", "French", "German", "Italian",
	"Portuguese", "Russian", "Japanese", "Korean", "Arabic", "Hindi",
	"Chinese", "Dutch", "Turkish", "Polish", "Vietnamese", "Thai"
	]

	# Magic Prompts
	# TRANSCRIPTION_PROMPT is sent together with the audio bytes in
	# gemini_transcribe(). It asks for "[start -> end]" timestamp lines followed
	# by the subtitle text; create_srt() is the consumer of that reply.
	TRANSCRIPTION_PROMPT = """You are a professional subtitling expert. Generate precise subtitles with accurate timestamps following these rules:

	1. Use [HH:MM:SS.ms -> HH:MM:SS.ms] format
	2. Each subtitle 3-7 words
	3. Include speaker changes
	4. Preserve emotional tone
	5. Format example:

	[00:00:05.250 -> 00:00:08.100]
	Example subtitle text

	Return ONLY subtitles with timestamps."""

	# TRANSLATION_PROMPT is a str.format() template: translate_subtitles() fills
	# the {target_language} and {subtitles} placeholders before sending it.
	TRANSLATION_PROMPT = """Translate these subtitles to {target_language} following:

	1. Keep timestamps identical
	2. Match text length to timing
	3. Preserve technical terms
	4. Use natural speech patterns

	ORIGINAL:
	{subtitles}

	TRANSLATED:"""

	def parse_timestamp(timestamp_str):
	    """Convert a textual timestamp into a number of seconds.

	    Surrounding brackets and spaces are stripped and a comma decimal mark is
	    treated like a dot. Two layouts are understood: ``HH:MM:SS.ss`` and
	    ``MM:SS.ss``.

	    Args:
	        timestamp_str: Timestamp text such as ``"[00:01:02.500"`` or
	            ``"01:02,500"``.

	    Returns:
	        The timestamp as a float count of seconds.

	    Raises:
	        ValueError: If the text does not have two or three colon-separated
	            fields, or a field is not a valid number.
	    """
	    fields = timestamp_str.strip("[] ").replace(',', '.').split(':')
	    if len(fields) not in (2, 3):
	        raise ValueError(f"Invalid timestamp: {timestamp_str}")

	    # The optional leading field is the hour; the last two are always
	    # minutes and seconds.
	    *hour_fields, minute_field, second_field = fields

	    total = 0.0
	    for hour_field in hour_fields:
	        total += float(hour_field) * 3600
	    total += float(minute_field) * 60
	    total += float(second_field)
	    return total

	def create_srt(subtitles_text):
	    """Convert timestamped subtitle text into SubRip (SRT) text.

	    The input is split into entries on blank lines. Each entry must contain a
	    timing line; everything after the timing is used as the cue text. Accepted
	    timing forms include ``[00:00:05.250 -> 00:00:08.100]`` (the layout the
	    transcription prompt requests) and ``00:00:05,250 --> 00:00:08,100`` (the
	    layout this function emits), so its own output can be parsed again.

	    Entries without a recognizable timing line are skipped silently; entries
	    whose timestamps cannot be converted are skipped with a printed notice.

	    Args:
	        subtitles_text: Raw subtitle text, entries separated by blank lines.

	    Returns:
	        SRT-formatted text with cues numbered consecutively from 1, or an
	        empty string when no entry could be converted.
	    """
	    # Optional brackets, optional whitespace, "->" or "-->" arrows, and either
	    # '.' or ',' as the decimal mark with 1-3 fractional digits.
	    timing_pattern = re.compile(
	        r'\[?\s*((?:\d+:)?\d+:\d+[.,]\d{1,3})\s*-+>\s*'
	        r'((?:\d+:)?\d+:\d+[.,]\d{1,3})\s*\]?'
	    )

	    def format_srt_time(seconds):
	        """Render seconds as the SRT timecode HH:MM:SS,mmm."""
	        # Round to whole milliseconds first so float noise (e.g. 8.1 * 1000)
	        # cannot shave a millisecond off the result.
	        total_ms = round(seconds * 1000)
	        hours, remainder = divmod(total_ms, 3_600_000)
	        minutes, remainder = divmod(remainder, 60_000)
	        secs, millis = divmod(remainder, 1000)
	        return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

	    # A "blank" separator line may still carry spaces or a carriage return.
	    entries = re.split(r'\n\s*\n', subtitles_text.strip())
	    srt_output = []

	    for idx, entry in enumerate(entries, 1):
	        time_match = timing_pattern.search(entry)
	        if not time_match:
	            continue

	        try:
	            start_time = parse_timestamp(time_match.group(1))
	            end_time = parse_timestamp(time_match.group(2))
	        except ValueError as e:
	            print(f"Skipping invalid entry {idx}: {str(e)}")
	            continue

	        # Take the text after the timing line rather than after the first
	        # ']', so bracket-less timings do not leak into the cue text.
	        text = entry[time_match.end():].strip()

	        # Number cues by what is actually emitted so skipped entries do not
	        # leave gaps in the SRT sequence.
	        cue_number = len(srt_output) + 1
	        srt_output.append(
	            f"{cue_number}\n"
	            f"{format_srt_time(start_time)} --> {format_srt_time(end_time)}\n"
	            f"{text}\n"
	        )

	    return "\n".join(srt_output)

	def extract_audio(video_path):
	    """Extract a video's audio track to a temporary 16-bit PCM WAV file.

	    The audio is written at 44.1 kHz to a uniquely named temporary file so
	    that concurrent requests cannot overwrite each other's audio. The caller
	    owns the returned file and is responsible for deleting it.

	    Args:
	        video_path: Path of the video file to read.

	    Returns:
	        Path of the WAV file that was written.

	    Raises:
	        ValueError: If the video has no audio track.
	    """
	    video = VideoFileClip(video_path)
	    try:
	        if video.audio is None:
	            raise ValueError("Video has no audio track")

	        # mkstemp hands back an open descriptor; only the path is needed.
	        fd, audio_path = tempfile.mkstemp(suffix=".wav")
	        os.close(fd)
	        try:
	            video.audio.write_audiofile(
	                audio_path, fps=44100, nbytes=2, codec='pcm_s16le'
	            )
	        except Exception:
	            # Do not leave a partial file behind when the export fails.
	            os.remove(audio_path)
	            raise
	        return audio_path
	    finally:
	        # Release the reader resources held by the clip.
	        video.close()

	def gemini_transcribe(audio_path):
	    """Send a WAV file to the Gemini model and return its reply text.

	    The whole file is read into memory and passed inline, next to
	    TRANSCRIPTION_PROMPT, as an ``audio/wav`` part.

	    Args:
	        audio_path: Path of the WAV file to transcribe.

	    Returns:
	        The ``text`` of the model response.
	    """
	    with open(audio_path, "rb") as audio_file:
	        audio_part = {"mime_type": "audio/wav", "data": audio_file.read()}

	    return model.generate_content([TRANSCRIPTION_PROMPT, audio_part]).text

	def translate_subtitles(subtitles, target_lang):
	    """Ask the Gemini model to translate subtitle text.

	    Args:
	        subtitles: Subtitle text inserted into TRANSLATION_PROMPT.
	        target_lang: Name of the language to translate into.

	    Returns:
	        The ``text`` of the model response.
	    """
	    filled_prompt = TRANSLATION_PROMPT.format(
	        subtitles=subtitles,
	        target_language=target_lang,
	    )
	    return model.generate_content(filled_prompt).text

	def process_video(video_path, source_lang, target_lang):
	    """Run the full pipeline: audio extraction, transcription, translation.

	    Args:
	        video_path: Path of the uploaded video; may be empty/None when
	            nothing was uploaded.
	        source_lang: Source language selected in the UI. It is accepted to
	            match the click handler's inputs but is not used by this
	            function.
	        target_lang: Language to translate into, or ``"None"`` to skip
	            translation.

	    Returns:
	        A ``(original_srt_path, translated_srt_path)`` tuple. The second item
	        is None when no translation was requested. Both items are None when
	        there is no input or any step fails (the error is printed).
	    """
	    if not video_path:
	        print("Processing error: no video provided")
	        return None, None

	    audio_path = None
	    try:
	        audio_path = extract_audio(video_path)
	        raw_transcription = gemini_transcribe(audio_path)
	        srt_original = create_srt(raw_transcription)

	        # A per-request directory keeps concurrent runs from overwriting
	        # each other's files while preserving the download file names.
	        output_dir = tempfile.mkdtemp(prefix="subtitles_")

	        # Subtitles routinely contain non-ASCII text, so never rely on the
	        # platform's default encoding.
	        original_srt = os.path.join(output_dir, "original.srt")
	        with open(original_srt, "w", encoding="utf-8") as f:
	            f.write(srt_original)

	        translated_srt = None
	        if target_lang and target_lang != "None":
	            # Translate the raw bracketed transcription, not the SRT text:
	            # the prompt asks for identical timestamps and create_srt() is
	            # written for the bracketed "->" layout.
	            translated_text = translate_subtitles(raw_transcription, target_lang)
	            translated_srt = os.path.join(output_dir, "translated.srt")
	            with open(translated_srt, "w", encoding="utf-8") as f:
	                f.write(create_srt(translated_text))  # Re-parse translated text

	        return original_srt, translated_srt

	    except Exception as e:
	        print(f"Processing error: {str(e)}")
	        return None, None

	    finally:
	        # Remove the intermediate audio on failure as well as on success.
	        if audio_path and os.path.exists(audio_path):
	            os.remove(audio_path)

	# Gradio Interface
	# Builds the Blocks UI: a video upload, source/target language dropdowns, a
	# "Generate" button, and two file outputs for the resulting subtitles.
	# NOTE(review): the original nesting of the `with` blocks is not visible in
	# this copy of the file — confirm which components sit inside the Row/Column.
	with gr.Blocks(theme=gr.themes.Soft(), title="AI Subtitle Studio") as app:
	gr.Markdown("# 🎬 Professional Subtitle Generator")

	with gr.Row():
	# Only direct uploads are offered as a video source.
	video_input = gr.Video(label="Upload Video", sources=["upload"])
	with gr.Column():
	source_lang = gr.Dropdown(
	label="Source Language",
	choices=SUPPORTED_LANGUAGES,
	value="Auto Detect"
	)
	# "Auto Detect" is dropped from the target choices; "None" (the default)
	# means no translation, which process_video() checks for.
	target_lang = gr.Dropdown(
	label="Translate To",
	choices=["None"] + SUPPORTED_LANGUAGES[1:],
	value="None"
	)
	process_btn = gr.Button("Generate", variant="primary")

	with gr.Row():
	original_sub = gr.File(label="Original Subtitles")
	translated_sub = gr.File(label="Translated Subtitles")

	# The input order matches process_video(video_path, source_lang, target_lang);
	# its two returned paths fill the two file components in order.
	process_btn.click(
	process_video,
	inputs=[video_input, source_lang, target_lang],
	outputs=[original_sub, translated_sub]
	)

	# Start the web UI only when this file is run as a script, on fixed port 7860.
	# NOTE(review): share=True requests a public Gradio share link — confirm this
	# is wanted in the deployment environment.
	if __name__ == "__main__":
	app.launch(server_port=7860, share=True)