Spaces:

Athspi-ai
/

AutoSubGen

Running

App Files Files Community

Athspi committed on Mar 11

Commit

224f399

verified ·

1 Parent(s): e057eaf

Update app.py

Browse files

Files changed (1) hide show

app.py +73 -40

app.py CHANGED Viewed

@@ -6,13 +6,14 @@ import tempfile
 import logging
 import gradio as gr
 from datetime import timedelta
 # Suppress moviepy logs
 logging.getLogger("moviepy").setLevel(logging.ERROR)
 # Configure Gemini API
 genai.configure(api_key=os.environ["GEMINI_API_KEY"])
-model = genai.GenerativeModel("gemini-2.0-pro-exp-02-05")
 # Supported languages
 SUPPORTED_LANGUAGES = [
@@ -22,21 +23,19 @@ SUPPORTED_LANGUAGES = [
 ]
 # Magic Prompts
-TRANSCRIPTION_PROMPT = """You are a professional subtitling expert. Generate precise subtitles with accurate timestamps following these rules:
 1. Use [HH:MM:SS.ms -> HH:MM:SS.ms] format
 2. Each subtitle 3-7 words
 3. Include speaker changes
 4. Preserve emotional tone
-5. Format example:
 [00:00:05.250 -> 00:00:08.100]
 Example subtitle text
 Return ONLY subtitles with timestamps."""
-TRANSLATION_PROMPT = """Translate these subtitles to {target_language} following:
 1. Keep timestamps identical
 2. Match text length to timing
 3. Preserve technical terms
@@ -47,8 +46,49 @@ ORIGINAL:
 TRANSLATED:"""
 def parse_timestamp(timestamp_str):
-    """Flexible timestamp parser supporting multiple formats"""
     clean_ts = timestamp_str.strip("[] ").replace(',', '.')
     parts = clean_ts.split(':')
@@ -65,14 +105,17 @@ def parse_timestamp(timestamp_str):
     seconds += float(seconds_part)
     return seconds
 def create_srt(subtitles_text):
-    """Robust SRT converter with error handling"""
     entries = re.split(r'\n{2,}', subtitles_text.strip())
     srt_output = []
     for idx, entry in enumerate(entries, 1):
         try:
-            # Match various timestamp formats
             time_match = re.search(
                 r'\[?\s*((?:\d+:)?\d+:\d+[.,]\d{3})\s*->\s*((?:\d+:)?\d+:\d+[.,]\d{3})\s*\]?',
                 entry
@@ -86,7 +129,7 @@ def create_srt(subtitles_text):
             srt_entry = (
                 f"{idx}\n"
-                f"{timedelta(seconds=start_time)} --> {timedelta(seconds=end_time)}\n"
                 f"{text}\n"
             )
             srt_output.append(srt_entry)
@@ -97,56 +140,46 @@ def create_srt(subtitles_text):
     return "\n".join(srt_output)
-def extract_audio(video_path):
-    """High-quality audio extraction"""
-    video = VideoFileClip(video_path)
-    audio_path = os.path.join(tempfile.gettempdir(), "hq_audio.wav")
-    video.audio.write_audiofile(audio_path, fps=44100, nbytes=2, codec='pcm_s16le')
-    return audio_path
-def gemini_transcribe(audio_path):
-    """Audio transcription with Gemini"""
-    with open(audio_path, "rb") as f:
-        audio_data = f.read()
-    response = model.generate_content(
-        [TRANSCRIPTION_PROMPT, {"mime_type": "audio/wav", "data": audio_data}]
-    )
-    return response.text
-def translate_subtitles(subtitles, target_lang):
-    """Context-aware translation"""
-    prompt = TRANSLATION_PROMPT.format(
-        target_language=target_lang,
-        subtitles=subtitles
-    )
-    response = model.generate_content(prompt)
-    return response.text
 def process_video(video_path, source_lang, target_lang):
     """Complete processing pipeline"""
     try:
         audio_path = extract_audio(video_path)
-        raw_transcription = gemini_transcribe(audio_path)
-        srt_original = create_srt(raw_transcription)
         original_srt = os.path.join(tempfile.gettempdir(), "original.srt")
         with open(original_srt, "w") as f:
             f.write(srt_original)
         translated_srt = None
         if target_lang != "None":
             translated_text = translate_subtitles(srt_original, target_lang)
             translated_srt = os.path.join(tempfile.gettempdir(), "translated.srt")
             with open(translated_srt, "w") as f:
-                f.write(create_srt(translated_text))  # Re-parse translated text
-        os.remove(audio_path)
         return original_srt, translated_srt
     except Exception as e:
         print(f"Processing error: {str(e)}")
         return None, None
 # Gradio Interface
 with gr.Blocks(theme=gr.themes.Soft(), title="AI Subtitle Studio") as app:

 import logging
 import gradio as gr
 from datetime import timedelta
+from pydub import AudioSegment
 # Suppress moviepy logs
 logging.getLogger("moviepy").setLevel(logging.ERROR)
 # Configure Gemini API
 genai.configure(api_key=os.environ["GEMINI_API_KEY"])
+model = genai.GenerativeModel("gemini-2.0-flash-exp")
 # Supported languages
 SUPPORTED_LANGUAGES = [
 ]
 # Magic Prompts
+TRANSCRIPTION_PROMPT = """Generate precise subtitles with accurate timestamps:
 1. Use [HH:MM:SS.ms -> HH:MM:SS.ms] format
 2. Each subtitle 3-7 words
 3. Include speaker changes
 4. Preserve emotional tone
+5. Example:
 [00:00:05.250 -> 00:00:08.100]
 Example subtitle text
 Return ONLY subtitles with timestamps."""
+TRANSLATION_PROMPT = """Translate these subtitles to {target_language}:
 1. Keep timestamps identical
 2. Match text length to timing
 3. Preserve technical terms
 TRANSLATED:"""
def split_audio(audio_path, chunk_duration=60):
    """Split a WAV file into consecutive chunks written to temporary files.

    Args:
        audio_path: Path to the source WAV file.
        chunk_duration: Length of each chunk in seconds (default: 60). The
            last chunk may be shorter than this.

    Returns:
        List of paths to the exported chunk WAV files, in playback order.

    Raises:
        ValueError: If ``chunk_duration`` is not positive.
    """
    chunk_ms = int(chunk_duration * 1000)
    if chunk_ms <= 0:
        raise ValueError("chunk_duration must be positive")

    audio = AudioSegment.from_wav(audio_path)
    chunks = []
    # len() of a pydub AudioSegment is its duration in milliseconds, and
    # slicing is in milliseconds too.
    for offset_ms in range(0, len(audio), chunk_ms):
        # mkstemp gives every chunk a unique path, so two videos processed at
        # the same time cannot overwrite or delete each other's chunks.
        fd, chunk_path = tempfile.mkstemp(
            prefix=f"chunk_{offset_ms // 1000}_", suffix=".wav"
        )
        os.close(fd)
        audio[offset_ms:offset_ms + chunk_ms].export(chunk_path, format="wav")
        chunks.append(chunk_path)
    return chunks
def process_audio_chunk(chunk_path, start_time):
    """Transcribe one audio chunk and shift its timestamps by ``start_time``.

    The chunk is uploaded with ``genai.upload_file`` and transcribed by
    ``model.generate_content`` using ``TRANSCRIPTION_PROMPT``. Every
    ``start -> end`` line in the response has ``start_time`` added to both
    ends; all other lines are passed through unchanged.

    Args:
        chunk_path: Path to the chunk's audio file. The file is deleted before
            this function returns, whether or not transcription succeeded.
        start_time: Offset of this chunk, in seconds, from the start of the
            full audio.

    Returns:
        The transcription text with adjusted timestamp lines written as
        ``[start -> end]``.
    """
    try:
        # Upload file using Gemini's File API
        uploaded_file = genai.upload_file(path=chunk_path)
        response = model.generate_content(
            [TRANSCRIPTION_PROMPT, uploaded_file]
        )

        adjusted_transcription = []
        for line in response.text.splitlines():
            start, arrow, end = line.partition('->')
            if not arrow:
                adjusted_transcription.append(line)
                continue
            try:
                adjusted_start = parse_timestamp(start.strip()) + start_time
                adjusted_end = parse_timestamp(end.strip()) + start_time
            except ValueError:
                # Subtitle text that merely contains "->" is not a timestamp
                # line; keep it verbatim instead of failing the whole chunk.
                # NOTE(review): assumes parse_timestamp signals bad input with
                # ValueError - confirm against its full body.
                adjusted_transcription.append(line)
                continue
            adjusted_transcription.append(
                f"[{format_timestamp(adjusted_start)} -> {format_timestamp(adjusted_end)}]"
            )
        return "\n".join(adjusted_transcription)
    finally:
        # Best-effort cleanup: a missing file must not hide the real error.
        try:
            os.remove(chunk_path)
        except FileNotFoundError:
            pass
 def parse_timestamp(timestamp_str):
+    """Flexible timestamp parser"""
     clean_ts = timestamp_str.strip("[] ").replace(',', '.')
     parts = clean_ts.split(':')
     seconds += float(seconds_part)
     return seconds
def format_timestamp(seconds):
    """Convert a time offset in seconds to an SRT timestamp.

    SRT timestamps are always ``HH:MM:SS,mmm``: zero-padded fields and exactly
    three millisecond digits. ``str(timedelta(...))`` cannot be used for this
    because it prints a single-digit hour, six microsecond digits, and drops
    the fraction entirely for whole seconds.

    Args:
        seconds: Offset in seconds (int or float). Negative values are clamped
            to zero.

    Returns:
        The timestamp string, e.g. ``"00:00:05,250"`` for ``5.25``.
    """
    total_ms = max(0, round(float(seconds) * 1000))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"
 def create_srt(subtitles_text):
+    """Convert raw transcription to SRT format"""
     entries = re.split(r'\n{2,}', subtitles_text.strip())
     srt_output = []
     for idx, entry in enumerate(entries, 1):
         try:
             time_match = re.search(
                 r'\[?\s*((?:\d+:)?\d+:\d+[.,]\d{3})\s*->\s*((?:\d+:)?\d+:\d+[.,]\d{3})\s*\]?',
                 entry
             srt_entry = (
                 f"{idx}\n"
+                f"{format_timestamp(start_time)} --> {format_timestamp(end_time)}\n"
                 f"{text}\n"
             )
             srt_output.append(srt_entry)
     return "\n".join(srt_output)
 def process_video(video_path, source_lang, target_lang):
     """Complete processing pipeline"""
     try:
+        # Extract audio
         audio_path = extract_audio(video_path)
+        # Split into chunks
+        chunks = split_audio(audio_path)
+        full_transcription = []
+        # Process each chunk
+        for i, chunk_path in enumerate(chunks):
+            start_time = i * 60  # 60 seconds per chunk
+            chunk_transcription = process_audio_chunk(chunk_path, start_time)
+            full_transcription.append(chunk_transcription)
+        # Combine results
+        srt_original = create_srt("\n\n".join(full_transcription))
+        # Save original subtitles
         original_srt = os.path.join(tempfile.gettempdir(), "original.srt")
         with open(original_srt, "w") as f:
             f.write(srt_original)
+        # Translate if needed
         translated_srt = None
         if target_lang != "None":
             translated_text = translate_subtitles(srt_original, target_lang)
             translated_srt = os.path.join(tempfile.gettempdir(), "translated.srt")
             with open(translated_srt, "w") as f:
+                f.write(create_srt(translated_text))
         return original_srt, translated_srt
     except Exception as e:
         print(f"Processing error: {str(e)}")
         return None, None
+    finally:
+        if os.path.exists(audio_path):
+            os.remove(audio_path)
 # Gradio Interface
 with gr.Blocks(theme=gr.themes.Soft(), title="AI Subtitle Studio") as app: