Spaces:

Athspi-ai
/

AutoSubGen

Running

App Files Files Community

Athspi committed on Mar 10

Commit

e6d59c3

verified ·

1 Parent(s): 43b2dd6

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -40

app.py CHANGED Viewed

@@ -1,10 +1,9 @@
-import gradio as gr
-import torch
 import os
-from faster_whisper import WhisperModel
 from moviepy.video.io.VideoFileClip import VideoFileClip
 import logging
-import google.generativeai as genai
 # Suppress moviepy logs
 logging.getLogger("moviepy").setLevel(logging.ERROR)
@@ -26,15 +25,7 @@ model = genai.GenerativeModel(
     generation_config=generation_config,
 )
-# Define the Whisper model and device
-MODEL_NAME = "Systran/faster-whisper-large-v3"
-device = "cuda" if torch.cuda.is_available() else "cpu"
-compute_type = "float32" if device == "cuda" else "int8"
-# Load the Whisper model
-whisper_model = WhisperModel(MODEL_NAME, device=device, compute_type=compute_type)
-# List of all supported languages in Whisper
 SUPPORTED_LANGUAGES = [
     "Auto Detect", "English", "Chinese", "German", "Spanish", "Russian", "Korean",
     "French", "Japanese", "Portuguese", "Turkish", "Polish", "Catalan", "Dutch",
@@ -57,35 +48,42 @@ SUPPORTED_LANGUAGES = [
 def extract_audio_from_video(video_file):
     """Extract audio from a video file and save it as a WAV file."""
     video = VideoFileClip(video_file)
-    audio_file = "extracted_audio.wav"
     video.audio.write_audiofile(audio_file, fps=16000, logger=None)  # Suppress logs
     return audio_file
-def generate_subtitles(audio_file, language="Auto Detect"):
-    """Generate subtitles from an audio file using Whisper."""
-    # Transcribe the audio
-    segments, info = whisper_model.transcribe(
-        audio_file,
-        task="transcribe",
-        language=None if language == "Auto Detect" else language.lower(),
-        word_timestamps=True
-    )
     # Generate SRT format subtitles
     srt_subtitles = ""
-    for i, segment in enumerate(segments, start=1):
-        start_time = segment.start
-        end_time = segment.end
-        text = segment.text.strip()
-        # Format timestamps for SRT
         start_time_srt = format_timestamp(start_time)
         end_time_srt = format_timestamp(end_time)
-        # Add to SRT
-        srt_subtitles += f"{i}\n{start_time_srt} --> {end_time_srt}\n{text}\n\n"
-    return srt_subtitles, info.language
 def format_timestamp(seconds):
     """Convert seconds to SRT timestamp format (HH:MM:SS,mmm)."""
@@ -97,10 +95,7 @@ def format_timestamp(seconds):
 def translate_srt(srt_text, target_language):
     """Translate an SRT file while preserving timestamps."""
-    # Magic prompt for Gemini
     prompt = f"Translate the following SRT subtitles into {target_language}. Preserve the SRT format (timestamps and structure). Translate only the text after the timestamp. Do not add explanations or extra text.\n\n{srt_text}"
-    # Send the prompt to Gemini
     response = model.generate_content(prompt)
     return response.text
@@ -109,11 +104,14 @@ def process_video(video_file, language="Auto Detect", translate_to=None):
     # Extract audio from the video
     audio_file = extract_audio_from_video(video_file)
     # Generate subtitles
-    subtitles, detected_language = generate_subtitles(audio_file, language)
     # Save original subtitles to an SRT file
-    original_srt_file = "original_subtitles.srt"
     with open(original_srt_file, "w", encoding="utf-8") as f:
         f.write(subtitles)
@@ -121,14 +119,14 @@ def process_video(video_file, language="Auto Detect", translate_to=None):
     translated_srt_file = None
     if translate_to and translate_to != "None":
         translated_subtitles = translate_srt(subtitles, translate_to)
-        translated_srt_file = "translated_subtitles.srt"
         with open(translated_srt_file, "w", encoding="utf-8") as f:
             f.write(translated_subtitles)
     # Clean up extracted audio file
     os.remove(audio_file)
-    return original_srt_file, translated_srt_file, detected_language
 # Define the Gradio interface
 with gr.Blocks(title="AutoSubGen - AI Video Subtitle Generator") as demo:

 import os
+import google.generativeai as genai
 from moviepy.video.io.VideoFileClip import VideoFileClip
+import tempfile
 import logging
+import gradio as gr
 # Suppress moviepy logs
 logging.getLogger("moviepy").setLevel(logging.ERROR)
     generation_config=generation_config,
 )
+# List of all supported languages
 SUPPORTED_LANGUAGES = [
     "Auto Detect", "English", "Chinese", "German", "Spanish", "Russian", "Korean",
     "French", "Japanese", "Portuguese", "Turkish", "Polish", "Catalan", "Dutch",
 def extract_audio_from_video(video_file):
     """Extract the audio track of a video into a 16 kHz WAV file.

     Args:
         video_file: Path to the input video.

     Returns:
         Path of the WAV file, created under the system temp directory.
         The caller is responsible for deleting it.

     Raises:
         ValueError: If the video has no audio track.
     """
     video = VideoFileClip(video_file)
     try:
         if video.audio is None:
             raise ValueError("The video has no audio track to extract.")
         # A unique name keeps concurrent requests from overwriting each
         # other's audio, which a fixed "extracted_audio.wav" would allow.
         fd, audio_file = tempfile.mkstemp(suffix=".wav", prefix="extracted_audio_")
         os.close(fd)
         try:
             video.audio.write_audiofile(audio_file, fps=16000, logger=None)  # Suppress logs
         except Exception:
             # Do not leave a partial file behind when the export fails.
             os.remove(audio_file)
             raise
     finally:
         # Release the resources held by the clip even when extraction fails.
         video.close()
     return audio_file
def transcribe_audio_with_gemini(audio_file):
    """Transcribe an audio file with the module-level Gemini model.

    Args:
        audio_file: Path to a WAV file.

    Returns:
        The text of the model's reply with surrounding whitespace stripped.
    """
    with open(audio_file, "rb") as f:
        audio_data = f.read()

    # Inline audio part: the raw bytes plus their MIME type.
    audio_blob = {
        'mime_type': 'audio/wav',
        'data': audio_data
    }

    instructions = "You are a professional transcriber. Transcribe this audio accurately and verbatim in the original language. Respond only with the transcription."

    # Send the instructions and the audio together in one request. Sending
    # the instructions as a separate chat turn costs an extra round trip and
    # makes the model answer before it has received any audio.
    response = model.generate_content([instructions, audio_blob])
    return response.text.strip()
def generate_subtitles(transcription, seconds_per_line=5):
    """Build SRT subtitles from a plain-text transcription.

    The transcription carries no timing information, so every non-empty
    line is given a fixed-length slot, back to back, starting at 0 seconds.

    Args:
        transcription: Transcribed text, one subtitle per line.
        seconds_per_line: Placeholder duration of each subtitle, in seconds.

    Returns:
        The subtitles in SRT format, or "" when there are no non-empty lines.
    """
    # Blank lines would otherwise turn into empty, numbered cues.
    stripped = (line.strip() for line in transcription.split("\n"))
    lines = [line for line in stripped if line]

    cues = []
    for i, line in enumerate(lines, start=1):
        # Cue i covers [(i - 1) * d, i * d) so that the first cue starts at 0.
        start_time = (i - 1) * seconds_per_line
        end_time = start_time + seconds_per_line
        start_time_srt = format_timestamp(start_time)
        end_time_srt = format_timestamp(end_time)
        cues.append(f"{i}\n{start_time_srt} --> {end_time_srt}\n{line}\n\n")
    return "".join(cues)
 def format_timestamp(seconds):
     """Convert seconds to SRT timestamp format (HH:MM:SS,mmm)."""
 def translate_srt(srt_text, target_language):
     """Ask the Gemini model to translate SRT subtitles, keeping their layout.

     Args:
         srt_text: Subtitles in SRT format.
         target_language: Name of the language to translate into.

     Returns:
         The text of the model's response.
     """
     instructions = f"Translate the following SRT subtitles into {target_language}. Preserve the SRT format (timestamps and structure). Translate only the text after the timestamp. Do not add explanations or extra text."
     # The subtitles follow the instructions after one blank line.
     request = f"{instructions}\n\n{srt_text}"
     return model.generate_content(request).text
     # Extract audio from the video
     audio_file = extract_audio_from_video(video_file)
+    # Transcribe audio using Gemini
+    transcription = transcribe_audio_with_gemini(audio_file)
     # Generate subtitles
+    subtitles = generate_subtitles(transcription)
     # Save original subtitles to an SRT file
+    original_srt_file = os.path.join(tempfile.gettempdir(), "original_subtitles.srt")
     with open(original_srt_file, "w", encoding="utf-8") as f:
         f.write(subtitles)
     translated_srt_file = None
     if translate_to and translate_to != "None":
         translated_subtitles = translate_srt(subtitles, translate_to)
+        translated_srt_file = os.path.join(tempfile.gettempdir(), "translated_subtitles.srt")
         with open(translated_srt_file, "w", encoding="utf-8") as f:
             f.write(translated_subtitles)
     # Clean up extracted audio file
     os.remove(audio_file)
+    return original_srt_file, translated_srt_file, "Detected Language: Auto"
 # Define the Gradio interface
 with gr.Blocks(title="AutoSubGen - AI Video Subtitle Generator") as demo: