Athspi committed on
Commit
e6d59c3
·
verified ·
1 Parent(s): 43b2dd6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -40
app.py CHANGED
@@ -1,10 +1,9 @@
1
- import gradio as gr
2
- import torch
3
  import os
4
- from faster_whisper import WhisperModel
5
  from moviepy.video.io.VideoFileClip import VideoFileClip
 
6
  import logging
7
- import google.generativeai as genai
8
 
9
  # Suppress moviepy logs
10
  logging.getLogger("moviepy").setLevel(logging.ERROR)
@@ -26,15 +25,7 @@ model = genai.GenerativeModel(
26
  generation_config=generation_config,
27
  )
28
 
29
- # Define the Whisper model and device
30
- MODEL_NAME = "Systran/faster-whisper-large-v3"
31
- device = "cuda" if torch.cuda.is_available() else "cpu"
32
- compute_type = "float32" if device == "cuda" else "int8"
33
-
34
- # Load the Whisper model
35
- whisper_model = WhisperModel(MODEL_NAME, device=device, compute_type=compute_type)
36
-
37
- # List of all supported languages in Whisper
38
  SUPPORTED_LANGUAGES = [
39
  "Auto Detect", "English", "Chinese", "German", "Spanish", "Russian", "Korean",
40
  "French", "Japanese", "Portuguese", "Turkish", "Polish", "Catalan", "Dutch",
@@ -57,35 +48,42 @@ SUPPORTED_LANGUAGES = [
57
  def extract_audio_from_video(video_file):
58
  """Extract audio from a video file and save it as a WAV file."""
59
  video = VideoFileClip(video_file)
60
- audio_file = "extracted_audio.wav"
61
  video.audio.write_audiofile(audio_file, fps=16000, logger=None) # Suppress logs
62
  return audio_file
63
 
64
- def generate_subtitles(audio_file, language="Auto Detect"):
65
- """Generate subtitles from an audio file using Whisper."""
66
- # Transcribe the audio
67
- segments, info = whisper_model.transcribe(
68
- audio_file,
69
- task="transcribe",
70
- language=None if language == "Auto Detect" else language.lower(),
71
- word_timestamps=True
72
- )
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
  # Generate SRT format subtitles
75
  srt_subtitles = ""
76
- for i, segment in enumerate(segments, start=1):
77
- start_time = segment.start
78
- end_time = segment.end
79
- text = segment.text.strip()
80
-
81
- # Format timestamps for SRT
82
  start_time_srt = format_timestamp(start_time)
83
  end_time_srt = format_timestamp(end_time)
84
-
85
- # Add to SRT
86
- srt_subtitles += f"{i}\n{start_time_srt} --> {end_time_srt}\n{text}\n\n"
87
 
88
- return srt_subtitles, info.language
89
 
90
  def format_timestamp(seconds):
91
  """Convert seconds to SRT timestamp format (HH:MM:SS,mmm)."""
@@ -97,10 +95,7 @@ def format_timestamp(seconds):
97
 
98
  def translate_srt(srt_text, target_language):
99
  """Translate an SRT file while preserving timestamps."""
100
- # Magic prompt for Gemini
101
  prompt = f"Translate the following SRT subtitles into {target_language}. Preserve the SRT format (timestamps and structure). Translate only the text after the timestamp. Do not add explanations or extra text.\n\n{srt_text}"
102
-
103
- # Send the prompt to Gemini
104
  response = model.generate_content(prompt)
105
  return response.text
106
 
@@ -109,11 +104,14 @@ def process_video(video_file, language="Auto Detect", translate_to=None):
109
  # Extract audio from the video
110
  audio_file = extract_audio_from_video(video_file)
111
 
 
 
 
112
  # Generate subtitles
113
- subtitles, detected_language = generate_subtitles(audio_file, language)
114
 
115
  # Save original subtitles to an SRT file
116
- original_srt_file = "original_subtitles.srt"
117
  with open(original_srt_file, "w", encoding="utf-8") as f:
118
  f.write(subtitles)
119
 
@@ -121,14 +119,14 @@ def process_video(video_file, language="Auto Detect", translate_to=None):
121
  translated_srt_file = None
122
  if translate_to and translate_to != "None":
123
  translated_subtitles = translate_srt(subtitles, translate_to)
124
- translated_srt_file = "translated_subtitles.srt"
125
  with open(translated_srt_file, "w", encoding="utf-8") as f:
126
  f.write(translated_subtitles)
127
 
128
  # Clean up extracted audio file
129
  os.remove(audio_file)
130
 
131
- return original_srt_file, translated_srt_file, detected_language
132
 
133
  # Define the Gradio interface
134
  with gr.Blocks(title="AutoSubGen - AI Video Subtitle Generator") as demo:
 
 
 
1
  import os
2
+ import google.generativeai as genai
3
  from moviepy.video.io.VideoFileClip import VideoFileClip
4
+ import tempfile
5
  import logging
6
+ import gradio as gr
7
 
8
  # Suppress moviepy logs
9
  logging.getLogger("moviepy").setLevel(logging.ERROR)
 
25
  generation_config=generation_config,
26
  )
27
 
28
+ # List of all supported languages
 
 
 
 
 
 
 
 
29
  SUPPORTED_LANGUAGES = [
30
  "Auto Detect", "English", "Chinese", "German", "Spanish", "Russian", "Korean",
31
  "French", "Japanese", "Portuguese", "Turkish", "Polish", "Catalan", "Dutch",
 
48
  def extract_audio_from_video(video_file):
49
  """Extract audio from a video file and save it as a WAV file."""
50
  video = VideoFileClip(video_file)
51
+ audio_file = os.path.join(tempfile.gettempdir(), "extracted_audio.wav")
52
  video.audio.write_audiofile(audio_file, fps=16000, logger=None) # Suppress logs
53
  return audio_file
54
 
55
def transcribe_audio_with_gemini(audio_file):
    """Transcribe an audio file using the module-level Gemini ``model``.

    Args:
        audio_file: Path to a WAV audio file.

    Returns:
        The transcription text with leading/trailing whitespace removed.
    """
    with open(audio_file, "rb") as f:
        audio_data = f.read()

    # Inline audio blob (MIME type + raw bytes).
    # NOTE(review): inline payloads are presumably size-limited by the API;
    # long recordings may need the file-upload route - confirm.
    audio_blob = {
        'mime_type': 'audio/wav',
        'data': audio_data
    }

    instructions = (
        "You are a professional transcriber. Transcribe this audio accurately "
        "and verbatim in the original language. Respond only with the transcription."
    )

    # Send the instructions and the audio in a single request. Sending the
    # instructions as a separate chat turn costs an extra model round trip
    # whose reply is discarded.
    response = model.generate_content([instructions, audio_blob])
    return response.text.strip()
71
+
72
def generate_subtitles(transcription, seconds_per_line=5):
    """Generate SRT subtitles from a plain-text transcription.

    Each non-empty line of the transcription becomes one cue. Timing is a
    placeholder: cues are laid out back to back, each lasting
    ``seconds_per_line`` seconds, with the first cue starting at 0.

    Args:
        transcription: Transcribed text, one subtitle per line.
        seconds_per_line: Placeholder duration of each cue, in seconds.

    Returns:
        The subtitles as an SRT-formatted string; empty when the
        transcription contains no non-blank lines.
    """
    # Drop blank lines so they do not turn into empty, numbered cues.
    lines = [line.strip() for line in transcription.splitlines() if line.strip()]

    cues = []
    for index, line in enumerate(lines):
        # Zero-based index so the first cue starts at the beginning of the audio.
        start_time = index * seconds_per_line
        end_time = start_time + seconds_per_line
        start_time_srt = format_timestamp(start_time)
        end_time_srt = format_timestamp(end_time)
        # SRT cue numbers are 1-based.
        cues.append(f"{index + 1}\n{start_time_srt} --> {end_time_srt}\n{line}\n\n")

    return "".join(cues)
87
 
88
  def format_timestamp(seconds):
89
  """Convert seconds to SRT timestamp format (HH:MM:SS,mmm)."""
 
95
 
96
  def translate_srt(srt_text, target_language):
97
  """Translate an SRT file while preserving timestamps."""
 
98
  prompt = f"Translate the following SRT subtitles into {target_language}. Preserve the SRT format (timestamps and structure). Translate only the text after the timestamp. Do not add explanations or extra text.\n\n{srt_text}"
 
 
99
  response = model.generate_content(prompt)
100
  return response.text
101
 
 
104
  # Extract audio from the video
105
  audio_file = extract_audio_from_video(video_file)
106
 
107
+ # Transcribe audio using Gemini
108
+ transcription = transcribe_audio_with_gemini(audio_file)
109
+
110
  # Generate subtitles
111
+ subtitles = generate_subtitles(transcription)
112
 
113
  # Save original subtitles to an SRT file
114
+ original_srt_file = os.path.join(tempfile.gettempdir(), "original_subtitles.srt")
115
  with open(original_srt_file, "w", encoding="utf-8") as f:
116
  f.write(subtitles)
117
 
 
119
  translated_srt_file = None
120
  if translate_to and translate_to != "None":
121
  translated_subtitles = translate_srt(subtitles, translate_to)
122
+ translated_srt_file = os.path.join(tempfile.gettempdir(), "translated_subtitles.srt")
123
  with open(translated_srt_file, "w", encoding="utf-8") as f:
124
  f.write(translated_subtitles)
125
 
126
  # Clean up extracted audio file
127
  os.remove(audio_file)
128
 
129
+ return original_srt_file, translated_srt_file, "Detected Language: Auto"
130
 
131
  # Define the Gradio interface
132
  with gr.Blocks(title="AutoSubGen - AI Video Subtitle Generator") as demo: