Athspi committed on
Commit e4d42f1 · verified · 1 Parent(s): 9fafdf1

Update app.py

Files changed (1)
  1. app.py +154 -194
app.py CHANGED
@@ -1,231 +1,191 @@
  import os
- import re
  import google.generativeai as genai
  from moviepy.video.io.VideoFileClip import VideoFileClip
  import tempfile
  import logging
  import gradio as gr
- from datetime import timedelta
- from pydub import AudioSegment

  # Suppress moviepy logs
  logging.getLogger("moviepy").setLevel(logging.ERROR)

  # Configure Gemini API
  genai.configure(api_key=os.environ["GEMINI_API_KEY"])
- model = genai.GenerativeModel("gemini-2.0-flash")

- # Supported languages
  SUPPORTED_LANGUAGES = [
-     "Auto Detect", "English", "Spanish", "French", "German", "Italian",
-     "Portuguese", "Russian", "Japanese", "Korean", "Arabic", "Hindi",
-     "Chinese", "Dutch", "Turkish", "Polish", "Vietnamese", "Thai"
  ]

- # Magic Prompts
- TRANSCRIPTION_PROMPT = """Generate precise subtitles with accurate timestamps:
- 1. Use [HH:MM:SS.ms -> HH:MM:SS.ms] format
- 2. Each subtitle 3-7 words
- 3. Include speaker changes
- 4. Preserve emotional tone
- 5. Example:
-
- [00:00:05.250 -> 00:00:08.100]
- Example subtitle text
-
- Return ONLY subtitles with timestamps."""
-
- TRANSLATION_PROMPT = """Translate these subtitles to {target_language}:
- 1. Keep timestamps identical
- 2. Match text length to timing
- 3. Preserve technical terms
- 4. Use natural speech patterns
-
- ORIGINAL:
- {subtitles}
-
- TRANSLATED:"""
-
- def extract_audio(video_path):
-     """Extract high-quality audio from video"""
-     video = VideoFileClip(video_path)
-     audio_path = os.path.join(tempfile.gettempdir(), "extracted_audio.wav")
-     video.audio.write_audiofile(audio_path, fps=44100, nbytes=2, codec='pcm_s16le')
-     return audio_path
-
- def split_audio(audio_path, chunk_duration=60):
-     """Split audio into smaller chunks (default: 60 seconds)"""
-     audio = AudioSegment.from_wav(audio_path)
-     chunks = []

-     for i in range(0, len(audio), chunk_duration * 1000):
-         chunk = audio[i:i + chunk_duration * 1000]
-         chunk_path = os.path.join(tempfile.gettempdir(), f"chunk_{i//1000}.wav")
-         chunk.export(chunk_path, format="wav")
-         chunks.append(chunk_path)

-     return chunks
-
- def process_audio_chunk(chunk_path, start_time):
-     """Transcribe a single audio chunk"""
-     try:
-         # Upload file using Gemini's File API
-         uploaded_file = genai.upload_file(path=chunk_path)

-         # Get transcription
-         response = model.generate_content(
-             [TRANSCRIPTION_PROMPT, uploaded_file]
-         )

-         # Adjust timestamps relative to chunk start
-         adjusted_transcription = []
-         for line in response.text.splitlines():
-             if '->' in line:
-                 start, end = line.split('->')
-                 adjusted_start = parse_timestamp(start.strip()) + start_time
-                 adjusted_end = parse_timestamp(end.strip()) + start_time
-                 adjusted_line = f"[{format_timestamp(adjusted_start)} -> {format_timestamp(adjusted_end)}]"
-                 adjusted_transcription.append(adjusted_line)
-             else:
-                 adjusted_transcription.append(line)

-         return "\n".join(adjusted_transcription)

-     finally:
-         os.remove(chunk_path)

- def parse_timestamp(timestamp_str):
-     """Flexible timestamp parser"""
-     clean_ts = timestamp_str.strip("[] ").replace(',', '.')
-     parts = clean_ts.split(':')

-     seconds = 0.0
-     if len(parts) == 3:  # HH:MM:SS.ss
-         hours, minutes, seconds_part = parts
-         seconds += float(hours) * 3600
-     elif len(parts) == 2:  # MM:SS.ss
-         minutes, seconds_part = parts
-     else:
-         raise ValueError(f"Invalid timestamp: {timestamp_str}")

-     seconds += float(minutes) * 60
-     seconds += float(seconds_part)
-     return seconds
-
- def format_timestamp(seconds):
-     """Convert seconds to SRT format"""
-     return str(timedelta(seconds=seconds)).replace('.', ',')
-
- def create_srt(subtitles_text):
-     """Convert raw transcription to SRT format"""
-     entries = re.split(r'\n{2,}', subtitles_text.strip())
-     srt_output = []

-     for idx, entry in enumerate(entries, 1):
-         try:
-             time_match = re.search(
-                 r'\[?\s*((?:\d+:)?\d+:\d+[.,]\d{3})\s*->\s*((?:\d+:)?\d+:\d+[.,]\d{3})\s*\]?',
-                 entry
-             )
-             if not time_match:
-                 continue
-
-             start_time = parse_timestamp(time_match.group(1))
-             end_time = parse_timestamp(time_match.group(2))
-             text = entry.split(']', 1)[-1].strip()
-
-             srt_entry = (
-                 f"{idx}\n"
-                 f"{format_timestamp(start_time)} --> {format_timestamp(end_time)}\n"
-                 f"{text}\n"
-             )
-             srt_output.append(srt_entry)
-
-         except Exception as e:
-             print(f"Skipping invalid entry {idx}: {str(e)}")
-             continue

-     return "\n".join(srt_output)
-
- def translate_subtitles(subtitles, target_lang):
-     """Translate subtitles while preserving timestamps"""
-     prompt = TRANSLATION_PROMPT.format(
-         target_language=target_lang,
-         subtitles=subtitles
-     )
-     response = model.generate_content(prompt)
-     return response.text
-
- def process_video(video_path, source_lang, target_lang):
-     """Complete processing pipeline"""
-     audio_path = None
-     try:
-         # Extract audio
-         audio_path = extract_audio(video_path)
-
-         # Split into chunks
-         chunks = split_audio(audio_path)
-         full_transcription = []
-
-         # Process each chunk
-         for i, chunk_path in enumerate(chunks):
-             start_time = i * 60  # 60 seconds per chunk
-             chunk_transcription = process_audio_chunk(chunk_path, start_time)
-             full_transcription.append(chunk_transcription)
-
-         # Combine results
-         srt_original = create_srt("\n\n".join(full_transcription))
-
-         # Save original subtitles
-         original_srt = os.path.join(tempfile.gettempdir(), "original.srt")
-         with open(original_srt, "w") as f:
-             f.write(srt_original)
-
-         # Translate if needed
-         translated_srt = None
-         if target_lang != "None":
-             translated_text = translate_subtitles(srt_original, target_lang)
-             translated_srt = os.path.join(tempfile.gettempdir(), "translated.srt")
-             with open(translated_srt, "w") as f:
-                 f.write(create_srt(translated_text))
-
-         return original_srt, translated_srt

-     except Exception as e:
-         print(f"Processing error: {str(e)}")
-         return None, None
-     finally:
-         if audio_path and os.path.exists(audio_path):
-             os.remove(audio_path)
-
- # Gradio Interface
- with gr.Blocks(theme=gr.themes.Soft(), title="AI Subtitle Studio") as app:
-     gr.Markdown("# 🎬 Professional Subtitle Generator")

-     with gr.Row():
-         video_input = gr.Video(label="Upload Video", sources=["upload"])
-         with gr.Column():
-             source_lang = gr.Dropdown(
-                 label="Source Language",
                  choices=SUPPORTED_LANGUAGES,
-                 value="Auto Detect"
              )
-             target_lang = gr.Dropdown(
                  label="Translate To",
-                 choices=["None"] + SUPPORTED_LANGUAGES[1:],
-                 value="None"
              )
-     process_btn = gr.Button("Generate", variant="primary")
-
-     with gr.Row():
-         original_sub = gr.File(label="Original Subtitles")
-         translated_sub = gr.File(label="Translated Subtitles")

-     process_btn.click(
          process_video,
-         inputs=[video_input, source_lang, target_lang],
-         outputs=[original_sub, translated_sub]
      )

- if __name__ == "__main__":
-     app.launch(server_port=7860, share=True)
 
  import os
  import google.generativeai as genai
  from moviepy.video.io.VideoFileClip import VideoFileClip
  import tempfile
  import logging
  import gradio as gr

  # Suppress moviepy logs
  logging.getLogger("moviepy").setLevel(logging.ERROR)

  # Configure Gemini API
  genai.configure(api_key=os.environ["GEMINI_API_KEY"])

+ # Create the Gemini model
+ generation_config = {
+     "temperature": 0.7,  # Lower temperature for more accurate results
+     "top_p": 0.9,
+     "top_k": 40,
+     "max_output_tokens": 8192,
+     "response_mime_type": "text/plain",
+ }
+
+ model = genai.GenerativeModel(
+     model_name="gemini-2.0-flash-exp",
+     generation_config=generation_config,
+ )
+
+ # List of all supported languages
  SUPPORTED_LANGUAGES = [
+     "Auto Detect", "English", "Chinese", "German", "Spanish", "Russian", "Korean",
+     "French", "Japanese", "Portuguese", "Turkish", "Polish", "Catalan", "Dutch",
+     "Arabic", "Swedish", "Italian", "Indonesian", "Hindi", "Finnish", "Vietnamese",
+     "Hebrew", "Ukrainian", "Greek", "Malay", "Czech", "Romanian", "Danish",
+     "Hungarian", "Tamil", "Norwegian", "Thai", "Urdu", "Croatian", "Bulgarian",
+     "Lithuanian", "Latin", "Maori", "Malayalam", "Welsh", "Slovak", "Telugu",
+     "Persian", "Latvian", "Bengali", "Serbian", "Azerbaijani", "Slovenian",
+     "Kannada", "Estonian", "Macedonian", "Breton", "Basque", "Icelandic",
+     "Armenian", "Nepali", "Mongolian", "Bosnian", "Kazakh", "Albanian",
+     "Swahili", "Galician", "Marathi", "Punjabi", "Sinhala", "Khmer", "Shona",
+     "Yoruba", "Somali", "Afrikaans", "Occitan", "Georgian", "Belarusian",
+     "Tajik", "Sindhi", "Gujarati", "Amharic", "Yiddish", "Lao", "Uzbek",
+     "Faroese", "Haitian Creole", "Pashto", "Turkmen", "Nynorsk", "Maltese",
+     "Sanskrit", "Luxembourgish", "Burmese", "Tibetan", "Tagalog", "Malagasy",
+     "Assamese", "Tatar", "Hawaiian", "Lingala", "Hausa", "Bashkir", "Javanese",
+     "Sundanese"
  ]

+ def extract_audio_from_video(video_file):
+     """Extract audio from a video file and save it as a WAV file."""
+     video = VideoFileClip(video_file)
+     audio_file = os.path.join(tempfile.gettempdir(), "extracted_audio.wav")
+     video.audio.write_audiofile(audio_file, fps=16000, logger=None)  # Suppress logs
+     return audio_file
+
+ def transcribe_audio_with_gemini(audio_file):
+     """Transcribe audio using Gemini with a magic prompt for accurate timestamps."""
+     with open(audio_file, "rb") as f:
+         audio_data = f.read()

+     # Create proper audio blob
+     audio_blob = {
+         'mime_type': 'audio/wav',
+         'data': audio_data
+     }
+
+     # Magic prompt for transcription with timestamps
+     prompt = """
+     You are a professional transcriber. Transcribe this audio accurately and verbatim in the original language.
+     Include timestamps for each sentence in the following format:
+     [HH:MM:SS] Sentence 1
+     [HH:MM:SS] Sentence 2
+     ...
+     Respond only with the transcription and timestamps. Do not add explanations or extra text.
+     """

+     # Transcribe audio
+     convo = model.start_chat()
+     convo.send_message(prompt)
+     response = convo.send_message(audio_blob)
+     return response.text.strip()
+
+ def generate_subtitles(transcription):
+     """Generate SRT subtitles from transcription with timestamps."""
+     lines = transcription.split("\n")
+     srt_subtitles = ""
+
+     for i, line in enumerate(lines, start=1):
+         if not line.strip():
+             continue

+         # Extract timestamp and text
+         if line.startswith("["):
+             timestamp = line.split("]")[0] + "]"
+             text = line.split("]")[1].strip()
+         else:
+             timestamp = "[00:00:00]"
+             text = line.strip()

+         # Convert timestamp to SRT format
+         start_time = timestamp[1:-1]  # Remove brackets
+         end_time = "00:00:05"  # Placeholder: 5 seconds per line

+         srt_subtitles += f"{i}\n{start_time},000 --> {end_time},000\n{text}\n\n"

+     return srt_subtitles
+
+ def translate_srt(srt_text, target_language):
+     """Translate an SRT file while preserving timestamps using a magic prompt."""
+     # Magic prompt for translation
+     prompt = f"""
+     Translate the following SRT subtitles into {target_language}.
+     Preserve the SRT format (timestamps and structure).
+     Translate only the text after the timestamp.
+     Do not add explanations or extra text.
+     Ensure the translation is accurate and culturally appropriate.
+     Here is the SRT file:
+     {srt_text}
+     """
+
+     response = model.generate_content(prompt)
+     return response.text

+ def process_video(video_file, language="Auto Detect", translate_to=None):
+     """Process a video file to generate and translate subtitles."""
+     # Extract audio from the video
+     audio_file = extract_audio_from_video(video_file)

+     # Transcribe audio using Gemini
+     transcription = transcribe_audio_with_gemini(audio_file)

+     # Generate subtitles
+     subtitles = generate_subtitles(transcription)

+     # Save original subtitles to an SRT file
+     original_srt_file = os.path.join(tempfile.gettempdir(), "original_subtitles.srt")
+     with open(original_srt_file, "w", encoding="utf-8") as f:
+         f.write(subtitles)

+     # Translate subtitles if a target language is provided
+     translated_srt_file = None
+     if translate_to and translate_to != "None":
+         translated_subtitles = translate_srt(subtitles, translate_to)
+         translated_srt_file = os.path.join(tempfile.gettempdir(), "translated_subtitles.srt")
+         with open(translated_srt_file, "w", encoding="utf-8") as f:
+             f.write(translated_subtitles)

+     # Clean up extracted audio file
+     os.remove(audio_file)

+     return original_srt_file, translated_srt_file, "Detected Language: Auto"
+
+ # Define the Gradio interface
+ with gr.Blocks(title="AutoSubGen - AI Video Subtitle Generator") as demo:
+     # Header
+     with gr.Column():
+         gr.Markdown("# 🎥 AutoSubGen")
+         gr.Markdown("### AI-Powered Video Subtitle Generator")
+         gr.Markdown("Automatically generate and translate subtitles for your videos in **SRT format**. Supports **100+ languages** and **auto-detection**.")
+
+     # Main content
+     with gr.Tab("Generate Subtitles"):
+         gr.Markdown("### Upload a video file to generate subtitles.")
+         with gr.Row():
+             video_input = gr.Video(label="Upload Video File", scale=2)
+             language_dropdown = gr.Dropdown(
                  choices=SUPPORTED_LANGUAGES,
+                 label="Select Language",
+                 value="Auto Detect",
+                 scale=1
              )
+             translate_to_dropdown = gr.Dropdown(
+                 choices=["None"] + SUPPORTED_LANGUAGES[1:],  # Exclude "Auto Detect"
                  label="Translate To",
+                 value="None",
+                 scale=1
              )
+         generate_button = gr.Button("Generate Subtitles", variant="primary")
+         with gr.Row():
+             original_subtitle_output = gr.File(label="Download Original Subtitles (SRT)")
+             translated_subtitle_output = gr.File(label="Download Translated Subtitles (SRT)")
+             detected_language_output = gr.Textbox(label="Detected Language")

+     # Link button to function
+     generate_button.click(
          process_video,
+         inputs=[video_input, language_dropdown, translate_to_dropdown],
+         outputs=[original_subtitle_output, translated_subtitle_output, detected_language_output]
      )

+ # Launch the Gradio interface with a public link
+ demo.launch(share=True)
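
For a quick sanity check of the new pipeline, here is a minimal usage sketch that calls the updated process_video entry point directly (for example from an interactive session, with demo.launch skipped). It assumes GEMINI_API_KEY is set in the environment; the file name "sample.mp4" and the "Spanish" target are illustrative placeholders.

    # Usage sketch: "sample.mp4" and "Spanish" are illustrative placeholders.
    original_srt, translated_srt, detected = process_video(
        "sample.mp4",            # any local video file
        language="Auto Detect",  # source-language hint shown in the UI
        translate_to="Spanish",  # or "None" to skip translation
    )
    print("Original subtitles:", original_srt)
    print("Translated subtitles:", translated_srt)
    print(detected)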