Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,616 +1,461 @@
|
|
1 |
import gradio as gr
|
2 |
import os
|
3 |
-
import tempfile
|
4 |
import subprocess
|
5 |
-
import
|
|
|
6 |
from deep_translator import GoogleTranslator
|
7 |
import pysrt
|
8 |
-
import
|
9 |
-
import
|
10 |
import shutil
|
11 |
-
from pathlib import Path
|
12 |
import time
|
13 |
from tqdm import tqdm
|
14 |
-
import
|
15 |
-
|
16 |
|
17 |
# Set up logging
|
18 |
logging.basicConfig(level=logging.INFO,
|
19 |
-
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
20 |
-
stream=sys.stdout)
|
21 |
logger = logging.getLogger(__name__)
|
22 |
|
23 |
# Configuration
|
24 |
-
aai.settings.api_key = os.getenv("ASSEMBLYAI_API_KEY")
|
25 |
LANGUAGES = {
|
26 |
-
"English": "en",
|
27 |
-
"Spanish": "es",
|
28 |
-
"French": "fr",
|
29 |
-
"German": "de",
|
30 |
-
"Japanese": "ja",
|
31 |
-
"Hindi": "hi"
|
32 |
}
|
33 |
|
34 |
-
|
35 |
-
|
36 |
-
"
|
37 |
-
"
|
38 |
-
"
|
39 |
-
"
|
40 |
-
"
|
41 |
-
"hi": "tts_models/hi/kb/tacotron2-DDC"
|
42 |
}
|
43 |
|
44 |
-
# Create
|
45 |
-
OUTPUT_DIR =
|
46 |
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
47 |
|
48 |
# Initialize TTS
|
49 |
-
|
50 |
-
|
51 |
-
tts_models
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
logger.warning(f"Failed to load TTS model for {lang_code}: {str(e)}")
|
59 |
-
return tts_models
|
60 |
|
61 |
-
|
|
|
62 |
|
63 |
-
def
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
# Use ffmpeg to extract audio
|
70 |
-
cmd = [
|
71 |
-
'ffmpeg',
|
72 |
-
'-i', video_path,
|
73 |
-
'-vn', # No video
|
74 |
-
'-acodec', 'pcm_s16le', # PCM format
|
75 |
-
'-ar', '44100', # Sample rate
|
76 |
-
'-ac', '2', # Stereo
|
77 |
-
'-y', # Overwrite output file
|
78 |
-
audio_path
|
79 |
-
]
|
80 |
-
|
81 |
-
logger.info(f"Running command: {' '.join(cmd)}")
|
82 |
-
process = subprocess.run(cmd, capture_output=True, text=True)
|
83 |
-
|
84 |
-
if process.returncode != 0:
|
85 |
-
logger.error(f"Audio extraction failed: {process.stderr}")
|
86 |
-
raise Exception(f"Audio extraction failed: {process.stderr}")
|
87 |
-
|
88 |
-
return audio_path
|
89 |
-
except Exception as e:
|
90 |
-
logger.error(f"Audio extraction failed: {str(e)}", exc_info=True)
|
91 |
-
raise Exception(f"Audio extraction failed: {str(e)}")
|
92 |
|
93 |
-
def
|
94 |
-
"""
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
with open(srt_path, "w", encoding="utf-8") as f:
|
104 |
-
f.write(transcript.export_subtitles_srt())
|
105 |
-
|
106 |
-
return srt_path
|
107 |
-
except Exception as e:
|
108 |
-
logger.error(f"Subtitle generation failed: {str(e)}", exc_info=True)
|
109 |
-
raise Exception(f"Subtitle generation failed: {str(e)}")
|
110 |
|
111 |
-
def
|
112 |
-
"""
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
results = {}
|
117 |
-
|
118 |
-
for lang_code in target_langs:
|
119 |
-
logger.info(f"Translating to language code: {lang_code}")
|
120 |
-
translated_subs = subs[:]
|
121 |
-
translator = GoogleTranslator(source="auto", target=lang_code)
|
122 |
-
|
123 |
-
for i, sub in enumerate(translated_subs):
|
124 |
-
try:
|
125 |
-
sub.text = translator.translate(sub.text)
|
126 |
-
if i % 10 == 0: # Log progress every 10 subtitles
|
127 |
-
logger.info(f"Translated {i+1}/{len(translated_subs)} subtitles to {lang_code}")
|
128 |
-
except Exception as e:
|
129 |
-
logger.warning(f"Failed to translate subtitle: {sub.text}. Error: {str(e)}")
|
130 |
-
# Keep original text if translation fails
|
131 |
-
|
132 |
-
output_path = os.path.join(OUTPUT_DIR, f"subtitles_{lang_code}.srt")
|
133 |
-
logger.info(f"Saving translated subtitles to: {output_path}")
|
134 |
-
translated_subs.save(output_path, encoding='utf-8')
|
135 |
-
results[lang_code] = output_path
|
136 |
-
|
137 |
-
return results
|
138 |
-
except Exception as e:
|
139 |
-
logger.error(f"Translation failed: {str(e)}", exc_info=True)
|
140 |
-
raise Exception(f"Translation failed: {str(e)}")
|
141 |
|
142 |
-
def
|
143 |
-
"""Generate
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
161 |
|
162 |
-
for
|
163 |
-
text = sub.text.strip()
|
164 |
-
if not text:
|
165 |
-
continue
|
166 |
-
|
167 |
-
# Get timing information
|
168 |
-
start_time = (sub.start.hours * 3600 +
|
169 |
-
sub.start.minutes * 60 +
|
170 |
-
sub.start.seconds +
|
171 |
-
sub.start.milliseconds / 1000)
|
172 |
-
|
173 |
-
end_time = (sub.end.hours * 3600 +
|
174 |
-
sub.end.minutes * 60 +
|
175 |
-
sub.end.seconds +
|
176 |
-
sub.end.milliseconds / 1000)
|
177 |
-
|
178 |
-
duration = end_time - start_time
|
179 |
-
|
180 |
-
# Generate TTS audio
|
181 |
-
audio_file = os.path.join(temp_dir, f"chunk_{i:04d}.wav")
|
182 |
-
|
183 |
try:
|
184 |
-
|
185 |
-
tts.tts_to_file(text=text, file_path=audio_file)
|
186 |
-
|
187 |
-
if os.path.exists(audio_file) and os.path.getsize(audio_file) > 0:
|
188 |
-
audio_files.append(audio_file)
|
189 |
-
timings.append((start_time, end_time, duration, audio_file))
|
190 |
-
else:
|
191 |
-
logger.warning(f"Generated audio file is empty or does not exist: {audio_file}")
|
192 |
-
|
193 |
except Exception as e:
|
194 |
-
logger.warning(f"
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
silent_audio = os.path.join(OUTPUT_DIR, f"translated_audio_{target_lang}.wav")
|
201 |
-
silent_cmd = [
|
202 |
-
'ffmpeg',
|
203 |
-
'-f', 'lavfi',
|
204 |
-
'-i', f'anullsrc=r=44100:cl=stereo',
|
205 |
-
'-t', '180', # 3 minutes default
|
206 |
-
'-q:a', '0',
|
207 |
-
'-y',
|
208 |
-
silent_audio
|
209 |
-
]
|
210 |
-
subprocess.run(silent_cmd, capture_output=True)
|
211 |
-
return silent_audio
|
212 |
-
|
213 |
-
# Create a silent audio track the same length as the original video
|
214 |
-
silence_file = os.path.join(temp_dir, "silence.wav")
|
215 |
-
try:
|
216 |
-
video_duration_cmd = [
|
217 |
-
'ffprobe',
|
218 |
-
'-v', 'error',
|
219 |
-
'-show_entries', 'format=duration',
|
220 |
-
'-of', 'default=noprint_wrappers=1:nokey=1',
|
221 |
-
os.path.join(OUTPUT_DIR, "base_video.mp4")
|
222 |
-
]
|
223 |
-
|
224 |
-
duration_result = subprocess.run(video_duration_cmd, capture_output=True, text=True)
|
225 |
-
video_duration = float(duration_result.stdout.strip())
|
226 |
-
except Exception as e:
|
227 |
-
logger.warning(f"Could not determine video duration: {str(e)}. Using default of 180 seconds.")
|
228 |
-
video_duration = 180.0
|
229 |
-
|
230 |
-
# Create silent audio track
|
231 |
-
silent_cmd = [
|
232 |
-
'ffmpeg',
|
233 |
-
'-f', 'lavfi',
|
234 |
-
'-i', f'anullsrc=r=44100:cl=stereo',
|
235 |
-
'-t', str(video_duration),
|
236 |
-
'-q:a', '0',
|
237 |
-
'-y',
|
238 |
-
silence_file
|
239 |
-
]
|
240 |
-
subprocess.run(silent_cmd, capture_output=True)
|
241 |
-
|
242 |
-
# Create a file with the audio mixing commands
|
243 |
-
filter_complex = []
|
244 |
-
input_count = 1 # Starting with 1 because 0 is the silence track
|
245 |
-
|
246 |
-
# Start with silent track
|
247 |
-
filter_parts = ["[0:a]"]
|
248 |
-
|
249 |
-
# Add each audio segment
|
250 |
-
for start_time, end_time, duration, audio_file in timings:
|
251 |
-
filter_parts.append(f"[{input_count}:a]adelay={int(start_time*1000)}|{int(start_time*1000)}")
|
252 |
-
input_count += 1
|
253 |
-
|
254 |
-
# Mix all audio tracks
|
255 |
-
filter_parts.append(f"amix=inputs={input_count}:dropout_transition=0:normalize=0[aout]")
|
256 |
-
filter_complex = ";".join(filter_parts)
|
257 |
-
|
258 |
-
# Build the ffmpeg command with all audio chunks
|
259 |
-
cmd = ['ffmpeg', '-y']
|
260 |
-
|
261 |
-
# Add silent base track
|
262 |
-
cmd.extend(['-i', silence_file])
|
263 |
-
|
264 |
-
# Add all audio chunks
|
265 |
-
for audio_file in audio_files:
|
266 |
-
cmd.extend(['-i', audio_file])
|
267 |
-
|
268 |
-
# Add filter complex and output
|
269 |
-
output_audio = os.path.join(OUTPUT_DIR, f"translated_audio_{target_lang}.wav")
|
270 |
-
cmd.extend([
|
271 |
-
'-filter_complex', filter_complex,
|
272 |
-
'-map', '[aout]',
|
273 |
-
output_audio
|
274 |
-
])
|
275 |
-
|
276 |
-
# Run the command
|
277 |
-
logger.info(f"Combining audio segments: {' '.join(cmd)}")
|
278 |
-
process = subprocess.run(cmd, capture_output=True)
|
279 |
-
|
280 |
-
if process.returncode != 0:
|
281 |
-
logger.error(f"Audio combination failed: {process.stderr}")
|
282 |
-
# Create a fallback silent audio as last resort
|
283 |
-
silent_audio = os.path.join(OUTPUT_DIR, f"translated_audio_{target_lang}.wav")
|
284 |
-
silent_cmd = [
|
285 |
-
'ffmpeg',
|
286 |
-
'-f', 'lavfi',
|
287 |
-
'-i', f'anullsrc=r=44100:cl=stereo',
|
288 |
-
'-t', str(video_duration),
|
289 |
-
'-q:a', '0',
|
290 |
-
'-y',
|
291 |
-
silent_audio
|
292 |
-
]
|
293 |
-
subprocess.run(silent_cmd, capture_output=True)
|
294 |
-
output_audio = silent_audio
|
295 |
-
|
296 |
-
# Verify the output file exists
|
297 |
-
if not os.path.exists(output_audio):
|
298 |
-
logger.error(f"Output audio file does not exist: {output_audio}")
|
299 |
-
# Create emergency fallback
|
300 |
-
silent_audio = os.path.join(OUTPUT_DIR, f"translated_audio_{target_lang}.wav")
|
301 |
-
silent_cmd = [
|
302 |
-
'ffmpeg',
|
303 |
-
'-f', 'lavfi',
|
304 |
-
'-i', f'anullsrc=r=44100:cl=stereo',
|
305 |
-
'-t', '180',
|
306 |
-
'-q:a', '0',
|
307 |
-
'-y',
|
308 |
-
silent_audio
|
309 |
-
]
|
310 |
-
subprocess.run(silent_cmd, capture_output=True)
|
311 |
-
output_audio = silent_audio
|
312 |
-
|
313 |
-
# Clean up temporary files
|
314 |
-
try:
|
315 |
-
shutil.rmtree(temp_dir)
|
316 |
-
except Exception as e:
|
317 |
-
logger.warning(f"Failed to clean up temp directory: {str(e)}")
|
318 |
|
319 |
-
|
320 |
-
except Exception as e:
|
321 |
-
logger.error(f"Audio translation failed: {str(e)}", exc_info=True)
|
322 |
-
# Create an emergency fallback silent audio
|
323 |
-
try:
|
324 |
-
silent_audio = os.path.join(OUTPUT_DIR, f"translated_audio_{target_lang}.wav")
|
325 |
-
silent_cmd = [
|
326 |
-
'ffmpeg',
|
327 |
-
'-f', 'lavfi',
|
328 |
-
'-i', f'anullsrc=r=44100:cl=stereo',
|
329 |
-
'-t', '180',
|
330 |
-
'-q:a', '0',
|
331 |
-
'-y',
|
332 |
-
silent_audio
|
333 |
-
]
|
334 |
-
subprocess.run(silent_cmd, capture_output=True)
|
335 |
-
return silent_audio
|
336 |
-
except:
|
337 |
-
raise Exception(f"Audio translation failed: {str(e)}")
|
338 |
|
339 |
-
def
|
340 |
-
"""
|
341 |
-
|
342 |
-
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
|
347 |
-
if
|
348 |
-
|
349 |
-
|
350 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
351 |
|
352 |
-
|
|
|
353 |
|
354 |
-
# Create a safe version of the subtitle path
|
355 |
-
safe_srt_path = srt_path.replace(" ", "\\ ").replace(":", "\\:")
|
356 |
-
|
357 |
-
# Command to combine video with translated audio and subtitles
|
358 |
try:
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
'-i', audio_path, # Input translated audio
|
364 |
-
'-map', '0:v', # Use video from first input
|
365 |
-
'-map', '1:a', # Use audio from second input
|
366 |
-
'-vf', f"subtitles={safe_srt_path}:force_style='FontSize=24,PrimaryColour=&H00FFFFFF,OutlineColour=&H00000000,BorderStyle=3'", # Burn subtitles
|
367 |
-
'-c:v', 'libx264', # Video codec
|
368 |
-
'-c:a', 'aac', # Audio codec
|
369 |
-
'-shortest', # End when shortest input ends
|
370 |
-
'-y', # Overwrite output file
|
371 |
-
output_path
|
372 |
-
]
|
373 |
-
|
374 |
-
logger.info(f"Running command: {' '.join(cmd)}")
|
375 |
-
process = subprocess.run(cmd, capture_output=True, text=True)
|
376 |
-
|
377 |
-
if process.returncode != 0:
|
378 |
-
logger.warning(f"First method failed: {process.stderr}")
|
379 |
-
raise Exception("First method failed")
|
380 |
-
|
381 |
except Exception as e:
|
382 |
-
logger.warning(f"
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
-
|
390 |
-
|
391 |
-
|
392 |
-
|
393 |
-
|
394 |
-
|
395 |
-
|
396 |
-
|
397 |
-
|
398 |
-
|
399 |
-
|
400 |
-
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
|
411 |
-
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
|
418 |
-
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
|
423 |
-
|
424 |
-
|
425 |
-
|
426 |
-
|
427 |
-
|
428 |
-
|
429 |
-
|
430 |
-
|
431 |
-
|
432 |
-
|
433 |
-
|
434 |
-
|
435 |
-
|
436 |
-
|
437 |
-
|
438 |
-
|
439 |
-
|
440 |
-
|
441 |
-
|
442 |
-
|
443 |
-
|
444 |
-
|
445 |
-
|
446 |
-
|
447 |
-
|
448 |
-
|
449 |
-
|
450 |
-
|
451 |
-
|
452 |
-
|
453 |
-
|
454 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
455 |
|
456 |
-
def process_video(
|
457 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
458 |
try:
|
459 |
-
progress(0.05, "
|
460 |
-
logger.info(f"Processing video: {video_file}")
|
461 |
|
462 |
-
#
|
463 |
-
try:
|
464 |
-
subprocess.run(['ffmpeg', '-version'], capture_output=True, check=True)
|
465 |
-
logger.info("ffmpeg is installed and working")
|
466 |
-
except (subprocess.SubprocessError, FileNotFoundError):
|
467 |
-
error_msg = "ffmpeg is not installed or not in PATH. Please install ffmpeg."
|
468 |
-
logger.error(error_msg)
|
469 |
-
return None, error_msg
|
470 |
-
|
471 |
-
# Extract audio
|
472 |
progress(0.1, "Extracting audio...")
|
473 |
audio_path = extract_audio(video_file)
|
474 |
|
475 |
-
#
|
476 |
-
|
477 |
-
|
|
|
478 |
|
479 |
-
#
|
480 |
-
progress(0.
|
481 |
-
|
482 |
-
|
|
|
|
|
483 |
|
484 |
-
#
|
|
|
|
|
|
|
|
|
485 |
base_video = os.path.join(OUTPUT_DIR, "base_video.mp4")
|
486 |
shutil.copy(video_file, base_video)
|
487 |
|
488 |
-
# Process each target language
|
489 |
-
|
490 |
-
|
491 |
-
|
492 |
-
|
493 |
-
progress(0.5 + (i * 0.5 / len(translated_subs)), f"Processing {lang_name}...")
|
494 |
|
495 |
-
|
496 |
-
|
497 |
-
|
498 |
-
|
499 |
-
|
500 |
-
|
501 |
-
if not os.path.exists(translated_audio):
|
502 |
-
logger.error(f"Translated audio file does not exist: {translated_audio}")
|
503 |
-
continue
|
504 |
-
|
505 |
-
# Combine video, translated audio, and subtitles
|
506 |
-
output_path = os.path.join(OUTPUT_DIR, f"output_{lang_code}.mp4")
|
507 |
-
logger.info(f"Creating final video with {lang_code} audio and subtitles")
|
508 |
-
|
509 |
-
output_video = combine_video_audio_subtitles(
|
510 |
-
base_video,
|
511 |
-
translated_audio,
|
512 |
-
sub_path,
|
513 |
-
output_path
|
514 |
-
)
|
515 |
-
|
516 |
-
# Verify the output file exists and has content
|
517 |
-
if os.path.exists(output_video) and os.path.getsize(output_video) > 1000:
|
518 |
-
logger.info(f"Successfully created output file: {output_video}")
|
519 |
-
output_videos.append(output_video)
|
520 |
-
else:
|
521 |
-
logger.warning(f"Output file is missing or too small: {output_video}")
|
522 |
-
except Exception as e:
|
523 |
-
logger.error(f"Failed to process {lang_code}: {str(e)}")
|
524 |
-
|
525 |
-
# If all output videos failed, return the original
|
526 |
-
if not output_videos:
|
527 |
-
logger.warning("All translations failed, returning original video")
|
528 |
-
return base_video, "Failed to translate video, returning original"
|
529 |
|
530 |
-
|
531 |
-
|
532 |
-
|
533 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
534 |
|
|
|
|
|
|
|
535 |
except Exception as e:
|
536 |
logger.error(f"Processing failed: {str(e)}", exc_info=True)
|
537 |
-
return None, f"
|
538 |
|
539 |
-
|
540 |
-
|
541 |
-
|
542 |
-
|
543 |
-
|
544 |
-
|
545 |
-
|
546 |
-
source_lang = gr.Dropdown(
|
547 |
-
label="Source Language",
|
548 |
-
choices=list(LANGUAGES.keys()),
|
549 |
-
value="English"
|
550 |
-
)
|
551 |
-
target_langs = gr.CheckboxGroup(
|
552 |
-
label="Target Languages (Both Audio & Subtitles)",
|
553 |
-
choices=list(LANGUAGES.keys()),
|
554 |
-
value=["Spanish"]
|
555 |
-
)
|
556 |
-
submit_btn = gr.Button("Translate", variant="primary")
|
557 |
-
|
558 |
-
with gr.Column(scale=2):
|
559 |
-
output_video = gr.Video(label="Translated Video")
|
560 |
-
status_text = gr.Textbox(label="Status", interactive=False)
|
561 |
-
output_info = gr.Markdown("Output videos will be saved in the 'outputs' directory")
|
562 |
|
563 |
-
|
564 |
-
|
565 |
-
|
566 |
-
|
567 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
568 |
|
569 |
if __name__ == "__main__":
|
570 |
-
# Check
|
571 |
-
missing_deps = []
|
572 |
-
|
573 |
-
# Check ffmpeg
|
574 |
-
try:
|
575 |
-
version_info = subprocess.run(['ffmpeg', '-version'], capture_output=True, text=True)
|
576 |
-
ffmpeg_version = version_info.stdout.split('\n')[0]
|
577 |
-
logger.info(f"ffmpeg version: {ffmpeg_version}")
|
578 |
-
except:
|
579 |
-
logger.warning("ffmpeg not found - required for video processing")
|
580 |
-
missing_deps.append("ffmpeg")
|
581 |
-
|
582 |
-
# Check Python dependencies
|
583 |
-
try:
|
584 |
-
import assemblyai
|
585 |
-
logger.info("AssemblyAI package found")
|
586 |
-
except ImportError:
|
587 |
-
logger.warning("AssemblyAI package not found - required for transcription")
|
588 |
-
missing_deps.append("assemblyai")
|
589 |
-
|
590 |
-
try:
|
591 |
-
import TTS
|
592 |
-
logger.info("Coqui TTS package found")
|
593 |
-
except ImportError:
|
594 |
-
logger.warning("Coqui TTS package not found - required for text-to-speech")
|
595 |
-
missing_deps.append("TTS")
|
596 |
-
|
597 |
try:
|
598 |
-
|
599 |
-
|
600 |
-
|
601 |
-
|
602 |
-
|
603 |
-
|
604 |
-
|
605 |
-
if missing_deps:
|
606 |
-
logger.warning("Missing dependencies detected. Please install:")
|
607 |
-
if "ffmpeg" in missing_deps:
|
608 |
-
logger.warning("- ffmpeg: https://ffmpeg.org/download.html")
|
609 |
-
|
610 |
-
python_deps = [dep for dep in missing_deps if dep != "ffmpeg"]
|
611 |
-
if python_deps:
|
612 |
-
deps_str = " ".join(python_deps)
|
613 |
-
logger.warning(f"- Python packages: pip install {deps_str}")
|
614 |
-
|
615 |
-
# Start the app
|
616 |
-
demo.launch()
|
|
|
1 |
import gradio as gr
|
2 |
import os
|
|
|
3 |
import subprocess
|
4 |
+
import torch
|
5 |
+
from TTS.api import TTS
|
6 |
from deep_translator import GoogleTranslator
|
7 |
import pysrt
|
8 |
+
import whisper # Free speech-to-text
|
9 |
+
import webvtt
|
10 |
import shutil
|
|
|
11 |
import time
|
12 |
from tqdm import tqdm
|
13 |
+
from typing import Dict, List, Optional
|
14 |
+
import logging
|
15 |
|
16 |
# Set up logging
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Configuration
# Each entry maps a display name to:
#   "code"     - ISO-style code used for translation targets and output file naming
#   "speakers" - named voices offered by the corresponding Coqui TTS model
#   "whisper"  - language code passed to Whisper for transcription
LANGUAGES = {
    "English": {"code": "en", "speakers": ["default"], "whisper": "en"},
    "Spanish": {"code": "es", "speakers": ["default"], "whisper": "es"},
    "French": {"code": "fr", "speakers": ["default"], "whisper": "fr"},
    "German": {"code": "de", "speakers": ["thorsten", "eva_k"], "whisper": "de"},
    "Japanese": {"code": "ja", "speakers": ["default"], "whisper": "ja"},
    "Hindi": {"code": "hi", "speakers": ["default"], "whisper": "hi"}
}

# CSS fragments injected into the WebVTT STYLE block and the HTML player's
# video::cue rule (see generate_webvtt_subtitles / create_html_player).
SUBTITLE_STYLES = {
    "Default": "",
    "White Text": "color: white;",
    "Yellow Text": "color: yellow;",
    "Large Text": "font-size: 24px;",
    "Bold Text": "font-weight: bold;",
    "Black Background": "background-color: black; padding: 5px;"
}

# Create output directory
OUTPUT_DIR = "outputs"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Initialize TTS
# NOTE(review): all six Coqui TTS models are downloaded and loaded eagerly at
# import time — confirm this is acceptable for startup latency and memory.
device = "cuda" if torch.cuda.is_available() else "cpu"
tts_models = {
    "en": TTS("tts_models/en/ljspeech/tacotron2-DDC").to(device),
    "es": TTS("tts_models/es/css10/vits").to(device),
    "fr": TTS("tts_models/fr/css10/vits").to(device),
    "de": TTS("tts_models/de/thorsten/tacotron2-DDC").to(device),
    "ja": TTS("tts_models/ja/kokoro/tacotron2-DDC").to(device),
    "hi": TTS("tts_models/hi/kb/tacotron2-DDC").to(device)
}

# Initialize Whisper (load when needed)
# Populated lazily by get_whisper_model() on first transcription request.
whisper_model = None
|
57 |
|
58 |
+
def get_whisper_model():
    """Return the shared Whisper model, loading it lazily on first use."""
    global whisper_model
    whisper_model = whisper_model or whisper.load_model("small")
    return whisper_model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
|
64 |
+
def extract_audio(video_path: str) -> str:
    """Strip the audio track from a video into a 16 kHz mono WAV via ffmpeg."""
    audio_path = os.path.join(OUTPUT_DIR, "audio.wav")
    ffmpeg_args = ['ffmpeg', '-i', video_path, '-vn']
    ffmpeg_args += ['-acodec', 'pcm_s16le', '-ar', '16000']
    ffmpeg_args += ['-ac', '1', '-y', audio_path]
    subprocess.run(ffmpeg_args, check=True)
    return audio_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
|
75 |
+
def transcribe_with_whisper(audio_path: str, language: Optional[str] = None) -> dict:
    """Transcribe audio using Whisper.

    Args:
        audio_path: Path to the audio file to transcribe.
        language: Whisper language code hint; None lets Whisper auto-detect.

    Returns:
        The full Whisper result dict (keys include "segments", "language",
        "text") — the original annotation claimed ``str``, which was wrong:
        callers index the result with ``result["segments"]`` and
        ``result["language"]``.
    """
    model = get_whisper_model()
    result = model.transcribe(audio_path, language=language, word_timestamps=True)
    return result
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
|
81 |
+
def generate_srt_from_whisper(audio_path: str, language: str) -> str:
    """Run Whisper on the audio and save its segments as an SRT file."""
    transcription = transcribe_with_whisper(audio_path, language)

    srt_file = pysrt.SubRipFile()
    for idx, seg in enumerate(transcription["segments"], start=1):
        item = pysrt.SubRipItem(
            index=idx,
            start=pysrt.SubRipTime(seconds=seg["start"]),
            end=pysrt.SubRipTime(seconds=seg["end"]),
            text=seg["text"],
        )
        srt_file.append(item)

    srt_path = os.path.join(OUTPUT_DIR, "subtitles.srt")
    srt_file.save(srt_path, encoding='utf-8')
    return srt_path
|
97 |
+
|
98 |
+
def detect_language(audio_path: str) -> str:
    """Map Whisper's detected language code back to a LANGUAGES display name.

    Falls back to "English" when the detected code is not configured.
    """
    whisper_code = transcribe_with_whisper(audio_path)["language"]
    matches = [name for name, data in LANGUAGES.items()
               if data["whisper"] == whisper_code]
    return matches[0] if matches else "English"
|
106 |
+
|
107 |
+
def translate_subtitles(srt_path: str, target_langs: List[str]) -> Dict[str, str]:
    """Translate an SRT file into each requested language.

    Args:
        srt_path: Path to the source .srt file.
        target_langs: Display names (keys of LANGUAGES) to translate into.

    Returns:
        Mapping of language code -> path of the translated .srt file.
    """
    results = {}

    for lang_name in target_langs:
        lang_code = LANGUAGES[lang_name]["code"]
        # BUG FIX: the previous code used `subs[:]`, a shallow copy that shares
        # the SubRipItem objects. Mutating sub.text then corrupted the source
        # subtitles and fed already-translated text into every subsequent
        # target language. Re-opening the file gives each language a fresh,
        # independent copy of the original cues.
        translated_subs = pysrt.open(srt_path)
        translator = GoogleTranslator(source='auto', target=lang_code)

        for sub in translated_subs:
            try:
                sub.text = translator.translate(sub.text)
            except Exception as e:
                # Best-effort: keep the original text for this cue.
                logger.warning(f"Translation failed: {str(e)}")
                continue

        output_path = os.path.join(OUTPUT_DIR, f"subtitles_{lang_code}.srt")
        translated_subs.save(output_path, encoding='utf-8')
        results[lang_code] = output_path

    return results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
|
130 |
+
def generate_webvtt_subtitles(srt_path: str, style: str = "") -> str:
    """Convert an SRT file to WebVTT, optionally embedding a ::cue STYLE block.

    The target language code is recovered from the SRT file name
    (subtitles_<code>.srt) and reused for the .vtt output name.
    """
    cues = pysrt.open(srt_path)
    lang_code = os.path.basename(srt_path).split('_')[-1].replace('.srt', '')
    vtt_path = os.path.join(OUTPUT_DIR, f"subtitles_{lang_code}.vtt")

    chunks = ["WEBVTT\n\n"]
    if style:
        chunks.append(f"STYLE\n::cue {{\n{style}\n}}\n\n")
    for cue in cues:
        # WebVTT wants HH:MM:SS.mmm — trim strftime's microseconds to millis.
        begin = cue.start.to_time().strftime('%H:%M:%S.%f')[:-3]
        finish = cue.end.to_time().strftime('%H:%M:%S.%f')[:-3]
        chunks.append(f"{begin} --> {finish}\n")
        chunks.append(f"{cue.text}\n\n")

    with open(vtt_path, 'w', encoding='utf-8') as f:
        f.write("".join(chunks))

    return vtt_path
|
148 |
+
|
149 |
+
def generate_translated_audio(
    srt_path: str,
    target_lang: str,
    speaker: str = "default"
) -> str:
    """Synthesize speech for each subtitle cue and mix it onto a silent track.

    Args:
        srt_path: Path to the translated .srt file for this language.
        target_lang: Language code; selects the TTS model and names outputs.
        speaker: Named voice for multi-speaker TTS models ("default" = none).

    Returns:
        Path to the mixed WAV file, time-aligned with the subtitle cues.

    Raises:
        Exception: If no subtitle produced any audio.
        subprocess.CalledProcessError: If an ffmpeg invocation fails.
    """
    subs = pysrt.open(srt_path)
    temp_dir = os.path.join(OUTPUT_DIR, f"temp_audio_{target_lang}")
    os.makedirs(temp_dir, exist_ok=True)

    audio_files = []
    timings = []
    tts = tts_models.get(target_lang)

    for i, sub in enumerate(tqdm(subs, desc=f"Generating {target_lang} audio")):
        text = sub.text.strip()
        if not text:
            continue

        start_time = sub.start.ordinal / 1000  # pysrt ordinal is milliseconds
        audio_file = os.path.join(temp_dir, f"chunk_{i:04d}.wav")
        try:
            kwargs = {"speaker": speaker} if speaker != "default" and hasattr(tts, 'synthesizer') else {}
            tts.tts_to_file(text=text, file_path=audio_file, **kwargs)
            audio_files.append(audio_file)
            timings.append((start_time, audio_file))
        except Exception as e:
            logger.warning(f"TTS failed: {str(e)}")

    if not audio_files:
        raise Exception("No audio generated")

    # Silent base track the same length as the video keeps the mix aligned
    # with the picture even past the last subtitle.
    video_duration = get_video_duration(os.path.join(OUTPUT_DIR, "base_video.mp4"))
    silence_file = os.path.join(temp_dir, "silence.wav")
    subprocess.run([
        'ffmpeg', '-f', 'lavfi', '-i', 'anullsrc=r=44100:cl=stereo',
        '-t', str(video_duration), '-y', silence_file
    ], check=True)

    # BUG FIX: the previous filter graph produced malformed labels (it chained
    # amix through labels that were never defined) and the command was run via
    # shell=True on a ' '.join-ed string with fused "-i <file>" tokens — which
    # broke on any path containing spaces and was shell-injection prone.
    # Build a valid graph instead: delay each chunk to its cue start, then
    # mix all delayed chunks with the silent base in one amix.
    delayed_labels = []
    filter_parts = []
    for idx, (start, _) in enumerate(timings):
        delay_ms = int(start * 1000)
        filter_parts.append(f"[{idx + 1}:a]adelay={delay_ms}|{delay_ms}[d{idx}]")
        delayed_labels.append(f"[d{idx}]")
    filter_parts.append(
        "[0:a]" + "".join(delayed_labels) +
        f"amix=inputs={len(timings) + 1}:dropout_transition=0:normalize=0[aout]"
    )
    filter_complex = ";".join(filter_parts)

    output_audio = os.path.join(OUTPUT_DIR, f"translated_audio_{target_lang}.wav")
    cmd = ['ffmpeg', '-y', '-i', silence_file]
    for audio_file in audio_files:
        cmd.extend(['-i', audio_file])
    cmd.extend(['-filter_complex', filter_complex, '-map', '[aout]', output_audio])

    # List form with shell=False handles arbitrary file names safely.
    subprocess.run(cmd, check=True)
    shutil.rmtree(temp_dir)
    return output_audio
|
206 |
+
|
207 |
+
def get_video_duration(video_path: str) -> float:
    """Return the video's duration in seconds via ffprobe (180.0 if unknown)."""
    probe = subprocess.run([
        'ffprobe', '-v', 'error', '-show_entries', 'format=duration',
        '-of', 'default=noprint_wrappers=1:nokey=1', video_path
    ], capture_output=True, text=True)
    duration_text = probe.stdout.strip()
    # Empty output (probe failure) falls back to a 3-minute default.
    return float(duration_text) if duration_text else 180.0
|
214 |
+
|
215 |
+
def create_html_player(
    video_path: str,
    subtitle_paths: Dict[str, str],
    style: str = ""
) -> str:
    """Write a standalone HTML page playing the video with selectable subtitle tracks.

    Args:
        video_path: Path to the MP4 to embed (referenced by basename, so the
            page must live next to the video and .vtt files).
        subtitle_paths: Mapping of language code -> .vtt file path.
        style: Optional CSS applied to video::cue for subtitle styling.

    Returns:
        Path to the generated player.html.
    """
    html_path = os.path.join(OUTPUT_DIR, "player.html")
    video_name = os.path.basename(video_path)

    track_lines = []
    for lang, path in subtitle_paths.items():
        # English is marked default so one track is shown without user action.
        default_attr = "default" if lang == "en" else ""
        track_lines.append(
            f'<track kind="subtitles" src="{os.path.basename(path)}" '
            f'srclang="{lang}" label="{lang.capitalize()}" '
            f'{default_attr}>'
        )
    subtitle_tracks = "\n".join(track_lines)

    download_links = "".join(
        f'<a href="{os.path.basename(path)}" download>'
        f'{lang.upper()} Subtitles (.vtt)</a><br>'
        for lang, path in subtitle_paths.items()
    )

    style_block = f"video::cue {{ {style} }}" if style else ""

    html_content = f"""<!DOCTYPE html>
<html>
<head>
<title>Video Player</title>
<style>
body {{ font-family: Arial, sans-serif; margin: 20px; }}
.container {{ max-width: 800px; margin: 0 auto; }}
video {{ width: 100%; background: #000; }}
.downloads {{ margin-top: 20px; }}
{style_block}
</style>
</head>
<body>
<div class="container">
<h2>Video Player with Subtitles</h2>
<video controls>
<source src="{video_name}" type="video/mp4">
{subtitle_tracks}
</video>

<div class="downloads">
<h3>Download Subtitles:</h3>
{download_links}
</div>
</div>
</body>
</html>"""

    with open(html_path, 'w', encoding='utf-8') as f:
        f.write(html_content)

    return html_path
|
269 |
|
270 |
+
def process_video(
    video_file: str,
    source_lang: str,
    target_langs: List[str],
    subtitle_style: str,
    speaker_settings: Dict[str, str],
    progress: gr.Progress = gr.Progress()
) -> tuple:
    """Complete video processing pipeline.

    Steps: extract audio -> (optionally) detect source language -> transcribe
    with Whisper -> translate subtitles -> per-language TTS dubbing + WebVTT
    generation + dubbed MP4 mux -> build an HTML player bundling everything.

    Args:
        video_file: Path of the uploaded video.
        source_lang: LANGUAGES display name, or "Auto-detect".
        target_langs: LANGUAGES display names to translate into.
        subtitle_style: Key into SUBTITLE_STYLES for subtitle CSS.
        speaker_settings: Mapping of language code -> speaker name for TTS.
        progress: Gradio progress reporter.

    Returns:
        (output_files, status_message) on success; (None, error_message) on
        failure. (The original ``List[str]`` annotation was inaccurate — both
        paths return a 2-tuple.)
    """
    try:
        progress(0.05, "Initializing...")

        # 1. Extract audio
        progress(0.1, "Extracting audio...")
        audio_path = extract_audio(video_file)

        # 2. Detect language if needed
        if source_lang == "Auto-detect":
            source_lang = detect_language(audio_path)
            progress(0.15, f"Detected language: {source_lang}")

        # 3. Generate subtitles
        progress(0.2, "Generating subtitles...")
        srt_path = generate_srt_from_whisper(
            audio_path,
            LANGUAGES[source_lang]["whisper"]
        )

        # 4. Translate subtitles
        progress(0.3, "Translating subtitles...")
        translated_subs = translate_subtitles(srt_path, target_langs)

        # 5. Save original video
        # generate_translated_audio reads OUTPUT_DIR/base_video.mp4 for its
        # duration, so the copy must happen before the per-language loop.
        base_video = os.path.join(OUTPUT_DIR, "base_video.mp4")
        shutil.copy(video_file, base_video)

        # 6. Process each target language
        translated_vtts = {}
        for i, lang_name in enumerate(target_langs, 1):
            lang_code = LANGUAGES[lang_name]["code"]
            progress(0.4 + (i * 0.5 / len(target_langs)), f"Processing {lang_name}...")

            # Generate audio
            translated_audio = generate_translated_audio(
                translated_subs[lang_code],
                lang_code,
                speaker_settings.get(lang_code, "default")
            )

            # Generate subtitles
            vtt_path = generate_webvtt_subtitles(
                translated_subs[lang_code],
                SUBTITLE_STYLES.get(subtitle_style, "")
            )
            translated_vtts[lang_code] = vtt_path

            # Create translated video version
            # Video stream is stream-copied; only audio is re-encoded to AAC.
            output_video = os.path.join(OUTPUT_DIR, f"output_{lang_code}.mp4")
            subprocess.run([
                'ffmpeg', '-i', base_video, '-i', translated_audio,
                '-map', '0:v', '-map', '1:a', '-c:v', 'copy', '-c:a', 'aac',
                '-y', output_video
            ], check=True)

        # 7. Create HTML player
        progress(0.9, "Creating HTML player...")
        html_path = create_html_player(
            base_video,
            translated_vtts,
            SUBTITLE_STYLES.get(subtitle_style, "")
        )

        # Prepare all output files
        output_files = [html_path, base_video] + \
            list(translated_vtts.values()) + \
            [os.path.join(OUTPUT_DIR, f"output_{LANGUAGES[lang]['code']}.mp4")
             for lang in target_langs]

        progress(1.0, "Done!")
        return output_files, "Processing completed successfully!"

    except Exception as e:
        logger.error(f"Processing failed: {str(e)}", exc_info=True)
        return None, f"Error: {str(e)}"
|
354 |
|
355 |
+
def get_speaker_settings(*args) -> Dict[str, str]:
    """Map positional speaker selections onto language codes.

    The positional *args are ordered like ``LANGUAGES``; any trailing
    languages without a corresponding arg, and any falsy (empty)
    selections, are simply omitted from the result.

    Returns:
        Dict[str, str]: language code -> chosen speaker name.
    """
    # zip truncates at the shorter sequence, which reproduces the
    # "i < len(args)" bound of an index-based loop.
    return {
        LANGUAGES[name]["code"]: choice
        for name, choice in zip(LANGUAGES, args)
        if choice
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
362 |
|
363 |
+
def create_interface():
    """Build the Gradio UI for the video translation app.

    Returns:
        gr.Blocks: the assembled interface; the caller invokes ``.launch()``.
    """
    with gr.Blocks(title="Video Translator") as demo:
        gr.Markdown("# Free Video Translation System")
        gr.Markdown("Translate videos with subtitles and audio dubbing using free/open-source tools")

        with gr.Row():
            with gr.Column(scale=1):
                video_input = gr.Video(label="Upload Video")

                with gr.Accordion("Source Settings", open=True):
                    source_lang = gr.Dropdown(
                        label="Source Language",
                        choices=["Auto-detect"] + list(LANGUAGES.keys()),
                        value="Auto-detect"
                    )

                with gr.Accordion("Target Languages", open=True):
                    target_langs = gr.CheckboxGroup(
                        label="Select target languages",
                        choices=list(LANGUAGES.keys()),
                        value=["English", "Spanish"]
                    )

                with gr.Accordion("Subtitle Styling", open=False):
                    subtitle_style = gr.Dropdown(
                        label="Subtitle Appearance",
                        choices=list(SUBTITLE_STYLES.keys()),
                        value="Default"
                    )

                with gr.Accordion("Voice Settings", open=False):
                    # One (possibly hidden) selector per language; list order
                    # matches LANGUAGES so positional values can be mapped
                    # back to language codes by get_speaker_settings().
                    speaker_inputs = []
                    for lang_name in LANGUAGES.keys():
                        speakers = LANGUAGES[lang_name]["speakers"]
                        if len(speakers) > 1:
                            speaker_inputs.append(
                                gr.Dropdown(
                                    label=f"{lang_name} Speaker",
                                    choices=speakers,
                                    value=speakers[0],
                                    visible=False
                                )
                            )
                        else:
                            # Placeholder keeps positions aligned with LANGUAGES.
                            speaker_inputs.append(gr.Textbox(visible=False))

                submit_btn = gr.Button("Translate Video", variant="primary")

            with gr.Column(scale=2):
                output_files = gr.Files(label="Download Files")
                status = gr.Textbox(label="Status")

                gr.Markdown("""
                **Instructions:**
                1. Upload a video file
                2. Select source and target languages
                3. Customize subtitles and voices
                4. Click Translate
                5. Download the HTML player and open in browser
                """)

        def update_speaker_ui(selected_langs):
            # Show a speaker selector only for selected languages that
            # actually offer a choice of voices.
            updates = []
            for lang_name in LANGUAGES.keys():
                visible = lang_name in selected_langs and len(LANGUAGES[lang_name]["speakers"]) > 1
                # gr.update() works for both Dropdown and the Textbox
                # placeholders (gr.Dropdown.update was removed in Gradio 4.x).
                updates.append(gr.update(visible=visible))
            return updates

        target_langs.change(
            update_speaker_ui,
            inputs=target_langs,
            outputs=speaker_inputs
        )

        def on_submit(video, src_lang, tgt_langs, style, *speaker_values):
            # BUGFIX: speaker *values* must be collected at event time by
            # listing the components as event inputs. The previous
            # gr.State(lambda: get_speaker_settings(*speaker_inputs)) passed
            # the component objects themselves, never the user's selections.
            return process_video(
                video, src_lang, tgt_langs, style,
                get_speaker_settings(*speaker_values)
            )

        submit_btn.click(
            on_submit,
            inputs=[video_input, source_lang, target_langs, subtitle_style] + speaker_inputs,
            outputs=[output_files, status]
        )

    return demo
|
451 |
|
452 |
if __name__ == "__main__":
    # Check external requirements (ffmpeg binary + ML packages) before
    # building the UI, so a missing dependency yields a clear message.
    try:
        # capture_output keeps ffmpeg's version banner off our stdout;
        # FileNotFoundError is raised if the binary is absent.
        subprocess.run(["ffmpeg", "-version"], check=True, capture_output=True)
        import torch, whisper  # noqa: F401 -- import check only
    except (FileNotFoundError, subprocess.CalledProcessError, ImportError) as e:
        # Only requirement failures land here; launch errors are not
        # mislabeled as missing dependencies (previous broad `except
        # Exception` swallowed demo.launch() failures too).
        print(f"Error: {str(e)}")
        print("Please install all requirements: pip install -r requirements.txt")
    else:
        demo = create_interface()
        demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|