Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,16 +1,16 @@
|
|
1 |
import gradio as gr
|
2 |
import os
|
3 |
import tempfile
|
4 |
-
import
|
5 |
import assemblyai as aai
|
6 |
from deep_translator import GoogleTranslator
|
7 |
import pysrt
|
8 |
-
from gtts import gTTS
|
9 |
-
from pydub import AudioSegment
|
10 |
import logging
|
11 |
import sys
|
|
|
|
|
12 |
|
13 |
-
# Set up logging
|
14 |
logging.basicConfig(level=logging.INFO,
|
15 |
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
16 |
stream=sys.stdout)
|
@@ -27,59 +27,83 @@ LANGUAGES = {
|
|
27 |
"Hindi": "hi"
|
28 |
}
|
29 |
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
31 |
try:
|
32 |
-
logger.info(f"
|
33 |
-
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
-
audio_path = os.path.join(output_dir, "audio.wav")
|
37 |
-
logger.info(f"Extracting audio to: {audio_path}")
|
38 |
-
video.audio.write_audiofile(audio_path, logger=None)
|
39 |
return audio_path
|
40 |
except Exception as e:
|
41 |
logger.error(f"Audio extraction failed: {str(e)}", exc_info=True)
|
42 |
raise Exception(f"Audio extraction failed: {str(e)}")
|
43 |
|
44 |
-
def generate_subtitles(audio_path
|
|
|
45 |
try:
|
46 |
-
logger.info(f"Transcribing audio with AssemblyAI")
|
47 |
transcriber = aai.Transcriber()
|
48 |
transcript = transcriber.transcribe(audio_path)
|
49 |
|
50 |
-
srt_path = os.path.join(
|
51 |
logger.info(f"Saving subtitles to: {srt_path}")
|
|
|
52 |
with open(srt_path, "w", encoding="utf-8") as f:
|
53 |
f.write(transcript.export_subtitles_srt())
|
|
|
54 |
return srt_path
|
55 |
except Exception as e:
|
56 |
logger.error(f"Subtitle generation failed: {str(e)}", exc_info=True)
|
57 |
raise Exception(f"Subtitle generation failed: {str(e)}")
|
58 |
|
59 |
-
def translate_subtitles(srt_path, target_langs
|
|
|
60 |
try:
|
61 |
logger.info(f"Loading subtitles from: {srt_path}")
|
62 |
subs = pysrt.open(srt_path, encoding="utf-8")
|
63 |
results = {}
|
64 |
|
65 |
-
for
|
66 |
-
logger.info(f"Translating to language: {
|
67 |
translated_subs = subs[:]
|
68 |
-
translator = GoogleTranslator(source="auto", target=
|
69 |
|
70 |
for i, sub in enumerate(translated_subs):
|
71 |
try:
|
72 |
sub.text = translator.translate(sub.text)
|
73 |
if i % 10 == 0: # Log progress every 10 subtitles
|
74 |
-
logger.info(f"Translated {i+1}/{len(translated_subs)} subtitles to {
|
75 |
except Exception as e:
|
76 |
logger.warning(f"Failed to translate subtitle: {sub.text}. Error: {str(e)}")
|
77 |
# Keep original text if translation fails
|
78 |
|
79 |
-
output_path = os.path.join(
|
80 |
logger.info(f"Saving translated subtitles to: {output_path}")
|
81 |
translated_subs.save(output_path, encoding='utf-8')
|
82 |
-
results[
|
83 |
|
84 |
return results
|
85 |
except Exception as e:
|
@@ -87,19 +111,22 @@ def translate_subtitles(srt_path, target_langs, output_dir):
|
|
87 |
raise Exception(f"Translation failed: {str(e)}")
|
88 |
|
89 |
def add_subtitles_ffmpeg(video_path, srt_path, output_path):
|
90 |
-
"""
|
91 |
try:
|
92 |
-
|
93 |
-
|
94 |
-
logger.info(f"Adding subtitles using ffmpeg")
|
95 |
cmd = [
|
96 |
'ffmpeg',
|
97 |
-
'-i', video_path,
|
98 |
-
'-
|
99 |
-
'-
|
100 |
-
'-
|
101 |
-
'-
|
102 |
-
'-
|
|
|
|
|
|
|
|
|
|
|
103 |
output_path
|
104 |
]
|
105 |
|
@@ -108,75 +135,93 @@ def add_subtitles_ffmpeg(video_path, srt_path, output_path):
|
|
108 |
|
109 |
if process.returncode != 0:
|
110 |
logger.error(f"ffmpeg error: {process.stderr}")
|
111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
|
113 |
return output_path
|
114 |
except Exception as e:
|
115 |
-
logger.error(f"
|
116 |
-
raise Exception(f"
|
117 |
|
118 |
def process_video(video_file, source_lang, target_langs, progress=gr.Progress()):
|
119 |
-
|
|
|
|
|
|
|
|
|
|
|
120 |
try:
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
|
139 |
-
|
140 |
-
|
141 |
-
logger.info(f"Creating output video with {lang} subtitles at: {output_path}")
|
142 |
|
143 |
-
#
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
else:
|
152 |
-
logger.warning(f"Output file is missing or too small: {output_path}")
|
153 |
-
raise Exception("Output file verification failed")
|
154 |
-
except Exception as e:
|
155 |
-
logger.error(f"ffmpeg method failed: {str(e)}", exc_info=True)
|
156 |
-
|
157 |
-
# If ffmpeg fails, create a copy of the original video as a fallback
|
158 |
-
logger.info("Using fallback method: copying original video")
|
159 |
-
import shutil
|
160 |
-
fallback_path = os.path.join(tmpdir, f"fallback_{lang}.mp4")
|
161 |
-
shutil.copy(video_file, fallback_path)
|
162 |
-
output_files.append(fallback_path)
|
163 |
-
|
164 |
-
if not output_files:
|
165 |
-
raise Exception("No output files were generated")
|
166 |
|
167 |
-
|
|
|
|
|
|
|
168 |
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
return final_output
|
176 |
-
|
177 |
-
except Exception as e:
|
178 |
-
logger.error(f"Processing failed: {str(e)}", exc_info=True)
|
179 |
-
raise gr.Error(f"Processing failed: {str(e)}")
|
180 |
|
181 |
with gr.Blocks() as demo:
|
182 |
gr.Markdown("# Video Translation System")
|
|
|
1 |
import gradio as gr
|
2 |
import os
|
3 |
import tempfile
|
4 |
+
import subprocess
|
5 |
import assemblyai as aai
|
6 |
from deep_translator import GoogleTranslator
|
7 |
import pysrt
|
|
|
|
|
8 |
import logging
|
9 |
import sys
|
10 |
+
import shutil
|
11 |
+
from pathlib import Path
|
12 |
|
13 |
+
# Set up logging
|
14 |
logging.basicConfig(level=logging.INFO,
|
15 |
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
16 |
stream=sys.stdout)
|
|
|
27 |
"Hindi": "hi"
|
28 |
}
|
29 |
|
30 |
+
# Create a permanent output directory
|
31 |
+
OUTPUT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "outputs")
|
32 |
+
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
33 |
+
|
34 |
+
def extract_audio(video_path):
    """Extract the audio track of a video into a WAV file using ffmpeg.

    Args:
        video_path: Path of the input video, passed to ffmpeg via ``-i``.

    Returns:
        Path of the written file, ``<OUTPUT_DIR>/audio.wav``. The file is
        overwritten on every call because ffmpeg is run with ``-y``.

    Raises:
        Exception: If ffmpeg cannot be launched or exits with a non-zero
            status. The message carries the launch error or ffmpeg's stderr.
    """
    logger.info(f"Extracting audio from video: {video_path}")
    audio_path = os.path.join(OUTPUT_DIR, "audio.wav")

    # Use ffmpeg to extract audio
    cmd = [
        'ffmpeg',
        '-i', video_path,
        '-vn',                    # No video
        '-acodec', 'pcm_s16le',   # PCM format
        '-ar', '44100',           # Sample rate
        '-ac', '2',               # Stereo
        '-y',                     # Overwrite output file
        audio_path,
    ]

    # Only launching the process is guarded here. The return-code check lives
    # outside the try so its own exception is not caught, logged and wrapped a
    # second time ("Audio extraction failed: Audio extraction failed: ...").
    try:
        logger.info(f"Running command: {' '.join(cmd)}")
        process = subprocess.run(cmd, capture_output=True, text=True)
    except Exception as e:
        logger.error(f"Audio extraction failed: {str(e)}", exc_info=True)
        raise Exception(f"Audio extraction failed: {str(e)}") from e

    if process.returncode != 0:
        logger.error(f"Audio extraction failed: {process.stderr}")
        raise Exception(f"Audio extraction failed: {process.stderr}")

    return audio_path
|
63 |
|
64 |
+
def generate_subtitles(audio_path):
    """Transcribe an audio file with AssemblyAI and save the result as SRT.

    Args:
        audio_path: Path of the audio file handed to ``Transcriber.transcribe``.

    Returns:
        Path of the written subtitle file, ``<OUTPUT_DIR>/subtitles.srt``.

    Raises:
        Exception: If transcription, SRT export or writing the file fails.
            The original error is chained as ``__cause__``.
    """
    try:
        logger.info(f"Transcribing audio with AssemblyAI: {audio_path}")
        transcriber = aai.Transcriber()
        transcript = transcriber.transcribe(audio_path)

        # A failed job is reported through status/error on the returned
        # transcript rather than by raising, so check it before exporting.
        if transcript.status == aai.TranscriptStatus.error:
            raise RuntimeError(f"Transcription failed: {transcript.error}")

        # Export before opening the file so a failed export does not leave an
        # empty subtitles.srt behind.
        srt_content = transcript.export_subtitles_srt()

        srt_path = os.path.join(OUTPUT_DIR, "subtitles.srt")
        logger.info(f"Saving subtitles to: {srt_path}")

        with open(srt_path, "w", encoding="utf-8") as f:
            f.write(srt_content)

        return srt_path
    except Exception as e:
        logger.error(f"Subtitle generation failed: {str(e)}", exc_info=True)
        raise Exception(f"Subtitle generation failed: {str(e)}") from e
|
81 |
|
82 |
+
def translate_subtitles(srt_path, target_langs):
    """Translate an SRT file into each requested language.

    Args:
        srt_path: Path of the source ``.srt`` file (opened as UTF-8).
        target_langs: Iterable of language codes, each passed to
            ``GoogleTranslator`` as ``target``.

    Returns:
        Dict mapping every language code to the path of its translated file,
        ``<OUTPUT_DIR>/subtitles_<code>.srt``.

    Raises:
        Exception: If the subtitles cannot be loaded or saved. A failure on an
            individual cue is only logged and the cue keeps its source text.
    """
    try:
        logger.info(f"Loading subtitles from: {srt_path}")
        subs = pysrt.open(srt_path, encoding="utf-8")

        # Snapshot the source text once. Slicing the subtitle file (subs[:])
        # only copies the container, not the cue objects, so translating "the
        # copy" would overwrite the source and every later language would be
        # translated from the previous language's output.
        source_texts = [sub.text for sub in subs]
        total = len(source_texts)
        results = {}

        for lang_code in target_langs:
            logger.info(f"Translating to language code: {lang_code}")
            translator = GoogleTranslator(source="auto", target=lang_code)

            for i, (sub, source_text) in enumerate(zip(subs, source_texts)):
                try:
                    # Fall back to the source text if the translator returns
                    # nothing usable for this cue.
                    sub.text = translator.translate(source_text) or source_text
                    if i % 10 == 0:  # Log progress every 10 subtitles
                        logger.info(f"Translated {i+1}/{total} subtitles to {lang_code}")
                except Exception as e:
                    logger.warning(f"Failed to translate subtitle: {source_text}. Error: {str(e)}")
                    # Keep original text if translation fails
                    sub.text = source_text

            output_path = os.path.join(OUTPUT_DIR, f"subtitles_{lang_code}.srt")
            logger.info(f"Saving translated subtitles to: {output_path}")
            subs.save(output_path, encoding='utf-8')
            results[lang_code] = output_path

        return results
    except Exception as e:
        logger.error(f"Translation failed: {str(e)}", exc_info=True)
        raise Exception(f"Translation failed: {str(e)}") from e
|
112 |
|
113 |
def add_subtitles_ffmpeg(video_path, srt_path, output_path):
|
114 |
+
"""Add subtitles to video using ffmpeg"""
|
115 |
try:
|
116 |
+
logger.info(f"Adding subtitles to video using ffmpeg")
|
|
|
|
|
117 |
cmd = [
|
118 |
'ffmpeg',
|
119 |
+
'-i', video_path, # Input video
|
120 |
+
'-f', 'srt', # SRT format
|
121 |
+
'-i', srt_path, # Input subtitles
|
122 |
+
'-map', '0:v', # Map video from first input
|
123 |
+
'-map', '0:a', # Map audio from first input
|
124 |
+
'-map', '1', # Map subtitles from second input
|
125 |
+
'-c:v', 'copy', # Copy video codec
|
126 |
+
'-c:a', 'copy', # Copy audio codec
|
127 |
+
'-c:s', 'mov_text', # Use mov_text codec for subtitles
|
128 |
+
'-metadata:s:s:0', 'language=eng', # Set subtitle language
|
129 |
+
'-y', # Overwrite output file
|
130 |
output_path
|
131 |
]
|
132 |
|
|
|
135 |
|
136 |
if process.returncode != 0:
|
137 |
logger.error(f"ffmpeg error: {process.stderr}")
|
138 |
+
|
139 |
+
# If subtitle embedding fails, try burning subtitles into video
|
140 |
+
logger.info("Attempting to burn subtitles into video")
|
141 |
+
cmd = [
|
142 |
+
'ffmpeg',
|
143 |
+
'-i', video_path, # Input video
|
144 |
+
'-vf', f"subtitles='{srt_path}'", # Burn subtitles into video
|
145 |
+
'-c:a', 'copy', # Copy audio codec
|
146 |
+
'-y', # Overwrite output file
|
147 |
+
output_path
|
148 |
+
]
|
149 |
+
|
150 |
+
logger.info(f"Running fallback command: {' '.join(cmd)}")
|
151 |
+
process = subprocess.run(cmd, capture_output=True, text=True)
|
152 |
+
|
153 |
+
if process.returncode != 0:
|
154 |
+
logger.error(f"Subtitle burning failed: {process.stderr}")
|
155 |
+
raise Exception(f"Failed to add subtitles: {process.stderr}")
|
156 |
|
157 |
return output_path
|
158 |
except Exception as e:
|
159 |
+
logger.error(f"Subtitle addition failed: {str(e)}", exc_info=True)
|
160 |
+
raise Exception(f"Subtitle addition failed: {str(e)}")
|
161 |
|
162 |
def process_video(video_file, source_lang, target_langs, progress=gr.Progress()):
    """Run the full pipeline: extract audio, transcribe, translate, add subtitles.

    Args:
        video_file: Path of the uploaded video.
        source_lang: Not used by this function; kept so the signature stays
            compatible with existing callers.
        target_langs: Language names that are keys of ``LANGUAGES``. A single
            name given as a plain string is accepted as a one-item selection.
        progress: Progress callback, invoked as ``progress(fraction, message)``.

    Returns:
        A ``(video_path, status_message)`` tuple. ``video_path`` is the first
        successfully produced video, the unmodified copy of the input if every
        subtitle step failed, or ``None`` when processing could not run.
        Errors are reported through ``status_message`` instead of being raised.
    """
    try:
        progress(0.1, "Starting processing...")
        logger.info(f"Processing video: {video_file}")

        # Validate the inputs up front so the user gets a clear message
        # instead of a wrapped TypeError/KeyError from deeper in the pipeline.
        if not video_file:
            error_msg = "No video file provided. Please upload a video."
            logger.error(error_msg)
            return None, error_msg

        if isinstance(target_langs, str):
            # A bare string would otherwise be iterated character by character
            target_langs = [target_langs]
        if not target_langs:
            error_msg = "No target language selected. Please choose at least one."
            logger.error(error_msg)
            return None, error_msg

        unknown_langs = [lang for lang in target_langs if lang not in LANGUAGES]
        if unknown_langs:
            error_msg = f"Unsupported target language(s): {', '.join(map(str, unknown_langs))}"
            logger.error(error_msg)
            return None, error_msg

        # Make sure we have ffmpeg installed
        try:
            subprocess.run(['ffmpeg', '-version'], capture_output=True, check=True)
            logger.info("ffmpeg is installed and working")
        except (subprocess.SubprocessError, FileNotFoundError):
            error_msg = "ffmpeg is not installed or not in PATH. Please install ffmpeg."
            logger.error(error_msg)
            return None, error_msg

        # Extract audio
        progress(0.2, "Extracting audio...")
        audio_path = extract_audio(video_file)

        # Generate subtitles
        progress(0.4, "Generating subtitles...")
        srt_path = generate_subtitles(audio_path)

        # Translate subtitles
        progress(0.6, "Translating subtitles...")
        target_lang_codes = [LANGUAGES[lang] for lang in target_langs]
        translated_subs = translate_subtitles(srt_path, target_lang_codes)

        # Add subtitles to video for each language
        progress(0.8, "Creating output videos...")
        output_videos = []

        # Create a copy of the video file in our output directory first
        base_video = os.path.join(OUTPUT_DIR, "base_video.mp4")
        shutil.copy(video_file, base_video)

        for lang_code, sub_path in translated_subs.items():
            output_path = os.path.join(OUTPUT_DIR, f"output_{lang_code}.mp4")
            logger.info(f"Adding {lang_code} subtitles to video: {output_path}")

            # One failing language must not abort the others
            try:
                output_video = add_subtitles_ffmpeg(base_video, sub_path, output_path)

                # Verify the output file exists and has content
                if os.path.exists(output_video) and os.path.getsize(output_video) > 1000:
                    logger.info(f"Successfully created output file: {output_video}")
                    output_videos.append(output_video)
                else:
                    logger.warning(f"Output file is missing or too small: {output_video}")
            except Exception as e:
                logger.error(f"Failed to create video with {lang_code} subtitles: {str(e)}")

        # If all output videos failed, return the original
        if not output_videos:
            logger.warning("All subtitle additions failed, returning original video")
            output_videos = [base_video]

        progress(1.0, "Done!")
        # Only the first produced video is handed back to the caller
        return output_videos[0], f"Processing complete. Video saved to: {output_videos[0]}"

    except Exception as e:
        logger.error(f"Processing failed: {str(e)}", exc_info=True)
        return None, f"Processing failed: {str(e)}"
|
|
|
|
|
|
|
|
|
|
|
225 |
|
226 |
with gr.Blocks() as demo:
|
227 |
gr.Markdown("# Video Translation System")
|