Spaces:
Running
Running
Update src/audio/generator.py
Browse files
- src/audio/generator.py +57 -61
src/audio/generator.py
CHANGED
@@ -76,11 +76,13 @@ def generate_translated_audio(srt_path, target_lang, video_duration=180):
|
|
76 |
retry_count = 0
|
77 |
while retry_count < MAX_RETRY_ATTEMPTS:
|
78 |
try:
|
79 |
-
# For certain languages, use slower speed
|
80 |
slow_option = target_lang in ["hi", "ja", "zh-CN", "ar"]
|
81 |
tts = gTTS(text=text, lang=target_lang, slow=slow_option)
|
82 |
tts.save(str(audio_file))
|
83 |
|
|
|
|
|
84 |
if audio_file.exists() and audio_file.stat().st_size > 0:
|
85 |
break
|
86 |
else:
|
@@ -89,9 +91,9 @@ def generate_translated_audio(srt_path, target_lang, video_duration=180):
|
|
89 |
except Exception as e:
|
90 |
retry_count += 1
|
91 |
logger.warning(f"TTS attempt {retry_count} failed for {target_lang}: {str(e)}")
|
92 |
-
time.sleep(1)
|
93 |
|
94 |
-
#
|
95 |
if retry_count == MAX_RETRY_ATTEMPTS - 1 and len(text) > 100:
|
96 |
logger.warning(f"Trying with shortened text for {target_lang}")
|
97 |
shortened_text = text[:100] + "..."
|
@@ -104,83 +106,77 @@ def generate_translated_audio(srt_path, target_lang, video_duration=180):
|
|
104 |
else:
|
105 |
logger.warning(f"Failed to generate audio for subtitle {i}")
|
106 |
|
107 |
-
#
|
108 |
if not audio_files:
|
109 |
-
logger.warning(f"No audio files
|
110 |
-
# Create a silent audio file as fallback
|
111 |
silent_audio = OUTPUT_DIR / f"translated_audio_{target_lang}.wav"
|
112 |
create_silent_audio(video_duration, silent_audio)
|
113 |
return silent_audio
|
114 |
-
|
115 |
-
#
|
|
|
116 |
silence_file = temp_dir / "silence.wav"
|
117 |
create_silent_audio(video_duration, silence_file)
|
118 |
-
|
119 |
-
#
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
# Add each audio segment
|
127 |
-
for start_time, end_time, duration, audio_file in timings:
|
128 |
-
delay_ms = int(start_time * 1000)
|
129 |
-
filter_parts.append(f"[{input_count}:a]adelay={delay_ms}|{delay_ms}")
|
130 |
-
input_count += 1
|
131 |
-
|
132 |
-
# Mix all audio tracks
|
133 |
-
filter_parts.append(f"amix=inputs={input_count}:dropout_transition=0:normalize=0[aout]")
|
134 |
-
filter_complex = ";".join(filter_parts)
|
135 |
-
|
136 |
-
# Build the ffmpeg command
|
137 |
cmd = ['ffmpeg', '-y']
|
|
|
138 |
|
139 |
-
# Add
|
140 |
-
cmd.extend(['-i', str(silence_file)])
|
141 |
-
|
142 |
-
# Add all audio chunks
|
143 |
for audio_file in audio_files:
|
144 |
-
cmd
|
145 |
-
|
146 |
-
#
|
147 |
-
|
|
|
|
|
|
|
|
|
|
|
148 |
|
149 |
-
|
150 |
-
|
|
|
|
|
|
|
|
|
151 |
'-filter_complex', filter_complex,
|
152 |
'-map', '[aout]',
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
logger.debug(f"Running command: {' '.join(cmd)}")
|
|
|
|
|
159 |
process = subprocess.run(cmd, capture_output=True, text=True)
|
160 |
|
161 |
if process.returncode != 0:
|
162 |
-
logger.error(f"Audio
|
163 |
-
# Create a fallback silent audio
|
164 |
silent_audio = OUTPUT_DIR / f"translated_audio_{target_lang}.wav"
|
165 |
create_silent_audio(video_duration, silent_audio)
|
166 |
-
|
167 |
-
|
168 |
-
|
|
|
|
|
169 |
try:
|
170 |
shutil.rmtree(temp_dir)
|
171 |
-
logger.debug(f"Cleaned
|
172 |
except Exception as e:
|
173 |
-
logger.warning(f"
|
174 |
-
|
175 |
-
logger.info(f"Successfully created translated audio: {output_audio}")
|
176 |
return output_audio
|
|
|
177 |
except Exception as e:
|
178 |
-
logger.error(f"Audio
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
silent_audio = OUTPUT_DIR / f"translated_audio_{target_lang}.wav"
|
183 |
-
create_silent_audio(video_duration, silent_audio)
|
184 |
-
return silent_audio
|
185 |
-
except:
|
186 |
-
raise Exception(f"Audio translation failed: {str(e)}")
|
|
|
76 |
retry_count = 0
|
77 |
while retry_count < MAX_RETRY_ATTEMPTS:
|
78 |
try:
|
79 |
+
# For certain languages, use slower speed
|
80 |
slow_option = target_lang in ["hi", "ja", "zh-CN", "ar"]
|
81 |
tts = gTTS(text=text, lang=target_lang, slow=slow_option)
|
82 |
tts.save(str(audio_file))
|
83 |
|
84 |
+
logger.info(f"Generated TTS file size for chunk {i}: {audio_file.stat().st_size} bytes")
|
85 |
+
|
86 |
if audio_file.exists() and audio_file.stat().st_size > 0:
|
87 |
break
|
88 |
else:
|
|
|
91 |
except Exception as e:
|
92 |
retry_count += 1
|
93 |
logger.warning(f"TTS attempt {retry_count} failed for {target_lang}: {str(e)}")
|
94 |
+
time.sleep(1)
|
95 |
|
96 |
+
# Fallback to shortened text
|
97 |
if retry_count == MAX_RETRY_ATTEMPTS - 1 and len(text) > 100:
|
98 |
logger.warning(f"Trying with shortened text for {target_lang}")
|
99 |
shortened_text = text[:100] + "..."
|
|
|
106 |
else:
|
107 |
logger.warning(f"Failed to generate audio for subtitle {i}")
|
108 |
|
109 |
+
# Fallback if no audio generated
|
110 |
if not audio_files:
|
111 |
+
logger.warning(f"No audio files generated for {target_lang}")
|
|
|
112 |
silent_audio = OUTPUT_DIR / f"translated_audio_{target_lang}.wav"
|
113 |
create_silent_audio(video_duration, silent_audio)
|
114 |
return silent_audio
|
115 |
+
|
116 |
+
# Output configuration
|
117 |
+
output_audio = OUTPUT_DIR / f"translated_audio_{target_lang}.mp3"
|
118 |
silence_file = temp_dir / "silence.wav"
|
119 |
create_silent_audio(video_duration, silence_file)
|
120 |
+
|
121 |
+
# Validate input files
|
122 |
+
for f in [silence_file, *audio_files]:
|
123 |
+
if not f.exists():
|
124 |
+
logger.error(f"Missing input file: {f}")
|
125 |
+
return create_silent_audio(video_duration, output_audio)
|
126 |
+
|
127 |
+
# Build FFmpeg command with volume boost and timing
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
cmd = ['ffmpeg', '-y']
|
129 |
+
cmd += ['-i', str(silence_file)]
|
130 |
|
131 |
+
# Add all audio chunks as inputs
|
|
|
|
|
|
|
132 |
for audio_file in audio_files:
|
133 |
+
cmd += ['-i', str(audio_file)]
|
134 |
+
|
135 |
+
# Create filter chain for each audio chunk
|
136 |
+
filter_chains = []
|
137 |
+
for i, (start_time, _, _, _) in enumerate(timings):
|
138 |
+
delay_ms = int(start_time * 1000)
|
139 |
+
filter_chains.append(
|
140 |
+
f"[{i+1}:a]volume=12dB,adelay={delay_ms}|{delay_ms},apad=whole_dur={video_duration}[a{i}]"
|
141 |
+
)
|
142 |
|
143 |
+
# Mix all audio streams with normalization
|
144 |
+
mix_inputs = ''.join([f"[a{i}]" for i in range(len(timings))])
|
145 |
+
filter_complex = ";".join(filter_chains) + \
|
146 |
+
f";{mix_inputs}amix=inputs={len(timings)}:duration=longest:normalize=0,volume=3dB[aout]"
|
147 |
+
|
148 |
+
cmd += [
|
149 |
'-filter_complex', filter_complex,
|
150 |
'-map', '[aout]',
|
151 |
+
'-c:a', 'libmp3lame', # Changed to MP3 codec
|
152 |
+
'-b:a', '192k',
|
153 |
+
str(output_audio)
|
154 |
+
]
|
155 |
+
|
156 |
+
logger.debug(f"Running FFmpeg command: {' '.join(cmd)}")
|
157 |
+
|
158 |
+
# Execute audio mixing
|
159 |
process = subprocess.run(cmd, capture_output=True, text=True)
|
160 |
|
161 |
if process.returncode != 0:
|
162 |
+
logger.error(f"Audio mixing failed: {process.stderr}")
|
|
|
163 |
silent_audio = OUTPUT_DIR / f"translated_audio_{target_lang}.wav"
|
164 |
create_silent_audio(video_duration, silent_audio)
|
165 |
+
return silent_audio
|
166 |
+
|
167 |
+
logger.info(f"Final audio file size: {output_audio.stat().st_size} bytes")
|
168 |
+
|
169 |
+
# Cleanup temporary files
|
170 |
try:
|
171 |
shutil.rmtree(temp_dir)
|
172 |
+
logger.debug(f"Cleaned temporary directory: {temp_dir}")
|
173 |
except Exception as e:
|
174 |
+
logger.warning(f"Temp cleanup failed: {str(e)}")
|
175 |
+
|
|
|
176 |
return output_audio
|
177 |
+
|
178 |
except Exception as e:
|
179 |
+
logger.error(f"Audio generation failed: {str(e)}", exc_info=True)
|
180 |
+
silent_audio = OUTPUT_DIR / f"translated_audio_{target_lang}.wav"
|
181 |
+
create_silent_audio(video_duration, silent_audio)
|
182 |
+
return silent_audio
|
|
|
|
|
|
|
|
|
|