Maaz1 committed on
Commit
e0f225e
·
verified ·
1 Parent(s): 54ba66a

Update src/audio/generator.py

Browse files
Files changed (1) hide show
  1. src/audio/generator.py +57 -61
src/audio/generator.py CHANGED
@@ -76,11 +76,13 @@ def generate_translated_audio(srt_path, target_lang, video_duration=180):
76
  retry_count = 0
77
  while retry_count < MAX_RETRY_ATTEMPTS:
78
  try:
79
- # For certain languages, use slower speed which might improve reliability
80
  slow_option = target_lang in ["hi", "ja", "zh-CN", "ar"]
81
  tts = gTTS(text=text, lang=target_lang, slow=slow_option)
82
  tts.save(str(audio_file))
83
 
 
 
84
  if audio_file.exists() and audio_file.stat().st_size > 0:
85
  break
86
  else:
@@ -89,9 +91,9 @@ def generate_translated_audio(srt_path, target_lang, video_duration=180):
89
  except Exception as e:
90
  retry_count += 1
91
  logger.warning(f"TTS attempt {retry_count} failed for {target_lang}: {str(e)}")
92
- time.sleep(1) # Wait before retrying
93
 
94
- # If still failing after retries, try with shorter text
95
  if retry_count == MAX_RETRY_ATTEMPTS - 1 and len(text) > 100:
96
  logger.warning(f"Trying with shortened text for {target_lang}")
97
  shortened_text = text[:100] + "..."
@@ -104,83 +106,77 @@ def generate_translated_audio(srt_path, target_lang, video_duration=180):
104
  else:
105
  logger.warning(f"Failed to generate audio for subtitle {i}")
106
 
107
- # Check if we generated any audio files
108
  if not audio_files:
109
- logger.warning(f"No audio files were generated for {target_lang}")
110
- # Create a silent audio file as fallback
111
  silent_audio = OUTPUT_DIR / f"translated_audio_{target_lang}.wav"
112
  create_silent_audio(video_duration, silent_audio)
113
  return silent_audio
114
-
115
- # Create a silent audio track as base
 
116
  silence_file = temp_dir / "silence.wav"
117
  create_silent_audio(video_duration, silence_file)
118
-
119
- # Create filter complex for audio mixing
120
- filter_complex = []
121
- input_count = 1 # Starting with 1 because 0 is the silence track
122
-
123
- # Start with silent track
124
- filter_parts = ["[0:a]"]
125
-
126
- # Add each audio segment
127
- for start_time, end_time, duration, audio_file in timings:
128
- delay_ms = int(start_time * 1000)
129
- filter_parts.append(f"[{input_count}:a]adelay={delay_ms}|{delay_ms}")
130
- input_count += 1
131
-
132
- # Mix all audio tracks
133
- filter_parts.append(f"amix=inputs={input_count}:dropout_transition=0:normalize=0[aout]")
134
- filter_complex = ";".join(filter_parts)
135
-
136
- # Build the ffmpeg command
137
  cmd = ['ffmpeg', '-y']
 
138
 
139
- # Add silent base track
140
- cmd.extend(['-i', str(silence_file)])
141
-
142
- # Add all audio chunks
143
  for audio_file in audio_files:
144
- cmd.extend(['-i', str(audio_file)])
145
-
146
- # Add filter complex and output
147
- output_audio = OUTPUT_DIR / f"translated_audio_{target_lang}.mp3"
 
 
 
 
 
148
 
149
- output_audio = OUTPUT_DIR / f"translated_audio_{target_lang}.wav"
150
- cmd.extend([
 
 
 
 
151
  '-filter_complex', filter_complex,
152
  '-map', '[aout]',
153
- output_audio
154
- ])
155
-
156
- # Run the command
157
- logger.info(f"Combining {len(audio_files)} audio segments")
158
- logger.debug(f"Running command: {' '.join(cmd)}")
 
 
159
  process = subprocess.run(cmd, capture_output=True, text=True)
160
 
161
  if process.returncode != 0:
162
- logger.error(f"Audio combination failed: {process.stderr}")
163
- # Create a fallback silent audio
164
  silent_audio = OUTPUT_DIR / f"translated_audio_{target_lang}.wav"
165
  create_silent_audio(video_duration, silent_audio)
166
- output_audio = silent_audio
167
-
168
- # Clean up temporary files
 
 
169
  try:
170
  shutil.rmtree(temp_dir)
171
- logger.debug(f"Cleaned up temporary directory: {temp_dir}")
172
  except Exception as e:
173
- logger.warning(f"Failed to clean up temp directory: {str(e)}")
174
-
175
- logger.info(f"Successfully created translated audio: {output_audio}")
176
  return output_audio
 
177
  except Exception as e:
178
- logger.error(f"Audio translation failed: {str(e)}", exc_info=True)
179
-
180
- # Create an emergency fallback silent audio
181
- try:
182
- silent_audio = OUTPUT_DIR / f"translated_audio_{target_lang}.wav"
183
- create_silent_audio(video_duration, silent_audio)
184
- return silent_audio
185
- except:
186
- raise Exception(f"Audio translation failed: {str(e)}")
 
76
  retry_count = 0
77
  while retry_count < MAX_RETRY_ATTEMPTS:
78
  try:
79
+ # For certain languages, use slower speed
80
  slow_option = target_lang in ["hi", "ja", "zh-CN", "ar"]
81
  tts = gTTS(text=text, lang=target_lang, slow=slow_option)
82
  tts.save(str(audio_file))
83
 
84
+ logger.info(f"Generated TTS file size for chunk {i}: {audio_file.stat().st_size} bytes")
85
+
86
  if audio_file.exists() and audio_file.stat().st_size > 0:
87
  break
88
  else:
 
91
  except Exception as e:
92
  retry_count += 1
93
  logger.warning(f"TTS attempt {retry_count} failed for {target_lang}: {str(e)}")
94
+ time.sleep(1)
95
 
96
+ # Fallback to shortened text
97
  if retry_count == MAX_RETRY_ATTEMPTS - 1 and len(text) > 100:
98
  logger.warning(f"Trying with shortened text for {target_lang}")
99
  shortened_text = text[:100] + "..."
 
106
  else:
107
  logger.warning(f"Failed to generate audio for subtitle {i}")
108
 
109
+ # Fallback if no audio generated
110
  if not audio_files:
111
+ logger.warning(f"No audio files generated for {target_lang}")
 
112
  silent_audio = OUTPUT_DIR / f"translated_audio_{target_lang}.wav"
113
  create_silent_audio(video_duration, silent_audio)
114
  return silent_audio
115
+
116
+ # Output configuration
117
+ output_audio = OUTPUT_DIR / f"translated_audio_{target_lang}.mp3"
118
  silence_file = temp_dir / "silence.wav"
119
  create_silent_audio(video_duration, silence_file)
120
+
121
+ # Validate input files
122
+ for f in [silence_file, *audio_files]:
123
+ if not f.exists():
124
+ logger.error(f"Missing input file: {f}")
125
+ return create_silent_audio(video_duration, output_audio)
126
+
127
+ # Build FFmpeg command with volume boost and timing
 
 
 
 
 
 
 
 
 
 
 
128
  cmd = ['ffmpeg', '-y']
129
+ cmd += ['-i', str(silence_file)]
130
 
131
+ # Add all audio chunks as inputs
 
 
 
132
  for audio_file in audio_files:
133
+ cmd += ['-i', str(audio_file)]
134
+
135
+ # Create filter chain for each audio chunk
136
+ filter_chains = []
137
+ for i, (start_time, _, _, _) in enumerate(timings):
138
+ delay_ms = int(start_time * 1000)
139
+ filter_chains.append(
140
+ f"[{i+1}:a]volume=12dB,adelay={delay_ms}|{delay_ms},apad=whole_dur={video_duration}[a{i}]"
141
+ )
142
 
143
+ # Mix all audio streams with normalization
144
+ mix_inputs = ''.join([f"[a{i}]" for i in range(len(timings))])
145
+ filter_complex = ";".join(filter_chains) + \
146
+ f";{mix_inputs}amix=inputs={len(timings)}:duration=longest:normalize=0,volume=3dB[aout]"
147
+
148
+ cmd += [
149
  '-filter_complex', filter_complex,
150
  '-map', '[aout]',
151
+ '-c:a', 'libmp3lame', # Changed to MP3 codec
152
+ '-b:a', '192k',
153
+ str(output_audio)
154
+ ]
155
+
156
+ logger.debug(f"Running FFmpeg command: {' '.join(cmd)}")
157
+
158
+ # Execute audio mixing
159
  process = subprocess.run(cmd, capture_output=True, text=True)
160
 
161
  if process.returncode != 0:
162
+ logger.error(f"Audio mixing failed: {process.stderr}")
 
163
  silent_audio = OUTPUT_DIR / f"translated_audio_{target_lang}.wav"
164
  create_silent_audio(video_duration, silent_audio)
165
+ return silent_audio
166
+
167
+ logger.info(f"Final audio file size: {output_audio.stat().st_size} bytes")
168
+
169
+ # Cleanup temporary files
170
  try:
171
  shutil.rmtree(temp_dir)
172
+ logger.debug(f"Cleaned temporary directory: {temp_dir}")
173
  except Exception as e:
174
+ logger.warning(f"Temp cleanup failed: {str(e)}")
175
+
 
176
  return output_audio
177
+
178
  except Exception as e:
179
+ logger.error(f"Audio generation failed: {str(e)}", exc_info=True)
180
+ silent_audio = OUTPUT_DIR / f"translated_audio_{target_lang}.wav"
181
+ create_silent_audio(video_duration, silent_audio)
182
+ return silent_audio