clip audio between [-1,1]
app.py CHANGED
@@ -293,8 +293,10 @@ def voice_conversion(source, target, diffusion_steps, length_adjust, inference_c
         vc_wave = bigvgan_fn(vc_target.float())[0]
         if processed_frames == 0:
             if is_last_chunk:
+                # output_wave = torch.clip(vc_wave[0], -0.999, 0.999).cpu().numpy()
                 output_wave = vc_wave[0].cpu().numpy()
                 generated_wave_chunks.append(output_wave)
+                output_wave = np.clip(output_wave, -0.999, 0.999)
                 output_wave = (output_wave * 32768.0).astype(np.int16)
                 mp3_bytes = AudioSegment(
                     output_wave.tobytes(), frame_rate=sr,
@@ -306,6 +308,7 @@ def voice_conversion(source, target, diffusion_steps, length_adjust, inference_c
             generated_wave_chunks.append(output_wave)
             previous_chunk = vc_wave[0, -overlap_wave_len:]
             processed_frames += vc_target.size(2) - overlap_frame_len
+            output_wave = np.clip(output_wave, -0.999, 0.999)
             output_wave = (output_wave * 32768.0).astype(np.int16)
             mp3_bytes = AudioSegment(
                 output_wave.tobytes(), frame_rate=sr,
@@ -316,6 +319,7 @@ def voice_conversion(source, target, diffusion_steps, length_adjust, inference_c
             output_wave = crossfade(previous_chunk.cpu().numpy(), vc_wave[0].cpu().numpy(), overlap_wave_len)
             generated_wave_chunks.append(output_wave)
             processed_frames += vc_target.size(2) - overlap_frame_len
+            output_wave = np.clip(output_wave, -0.999, 0.999)
             output_wave = (output_wave * 32768.0).astype(np.int16)
             mp3_bytes = AudioSegment(
                 output_wave.tobytes(), frame_rate=sr,
@@ -328,6 +332,7 @@ def voice_conversion(source, target, diffusion_steps, length_adjust, inference_c
             generated_wave_chunks.append(output_wave)
             previous_chunk = vc_wave[0, -overlap_wave_len:]
             processed_frames += vc_target.size(2) - overlap_frame_len
+            output_wave = np.clip(output_wave, -0.999, 0.999)
             output_wave = (output_wave * 32768.0).astype(np.int16)
             mp3_bytes = AudioSegment(
                 output_wave.tobytes(), frame_rate=sr,
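As a quick illustration of what the added np.clip guards against (a standalone sketch, not part of app.py): if the vocoder output slightly overshoots [-1, 1], multiplying by 32768 produces values outside the int16 range, and the cast can wrap a peak into a loud click. Clipping just inside [-1, 1] first keeps every scaled sample representable. The sample values below are made up for illustration.

    import numpy as np

    # Hypothetical float waveform that slightly overshoots [-1, 1], as a neural
    # vocoder occasionally produces on loud passages.
    wave = np.array([0.5, 1.02, -1.03, 0.999], dtype=np.float32)

    # Without clipping: 1.02 * 32768 = 33423.36 does not fit in int16 (max 32767);
    # the cast behaves in a platform-dependent way and typically wraps the peak
    # into a large negative sample, which is audible as a click.
    overflowed = (wave * 32768.0).astype(np.int16)

    # With the clip added in this commit, every scaled sample stays within
    # [-32735, 32735], safely inside the int16 range.
    clipped = (np.clip(wave, -0.999, 0.999) * 32768.0).astype(np.int16)

    print(overflowed)
    print(clipped)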