Spaces:
mengtoa
/
Running on Zero

mengtoa committed on
Commit
c54af5e
·
verified ·
1 Parent(s): 500b392

clip audio between [-1,1]

Browse files
Files changed (1) hide show
  1. app.py +5 -0
app.py CHANGED
@@ -293,8 +293,10 @@ def voice_conversion(source, target, diffusion_steps, length_adjust, inference_c
293
  vc_wave = bigvgan_fn(vc_target.float())[0]
294
  if processed_frames == 0:
295
  if is_last_chunk:
 
296
  output_wave = vc_wave[0].cpu().numpy()
297
  generated_wave_chunks.append(output_wave)
 
298
  output_wave = (output_wave * 32768.0).astype(np.int16)
299
  mp3_bytes = AudioSegment(
300
  output_wave.tobytes(), frame_rate=sr,
@@ -306,6 +308,7 @@ def voice_conversion(source, target, diffusion_steps, length_adjust, inference_c
306
  generated_wave_chunks.append(output_wave)
307
  previous_chunk = vc_wave[0, -overlap_wave_len:]
308
  processed_frames += vc_target.size(2) - overlap_frame_len
 
309
  output_wave = (output_wave * 32768.0).astype(np.int16)
310
  mp3_bytes = AudioSegment(
311
  output_wave.tobytes(), frame_rate=sr,
@@ -316,6 +319,7 @@ def voice_conversion(source, target, diffusion_steps, length_adjust, inference_c
316
  output_wave = crossfade(previous_chunk.cpu().numpy(), vc_wave[0].cpu().numpy(), overlap_wave_len)
317
  generated_wave_chunks.append(output_wave)
318
  processed_frames += vc_target.size(2) - overlap_frame_len
 
319
  output_wave = (output_wave * 32768.0).astype(np.int16)
320
  mp3_bytes = AudioSegment(
321
  output_wave.tobytes(), frame_rate=sr,
@@ -328,6 +332,7 @@ def voice_conversion(source, target, diffusion_steps, length_adjust, inference_c
328
  generated_wave_chunks.append(output_wave)
329
  previous_chunk = vc_wave[0, -overlap_wave_len:]
330
  processed_frames += vc_target.size(2) - overlap_frame_len
 
331
  output_wave = (output_wave * 32768.0).astype(np.int16)
332
  mp3_bytes = AudioSegment(
333
  output_wave.tobytes(), frame_rate=sr,
 
293
  vc_wave = bigvgan_fn(vc_target.float())[0]
294
  if processed_frames == 0:
295
  if is_last_chunk:
296
+ # output_wave = torch.clip(vc_wave[0], -0.999, 0.999).cpu().numpy()
297
  output_wave = vc_wave[0].cpu().numpy()
298
  generated_wave_chunks.append(output_wave)
299
+ output_wave = np.clip(output_wave, -0.999, 0.999)
300
  output_wave = (output_wave * 32768.0).astype(np.int16)
301
  mp3_bytes = AudioSegment(
302
  output_wave.tobytes(), frame_rate=sr,
 
308
  generated_wave_chunks.append(output_wave)
309
  previous_chunk = vc_wave[0, -overlap_wave_len:]
310
  processed_frames += vc_target.size(2) - overlap_frame_len
311
+ output_wave = np.clip(output_wave, -0.999, 0.999)
312
  output_wave = (output_wave * 32768.0).astype(np.int16)
313
  mp3_bytes = AudioSegment(
314
  output_wave.tobytes(), frame_rate=sr,
 
319
  output_wave = crossfade(previous_chunk.cpu().numpy(), vc_wave[0].cpu().numpy(), overlap_wave_len)
320
  generated_wave_chunks.append(output_wave)
321
  processed_frames += vc_target.size(2) - overlap_frame_len
322
+ output_wave = np.clip(output_wave, -0.999, 0.999)
323
  output_wave = (output_wave * 32768.0).astype(np.int16)
324
  mp3_bytes = AudioSegment(
325
  output_wave.tobytes(), frame_rate=sr,
 
332
  generated_wave_chunks.append(output_wave)
333
  previous_chunk = vc_wave[0, -overlap_wave_len:]
334
  processed_frames += vc_target.size(2) - overlap_frame_len
335
+ output_wave = np.clip(output_wave, -0.999, 0.999)
336
  output_wave = (output_wave * 32768.0).astype(np.int16)
337
  mp3_bytes = AudioSegment(
338
  output_wave.tobytes(), frame_rate=sr,