lshzhm committed on
Commit
6a8113c
·
1 Parent(s): 4427b01

Update infer_cli_test.py

Browse files
F5-TTS/src/f5_tts/infer/infer_cli_test.py CHANGED
@@ -472,6 +472,7 @@ if __name__ == "__main__":
472
  energy.append(waveform_v2a[0,int(i*sr_v2a*(256/24000)):int((i+1)*sr_v2a*(256/24000))].abs().mean())
473
  energy = np.array(energy)
474
  energy = energy / max(energy)
 
475
 
476
  waveform_p, sr_p = torchaudio.load(wav_p)
477
  duration_p = waveform_p.shape[-1] / sr_p
@@ -480,6 +481,7 @@ if __name__ == "__main__":
480
  energy_p.append(waveform_p[0,int(i*sr_p*(256/24000)):int((i+1)*sr_p*(256/24000))].abs().mean())
481
  energy_p = np.array(energy_p)
482
  energy_p = energy_p / max(energy_p)
 
483
 
484
  #print("energy shape", energy_p.shape, energy.shape)
485
  #energy = torch.cat([energy_p, energy], dim=1)
 
472
  energy.append(waveform_v2a[0,int(i*sr_v2a*(256/24000)):int((i+1)*sr_v2a*(256/24000))].abs().mean())
473
  energy = np.array(energy)
474
  energy = energy / max(energy)
475
+ energy = torch.from_numpy(energy).unsqueeze(0).unsqueeze(2)
476
 
477
  waveform_p, sr_p = torchaudio.load(wav_p)
478
  duration_p = waveform_p.shape[-1] / sr_p
 
481
  energy_p.append(waveform_p[0,int(i*sr_p*(256/24000)):int((i+1)*sr_p*(256/24000))].abs().mean())
482
  energy_p = np.array(energy_p)
483
  energy_p = energy_p / max(energy_p)
484
+ energy_p = torch.from_numpy(energy_p).unsqueeze(0).unsqueeze(2)
485
 
486
  #print("energy shape", energy_p.shape, energy.shape)
487
  #energy = torch.cat([energy_p, energy], dim=1)