积极的屁孩 committed on
Commit
7d50d8a
·
1 Parent(s): 28f4e7c

fix vevo style: downmix style audio to mono, resample to 24 kHz, and normalize volume

Browse files
Files changed (1) hide show
  1. app.py +18 -10
app.py CHANGED
@@ -383,17 +383,25 @@ def vevo_style(content_wav, style_wav):
383
  else:
384
  raise ValueError("Invalid content audio format")
385
 
386
- if isinstance(style_wav, tuple) and len(style_wav) == 2:
387
- # 确保正确的顺序 (data, sample_rate)
388
- if isinstance(style_wav[0], np.ndarray):
389
- style_data, style_sr = style_wav
390
- else:
391
- style_sr, style_data = style_wav
392
- style_tensor = torch.FloatTensor(style_data)
393
- if style_tensor.ndim == 1:
394
- style_tensor = style_tensor.unsqueeze(0) # 添加通道维度
395
  else:
396
- raise ValueError("Invalid style audio format")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
397
 
398
  # 打印debug信息
399
  print(f"Content audio shape: {content_tensor.shape}, sample rate: {content_sr}")
 
383
  else:
384
  raise ValueError("Invalid content audio format")
385
 
386
+ if isinstance(style_wav[0], np.ndarray):
387
+ style_data, style_sr = style_wav
 
 
 
 
 
 
 
388
  else:
389
+ style_sr, style_data = style_wav
390
+
391
+ # 确保是单声道
392
+ if len(style_data.shape) > 1 and style_data.shape[1] > 1:
393
+ style_data = np.mean(style_data, axis=1)
394
+
395
+ # 重采样到24kHz
396
+ if style_sr != 24000:
397
+ style_tensor = torch.FloatTensor(style_data).unsqueeze(0)
398
+ style_tensor = torchaudio.functional.resample(style_tensor, style_sr, 24000)
399
+ style_sr = 24000
400
+ else:
401
+ style_tensor = torch.FloatTensor(style_data).unsqueeze(0)
402
+
403
+ # 归一化音量
404
+ style_tensor = style_tensor / (torch.max(torch.abs(style_tensor)) + 1e-6) * 0.95
405
 
406
  # 打印debug信息
407
  print(f"Content audio shape: {content_tensor.shape}, sample rate: {content_sr}")