积极的屁孩 committed on
Commit
ced52e3
·
1 Parent(s): b3c35e4
Files changed (2) hide show
  1. README.md +33 -0
  2. app.py +14 -47
README.md CHANGED
@@ -11,3 +11,36 @@ license: apache-2.0
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
14
+
15
+ # Vevo语音转换模型演示
16
+
17
+ 这是基于[amphion/Vevo](https://huggingface.co/amphion/Vevo)模型的Gradio演示应用。Vevo是一个强大的语音转换模型,支持多种语音处理功能。
18
+
19
+ ## 功能
20
+
21
+ - **语音转换 (VevoVoice)**: 保留内容音频的语音内容,将其风格和音色转换为参考音频的风格和音色
22
+ - **风格转换 (VevoStyle)**: 将内容音频的风格转换为参考音频的风格,保留原始音色
23
+ - **音色转换 (VevoTimbre)**: 将内容音频的音色转换为参考音频的音色,保留内容和风格
24
+ - **文本转语音 (VevoTTS)**: 将输入文本转换为语音,使用参考音频的风格和音色
25
+
26
+ ## 安装
27
+
28
+ ```bash
29
+ pip install -r requirements.txt
30
+ ```
31
+
32
+ ## 运行
33
+
34
+ ```bash
35
+ python app.py
36
+ ```
37
+
38
+ ## 模型来源
39
+
40
+ - 模型:[amphion/Vevo](https://huggingface.co/amphion/Vevo)
41
+ - 原始仓库:[open-mmlab/Amphion](https://github.com/open-mmlab/Amphion)
42
+
43
+ ## 注意事项
44
+
45
+ - 首次运行时,会自动下载模型文件,可能需要一些时间
46
+ - 音频输入应为WAV格式,采样率为24kHz
app.py CHANGED
@@ -334,24 +334,15 @@ class VevoGradioApp:
334
  """语音转换功能"""
335
  pipeline = self.init_voice_conversion_pipeline()
336
 
337
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as content_file, \
338
- tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as reference_file, \
339
- tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
340
-
341
- content_path = content_file.name
342
- reference_path = reference_file.name
343
  output_path = output_file.name
344
 
345
- # 保存上传的音频文件
346
- content_audio.save(content_path)
347
- reference_audio.save(reference_path)
348
-
349
  # 执行语音转换
350
  gen_audio = pipeline.inference_ar_and_fm(
351
- src_wav_path=content_path,
352
  src_text=None,
353
- style_ref_wav_path=reference_path,
354
- timbre_ref_wav_path=reference_path,
355
  )
356
  save_audio(gen_audio, output_path=output_path)
357
 
@@ -361,24 +352,15 @@ class VevoGradioApp:
361
  """风格转换功能"""
362
  pipeline = self.init_voice_conversion_pipeline()
363
 
364
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as content_file, \
365
- tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as style_file, \
366
- tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
367
-
368
- content_path = content_file.name
369
- style_path = style_file.name
370
  output_path = output_file.name
371
 
372
- # 保存上传的音频文件
373
- content_audio.save(content_path)
374
- style_audio.save(style_path)
375
-
376
  # 执行风格转换
377
  gen_audio = pipeline.inference_ar_and_fm(
378
- src_wav_path=content_path,
379
  src_text=None,
380
- style_ref_wav_path=style_path,
381
- timbre_ref_wav_path=content_path,
382
  )
383
  save_audio(gen_audio, output_path=output_path)
384
 
@@ -388,22 +370,13 @@ class VevoGradioApp:
388
  """音色转换功能"""
389
  pipeline = self.init_timbre_pipeline()
390
 
391
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as content_file, \
392
- tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as reference_file, \
393
- tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
394
-
395
- content_path = content_file.name
396
- reference_path = reference_file.name
397
  output_path = output_file.name
398
 
399
- # 保存上传的音频文件
400
- content_audio.save(content_path)
401
- reference_audio.save(reference_path)
402
-
403
  # 执行音色转换
404
  gen_audio = pipeline.inference_fm(
405
- src_wav_path=content_path,
406
- timbre_ref_wav_path=reference_path,
407
  flow_matching_steps=32,
408
  )
409
  save_audio(gen_audio, output_path=output_path)
@@ -414,21 +387,15 @@ class VevoGradioApp:
414
  """文本转语音功能"""
415
  pipeline = self.init_tts_pipeline()
416
 
417
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as ref_file, \
418
- tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
419
-
420
- ref_path = ref_file.name
421
  output_path = output_file.name
422
 
423
- # 保存上传的音频文件
424
- ref_audio.save(ref_path)
425
-
426
  # 执行文本转语音
427
  gen_audio = pipeline.inference_ar_and_fm(
428
  src_wav_path=None,
429
  src_text=text,
430
- style_ref_wav_path=ref_path,
431
- timbre_ref_wav_path=ref_path,
432
  style_ref_wav_text=ref_text if ref_text else None,
433
  src_text_language=src_language,
434
  style_ref_wav_text_language=ref_language,
 
334
  """语音转换功能"""
335
  pipeline = self.init_voice_conversion_pipeline()
336
 
337
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
 
 
 
 
 
338
  output_path = output_file.name
339
 
 
 
 
 
340
  # 执行语音转换
341
  gen_audio = pipeline.inference_ar_and_fm(
342
+ src_wav_path=content_audio, # 直接使用路径
343
  src_text=None,
344
+ style_ref_wav_path=reference_audio, # 直接使用路径
345
+ timbre_ref_wav_path=reference_audio,
346
  )
347
  save_audio(gen_audio, output_path=output_path)
348
 
 
352
  """风格转换功能"""
353
  pipeline = self.init_voice_conversion_pipeline()
354
 
355
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
 
 
 
 
 
356
  output_path = output_file.name
357
 
 
 
 
 
358
  # 执行风格转换
359
  gen_audio = pipeline.inference_ar_and_fm(
360
+ src_wav_path=content_audio, # 直接使用路径
361
  src_text=None,
362
+ style_ref_wav_path=style_audio, # 直接使用路径
363
+ timbre_ref_wav_path=content_audio,
364
  )
365
  save_audio(gen_audio, output_path=output_path)
366
 
 
370
  """音色转换功能"""
371
  pipeline = self.init_timbre_pipeline()
372
 
373
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
 
 
 
 
 
374
  output_path = output_file.name
375
 
 
 
 
 
376
  # 执行音色转换
377
  gen_audio = pipeline.inference_fm(
378
+ src_wav_path=content_audio, # 直接使用路径
379
+ timbre_ref_wav_path=reference_audio, # 直接使用路径
380
  flow_matching_steps=32,
381
  )
382
  save_audio(gen_audio, output_path=output_path)
 
387
  """文本转语音功能"""
388
  pipeline = self.init_tts_pipeline()
389
 
390
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
 
 
 
391
  output_path = output_file.name
392
 
 
 
 
393
  # 执行文本转语音
394
  gen_audio = pipeline.inference_ar_and_fm(
395
  src_wav_path=None,
396
  src_text=text,
397
+ style_ref_wav_path=ref_audio, # 直接使用路径
398
+ timbre_ref_wav_path=ref_audio,
399
  style_ref_wav_text=ref_text if ref_text else None,
400
  src_text_language=src_language,
401
  style_ref_wav_text_language=ref_language,