Spaces:
Build error
Build error
积极的屁孩
committed on
Commit
·
ced52e3
1
Parent(s):
b3c35e4
debug
Browse files
README.md
CHANGED
@@ -11,3 +11,36 @@ license: apache-2.0
|
|
11 |
---
|
12 |
|
13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
---
|
12 |
|
13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
14 |
+
|
15 |
+
# Vevo语音转换模型演示
|
16 |
+
|
17 |
+
这是基于[amphion/Vevo](https://huggingface.co/amphion/Vevo)模型的Gradio演示应用。Vevo是一个强大的语音转换模型，支持多种语音处理功能。
|
18 |
+
|
19 |
+
## 功能
|
20 |
+
|
21 |
+
- **语音转换 (VevoVoice)**: 保留内容音频的语音内容，将其风格和音色转换为参考音频的风格和音色
|
22 |
+
- **风格转换 (VevoStyle)**: 将内容音频的风格转换为参考音频的风格，保留原始音色
|
23 |
+
- **音色转换 (VevoTimbre)**: 将内容音频的音色转换为参考音频的音色，保留内容和风格
|
24 |
+
- **文本转语音 (VevoTTS)**: 将输入文本转换为语音，使用参考音频的风格和音色
|
25 |
+
|
26 |
+
## 安装
|
27 |
+
|
28 |
+
```bash
|
29 |
+
pip install -r requirements.txt
|
30 |
+
```
|
31 |
+
|
32 |
+
## 运行
|
33 |
+
|
34 |
+
```bash
|
35 |
+
python app.py
|
36 |
+
```
|
37 |
+
|
38 |
+
## 模型来源
|
39 |
+
|
40 |
+
- 模型:[amphion/Vevo](https://huggingface.co/amphion/Vevo)
|
41 |
+
- 原始仓库:[open-mmlab/Amphion](https://github.com/open-mmlab/Amphion)
|
42 |
+
|
43 |
+
## 注意事项
|
44 |
+
|
45 |
+
- 首次运行时，会自动下载模型文件，可能需要一些时间
|
46 |
+
- 音频输入应为WAV格式，采样率为24kHz
|
app.py
CHANGED
@@ -334,24 +334,15 @@ class VevoGradioApp:
|
|
334 |
"""语音转换功能"""
|
335 |
pipeline = self.init_voice_conversion_pipeline()
|
336 |
|
337 |
-
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as
|
338 |
-
tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as reference_file, \
|
339 |
-
tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
|
340 |
-
|
341 |
-
content_path = content_file.name
|
342 |
-
reference_path = reference_file.name
|
343 |
output_path = output_file.name
|
344 |
|
345 |
-
# 保存上传的音频文件
|
346 |
-
content_audio.save(content_path)
|
347 |
-
reference_audio.save(reference_path)
|
348 |
-
|
349 |
# 执行语音转换
|
350 |
gen_audio = pipeline.inference_ar_and_fm(
|
351 |
-
src_wav_path=
|
352 |
src_text=None,
|
353 |
-
style_ref_wav_path=
|
354 |
-
timbre_ref_wav_path=
|
355 |
)
|
356 |
save_audio(gen_audio, output_path=output_path)
|
357 |
|
@@ -361,24 +352,15 @@ class VevoGradioApp:
|
|
361 |
"""风格转换功能"""
|
362 |
pipeline = self.init_voice_conversion_pipeline()
|
363 |
|
364 |
-
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as
|
365 |
-
tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as style_file, \
|
366 |
-
tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
|
367 |
-
|
368 |
-
content_path = content_file.name
|
369 |
-
style_path = style_file.name
|
370 |
output_path = output_file.name
|
371 |
|
372 |
-
# 保存上传的音频文件
|
373 |
-
content_audio.save(content_path)
|
374 |
-
style_audio.save(style_path)
|
375 |
-
|
376 |
# 执行风格转换
|
377 |
gen_audio = pipeline.inference_ar_and_fm(
|
378 |
-
src_wav_path=
|
379 |
src_text=None,
|
380 |
-
style_ref_wav_path=
|
381 |
-
timbre_ref_wav_path=
|
382 |
)
|
383 |
save_audio(gen_audio, output_path=output_path)
|
384 |
|
@@ -388,22 +370,13 @@ class VevoGradioApp:
|
|
388 |
"""音色转换功能"""
|
389 |
pipeline = self.init_timbre_pipeline()
|
390 |
|
391 |
-
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as
|
392 |
-
tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as reference_file, \
|
393 |
-
tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
|
394 |
-
|
395 |
-
content_path = content_file.name
|
396 |
-
reference_path = reference_file.name
|
397 |
output_path = output_file.name
|
398 |
|
399 |
-
# 保存上传的音频文件
|
400 |
-
content_audio.save(content_path)
|
401 |
-
reference_audio.save(reference_path)
|
402 |
-
|
403 |
# 执行音色转换
|
404 |
gen_audio = pipeline.inference_fm(
|
405 |
-
src_wav_path=
|
406 |
-
timbre_ref_wav_path=
|
407 |
flow_matching_steps=32,
|
408 |
)
|
409 |
save_audio(gen_audio, output_path=output_path)
|
@@ -414,21 +387,15 @@ class VevoGradioApp:
|
|
414 |
"""文本转语音功能"""
|
415 |
pipeline = self.init_tts_pipeline()
|
416 |
|
417 |
-
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as
|
418 |
-
tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
|
419 |
-
|
420 |
-
ref_path = ref_file.name
|
421 |
output_path = output_file.name
|
422 |
|
423 |
-
# 保存上传的音频文件
|
424 |
-
ref_audio.save(ref_path)
|
425 |
-
|
426 |
# 执行文本转语音
|
427 |
gen_audio = pipeline.inference_ar_and_fm(
|
428 |
src_wav_path=None,
|
429 |
src_text=text,
|
430 |
-
style_ref_wav_path=
|
431 |
-
timbre_ref_wav_path=
|
432 |
style_ref_wav_text=ref_text if ref_text else None,
|
433 |
src_text_language=src_language,
|
434 |
style_ref_wav_text_language=ref_language,
|
|
|
334 |
"""语音转换功能"""
|
335 |
pipeline = self.init_voice_conversion_pipeline()
|
336 |
|
337 |
+
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
|
|
|
|
|
|
|
|
|
|
|
338 |
output_path = output_file.name
|
339 |
|
|
|
|
|
|
|
|
|
340 |
# 执行语音转换
|
341 |
gen_audio = pipeline.inference_ar_and_fm(
|
342 |
+
src_wav_path=content_audio, # 直接使用路径
|
343 |
src_text=None,
|
344 |
+
style_ref_wav_path=reference_audio, # 直接使用路径
|
345 |
+
timbre_ref_wav_path=reference_audio,
|
346 |
)
|
347 |
save_audio(gen_audio, output_path=output_path)
|
348 |
|
|
|
352 |
"""风格转换功能"""
|
353 |
pipeline = self.init_voice_conversion_pipeline()
|
354 |
|
355 |
+
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
|
|
|
|
|
|
|
|
|
|
|
356 |
output_path = output_file.name
|
357 |
|
|
|
|
|
|
|
|
|
358 |
# 执行风格转换
|
359 |
gen_audio = pipeline.inference_ar_and_fm(
|
360 |
+
src_wav_path=content_audio, # 直接使用路径
|
361 |
src_text=None,
|
362 |
+
style_ref_wav_path=style_audio, # 直接使用路径
|
363 |
+
timbre_ref_wav_path=content_audio,
|
364 |
)
|
365 |
save_audio(gen_audio, output_path=output_path)
|
366 |
|
|
|
370 |
"""音色转换功能"""
|
371 |
pipeline = self.init_timbre_pipeline()
|
372 |
|
373 |
+
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
|
|
|
|
|
|
|
|
|
|
|
374 |
output_path = output_file.name
|
375 |
|
|
|
|
|
|
|
|
|
376 |
# 执行音色转换
|
377 |
gen_audio = pipeline.inference_fm(
|
378 |
+
src_wav_path=content_audio, # 直接使用路径
|
379 |
+
timbre_ref_wav_path=reference_audio, # 直接使用路径
|
380 |
flow_matching_steps=32,
|
381 |
)
|
382 |
save_audio(gen_audio, output_path=output_path)
|
|
|
387 |
"""文本转语音功能"""
|
388 |
pipeline = self.init_tts_pipeline()
|
389 |
|
390 |
+
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
|
|
|
|
|
|
|
391 |
output_path = output_file.name
|
392 |
|
|
|
|
|
|
|
393 |
# 执行文本转语音
|
394 |
gen_audio = pipeline.inference_ar_and_fm(
|
395 |
src_wav_path=None,
|
396 |
src_text=text,
|
397 |
+
style_ref_wav_path=ref_audio, # 直接使用路径
|
398 |
+
timbre_ref_wav_path=ref_audio,
|
399 |
style_ref_wav_text=ref_text if ref_text else None,
|
400 |
src_text_language=src_language,
|
401 |
style_ref_wav_text_language=ref_language,
|