Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -8,14 +8,21 @@ import infer_tool
|
|
8 |
|
9 |
convert_cnt = [0]
|
10 |
dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
11 |
-
model_name = "
|
12 |
-
config_name = "
|
13 |
net_g_ms, hubert_soft, feature_input, hps_ms = infer_tool.load_model(f"{model_name}", f"configs/{config_name}")
|
14 |
|
15 |
# 获取config参数
|
16 |
target_sample = hps_ms.data.sampling_rate
|
17 |
spk_dict = {
|
18 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
}
|
20 |
|
21 |
|
@@ -25,13 +32,9 @@ def vc_fn(sid, audio_record, audio_upload, tran):
|
|
25 |
audio_path = audio_upload
|
26 |
elif audio_record is not None:
|
27 |
audio_path = audio_record
|
28 |
-
else:
|
29 |
-
return "你需要上传wav文件或使用网页内置的录音!", None
|
30 |
|
31 |
audio, sampling_rate = infer_tool.format_wav(audio_path, target_sample)
|
32 |
duration = audio.shape[0] / sampling_rate
|
33 |
-
if duration > 60:
|
34 |
-
return "请上传小于60s的音频,需要转换长音频请使用colab", None
|
35 |
|
36 |
o_audio, out_sr = infer_tool.infer(audio_path, spk_dict[sid], tran, net_g_ms, hubert_soft, feature_input)
|
37 |
out_path = f"./out_temp.wav"
|
@@ -47,12 +50,8 @@ with app:
|
|
47 |
with gr.Tabs():
|
48 |
with gr.TabItem("Basic"):
|
49 |
gr.Markdown(value="""
|
50 |
-
|
51 |
-
|
52 |
-
本模型为sovits_f0,支持**60s以内**的**无伴奏**wav、mp3格式,或使用**网页内置**的录音(二选一)
|
53 |
-
|
54 |
-
**error就用格式工厂自行转换为wav再上传**
|
55 |
-
|
56 |
转换效果取决于源音频语气、节奏是否与目标音色相近。
|
57 |
|
58 |
源音频为女声时,**建议降3-6key**,**最后的输出误差越接近0,音准越高**
|
@@ -64,7 +63,7 @@ with app:
|
|
64 |
若**只看见橙色**,说明蓝色曲线被覆盖,转换效果较好
|
65 |
|
66 |
""")
|
67 |
-
speaker_id = gr.Dropdown(label="音色", choices=[
|
68 |
record_input = gr.Audio(source="microphone", label="录制你的声音", type="filepath", elem_id="audio_inputs")
|
69 |
upload_input = gr.Audio(source="upload", label="上传音频(长度小于45秒)", type="filepath",
|
70 |
elem_id="audio_inputs")
|
@@ -75,24 +74,4 @@ with app:
|
|
75 |
f0_image = gr.Image(label="f0曲线")
|
76 |
vc_submit.click(vc_fn, [speaker_id, record_input, upload_input, vc_transform],
|
77 |
[out_message, out_audio, f0_image])
|
78 |
-
|
79 |
-
gr.Markdown(value="""
|
80 |
-
0、合集:https://github.com/IceKyrin/sovits_guide/blob/main/README.md
|
81 |
-
|
82 |
-
1、仅支持sovit_f0(sovits2.0)模型
|
83 |
-
|
84 |
-
2、自行下载hubert-soft-0d54a1f4.pt改名为hubert.pt(已经下好了)
|
85 |
-
https://github.com/bshall/hubert/releases/tag/v0.1
|
86 |
-
|
87 |
-
3、pth文件夹下放置sovits2.0的模型
|
88 |
-
|
89 |
-
4、与模型配套的xxx.json,需有speaker项——人物列表
|
90 |
-
|
91 |
-
5、放无伴奏的音频、或网页内置录音,不要放奇奇怪怪的格式
|
92 |
-
|
93 |
-
6、仅供交流使用,不对用户行为负责
|
94 |
-
|
95 |
-
7、268000为44100预模型,配合sovits_pre.json;50000为22050预模型,配合nyarumul.json
|
96 |
-
|
97 |
-
""")
|
98 |
-
app.launch()
|
|
|
8 |
|
9 |
convert_cnt = [0]
|
10 |
dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
11 |
+
model_name = "G.pth"
|
12 |
+
config_name = "config.json"
|
13 |
net_g_ms, hubert_soft, feature_input, hps_ms = infer_tool.load_model(f"{model_name}", f"configs/{config_name}")
|
14 |
|
15 |
# 获取config参数
|
16 |
target_sample = hps_ms.data.sampling_rate
|
17 |
spk_dict = {
|
18 |
+
"鸢一折纸": 0,
|
19 |
+
"时崎狂三": 1,
|
20 |
+
"冰芽川四糸乃": 2,
|
21 |
+
"五河琴里": 3,
|
22 |
+
"八舞夕弦": 4,
|
23 |
+
"八舞耶俱矢": 5,
|
24 |
+
"诱宵美九": 6,
|
25 |
+
"夜刀神十香": 7
|
26 |
}
|
27 |
|
28 |
|
|
|
32 |
audio_path = audio_upload
|
33 |
elif audio_record is not None:
|
34 |
audio_path = audio_record
|
|
|
|
|
35 |
|
36 |
audio, sampling_rate = infer_tool.format_wav(audio_path, target_sample)
|
37 |
duration = audio.shape[0] / sampling_rate
|
|
|
|
|
38 |
|
39 |
o_audio, out_sr = infer_tool.infer(audio_path, spk_dict[sid], tran, net_g_ms, hubert_soft, feature_input)
|
40 |
out_path = f"./out_temp.wav"
|
|
|
50 |
with gr.Tabs():
|
51 |
with gr.TabItem("Basic"):
|
52 |
gr.Markdown(value="""
|
53 |
+
源码参考: [xiaolang/sovits_f0](https://huggingface.co/spaces/xiaolang/sovits_f0/tree/main)
|
54 |
+
**音频格式为wav**
|
|
|
|
|
|
|
|
|
55 |
转换效果取决于源音频语气、节奏是否与目标音色相近。
|
56 |
|
57 |
源音频为女声时,**建议降3-6key**,**最后的输出误差越接近0,音准越高**
|
|
|
63 |
若**只看见橙色**,说明蓝色曲线被覆盖,转换效果较好
|
64 |
|
65 |
""")
|
66 |
+
# Speaker selector: vc_fn resolves the selection with spk_dict[sid], so the
# choices must be the speaker names (the dict keys) and the default must be a
# single one of those names, not a list wrapping the dict's method objects.
speaker_id = gr.Dropdown(label="音色", choices=list(spk_dict), value=next(iter(spk_dict), None))
|
67 |
record_input = gr.Audio(source="microphone", label="录制你的声音", type="filepath", elem_id="audio_inputs")
|
68 |
upload_input = gr.Audio(source="upload", label="上传音频(长度小于45秒)", type="filepath",
|
69 |
elem_id="audio_inputs")
|
|
|
74 |
f0_image = gr.Image(label="f0曲线")
|
75 |
vc_submit.click(vc_fn, [speaker_id, record_input, upload_input, vc_transform],
|
76 |
[out_message, out_audio, f0_image])
|
77 |
+
app.launch(server_name="0.0.0.0")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|