hzrr committed on
Commit
43a274e
·
1 Parent(s): f84ea21

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -35
app.py CHANGED
@@ -8,14 +8,21 @@ import infer_tool
8
 
9
  convert_cnt = [0]
10
  dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
11
- model_name = "152_epochs.pth"
12
- config_name = "nyarumul.json"
13
  net_g_ms, hubert_soft, feature_input, hps_ms = infer_tool.load_model(f"{model_name}", f"configs/{config_name}")
14
 
15
  # 获取config参数
16
  target_sample = hps_ms.data.sampling_rate
17
  spk_dict = {
18
- "奕兰秋": 4
 
 
 
 
 
 
 
19
  }
20
 
21
 
@@ -25,13 +32,9 @@ def vc_fn(sid, audio_record, audio_upload, tran):
25
  audio_path = audio_upload
26
  elif audio_record is not None:
27
  audio_path = audio_record
28
- else:
29
- return "你需要上传wav文件或使用网页内置的录音!", None
30
 
31
  audio, sampling_rate = infer_tool.format_wav(audio_path, target_sample)
32
  duration = audio.shape[0] / sampling_rate
33
- if duration > 60:
34
- return "请上传小于60s的音频,需要转换长音频请使用colab", None
35
 
36
  o_audio, out_sr = infer_tool.infer(audio_path, spk_dict[sid], tran, net_g_ms, hubert_soft, feature_input)
37
  out_path = f"./out_temp.wav"
@@ -47,12 +50,8 @@ with app:
47
  with gr.Tabs():
48
  with gr.TabItem("Basic"):
49
  gr.Markdown(value="""
50
- 本音源有授权,二创不创死主播即可。[其他音色体验](https://huggingface.co/spaces/innnky/nyaru-svc2.0-advanced)
51
-
52
- 本模型为sovits_f0,支持**60s以内**的**无伴奏**wav、mp3格式,或使用**网页内置**的录音(二选一)
53
-
54
- **error就用格式工厂自行转换为wav再上传**
55
-
56
  转换效果取决于源音频语气、节奏是否与目标音色相近。
57
 
58
  源音频为女声时,**建议降3-6key**,**最后的输出误差越接近0,音准越高**
@@ -64,7 +63,7 @@ with app:
64
  若**只看见橙色**,说明蓝色曲线被覆盖,转换效果较好
65
 
66
  """)
67
- speaker_id = gr.Dropdown(label="音色", choices=["奕兰秋"], value="奕兰秋")
68
  record_input = gr.Audio(source="microphone", label="录制你的声音", type="filepath", elem_id="audio_inputs")
69
  upload_input = gr.Audio(source="upload", label="上传音频(长度小于45秒)", type="filepath",
70
  elem_id="audio_inputs")
@@ -75,24 +74,4 @@ with app:
75
  f0_image = gr.Image(label="f0曲线")
76
  vc_submit.click(vc_fn, [speaker_id, record_input, upload_input, vc_transform],
77
  [out_message, out_audio, f0_image])
78
- with gr.TabItem("使用说明"):
79
- gr.Markdown(value="""
80
- 0、合集:https://github.com/IceKyrin/sovits_guide/blob/main/README.md
81
-
82
- 1、仅支持sovit_f0(sovits2.0)模型
83
-
84
- 2、自行下载hubert-soft-0d54a1f4.pt改名为hubert.pt(已经下好了)
85
- https://github.com/bshall/hubert/releases/tag/v0.1
86
-
87
- 3、pth文件夹下放置sovits2.0的模型
88
-
89
- 4、与模型配套的xxx.json,需有speaker项——人物列表
90
-
91
- 5、放无伴奏的音频、或网页内置录音,不要放奇奇怪怪的格式
92
-
93
- 6、仅供交流使用,不对用户行为负责
94
-
95
- 7、268000为44100预模型,配合sovits_pre.json;50000为22050预模型,配合nyarumul.json
96
-
97
- """)
98
- app.launch()
 
8
 
9
  convert_cnt = [0]
10
  dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
11
+ model_name = "G.pth"
12
+ config_name = "config.json"
13
  net_g_ms, hubert_soft, feature_input, hps_ms = infer_tool.load_model(f"{model_name}", f"configs/{config_name}")
14
 
15
  # 获取config参数
16
  target_sample = hps_ms.data.sampling_rate
17
  spk_dict = {
18
+ "鸢一折纸": 0,
19
+ "时崎狂三": 1,
20
+ "冰芽川四糸乃": 2,
21
+ "五河琴里": 3,
22
+ "八舞夕弦": 4,
23
+ "八舞耶俱矢": 5,
24
+ "诱宵美九": 6,
25
+ "夜刀神十香": 7
26
  }
27
 
28
 
 
32
  audio_path = audio_upload
33
  elif audio_record is not None:
34
  audio_path = audio_record
 
 
35
 
36
  audio, sampling_rate = infer_tool.format_wav(audio_path, target_sample)
37
  duration = audio.shape[0] / sampling_rate
 
 
38
 
39
  o_audio, out_sr = infer_tool.infer(audio_path, spk_dict[sid], tran, net_g_ms, hubert_soft, feature_input)
40
  out_path = f"./out_temp.wav"
 
50
  with gr.Tabs():
51
  with gr.TabItem("Basic"):
52
  gr.Markdown(value="""
53
+ 源码参考: [xiaolang/sovits_f0](https://huggingface.co/spaces/xiaolang/sovits_f0/tree/main)
54
+ **音频格式为wav**
 
 
 
 
55
  转换效果取决于源音频语气、节奏是否与目标音色相近。
56
 
57
  源音频为女声时,**建议降3-6key**,**最后的输出误差越接近0,音准越高**
 
63
  若**只看见橙色**,说明蓝色曲线被覆盖,转换效果较好
64
 
65
  """)
66
+ speaker_id = gr.Dropdown(label="音色", choices=[spk_dict.keys], value=[spk_dict.values])
67
  record_input = gr.Audio(source="microphone", label="录制你的声音", type="filepath", elem_id="audio_inputs")
68
  upload_input = gr.Audio(source="upload", label="上传音频(长度小于45秒)", type="filepath",
69
  elem_id="audio_inputs")
 
74
  f0_image = gr.Image(label="f0曲线")
75
  vc_submit.click(vc_fn, [speaker_id, record_input, upload_input, vc_transform],
76
  [out_message, out_audio, f0_image])
77
+ app.launch(server_name="0.0.0.0")