Spaces:

hzrr
/

sovits_datealive

Runtime error

App Files Files Community

hzrr committed on Jan 20, 2023

Commit

43a274e

1 Parent(s): f84ea21

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -35

app.py CHANGED Viewed

@@ -8,14 +8,21 @@ import infer_tool
 convert_cnt = [0]
 dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-model_name = "152_epochs.pth"
-config_name = "nyarumul.json"
 net_g_ms, hubert_soft, feature_input, hps_ms = infer_tool.load_model(f"{model_name}", f"configs/{config_name}")
 # 获取config参数
 target_sample = hps_ms.data.sampling_rate
 spk_dict = {
-    "奕兰秋": 4
 }
@@ -25,13 +32,9 @@ def vc_fn(sid, audio_record, audio_upload, tran):
         audio_path = audio_upload
     elif audio_record is not None:
         audio_path = audio_record
-    else:
-        return "你需要上传wav文件或使用网页内置的录音！", None
     audio, sampling_rate = infer_tool.format_wav(audio_path, target_sample)
     duration = audio.shape[0] / sampling_rate
-    if duration > 60:
-        return "请上传小于60s的音频，需要转换长音频请使用colab", None
     o_audio, out_sr = infer_tool.infer(audio_path, spk_dict[sid], tran, net_g_ms, hubert_soft, feature_input)
     out_path = f"./out_temp.wav"
@@ -47,12 +50,8 @@ with app:
     with gr.Tabs():
         with gr.TabItem("Basic"):
             gr.Markdown(value="""
-            本音源有授权，二创不创死主播即可。[其他音色体验](https://huggingface.co/spaces/innnky/nyaru-svc2.0-advanced)
-            本模型为sovits_f0，支持**60s以内**的**无伴奏**wav、mp3格式，或使用**网页内置**的录音（二选一）
-            **error就用格式工厂自行转换为wav再上传**
             转换效果取决于源音频语气、节奏是否与目标音色相近。
             源音频为女声时，**建议降3-6key**，**最后的输出误差越接近0，音准越高**
@@ -64,7 +63,7 @@ with app:
             若**只看见橙色**，说明蓝色曲线被覆盖，转换效果较好
             """)
-            speaker_id = gr.Dropdown(label="音色", choices=["奕兰秋"], value="奕兰秋")
             record_input = gr.Audio(source="microphone", label="录制你的声音", type="filepath", elem_id="audio_inputs")
             upload_input = gr.Audio(source="upload", label="上传音频（长度小于45秒）", type="filepath",
                                     elem_id="audio_inputs")
@@ -75,24 +74,4 @@ with app:
             f0_image = gr.Image(label="f0曲线")
         vc_submit.click(vc_fn, [speaker_id, record_input, upload_input, vc_transform],
                         [out_message, out_audio, f0_image])
-        with gr.TabItem("使用说明"):
-            gr.Markdown(value="""
-                        0、合集：https://github.com/IceKyrin/sovits_guide/blob/main/README.md
-                        1、仅支持sovit_f0（sovits2.0）模型
-                        2、自行下载hubert-soft-0d54a1f4.pt改名为hubert.pt（已经下好了）
-                            https://github.com/bshall/hubert/releases/tag/v0.1
-                        3、pth文件夹下放置sovits2.0的模型
-                        4、与模型配套的xxx.json，需有speaker项——人物列表
-                        5、放无伴奏的音频、或网页内置录音，不要放奇奇怪怪的格式
-                        6、仅供交流使用，不对用户行为负责
-                        7、268000为44100预模型，配合sovits_pre.json；50000为22050预模型，配合nyarumul.json
-                        """)
-    app.launch()

 convert_cnt = [0]
 dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model_name = "G.pth"
+config_name = "config.json"
 net_g_ms, hubert_soft, feature_input, hps_ms = infer_tool.load_model(f"{model_name}", f"configs/{config_name}")
 # 获取config参数
 target_sample = hps_ms.data.sampling_rate
 spk_dict = {
+    "鸢一折纸": 0,
+    "时崎狂三": 1,
+    "冰芽川四糸乃": 2,
+    "五河琴里": 3,
+    "八舞夕弦": 4,
+    "八舞耶俱矢": 5,
+    "诱宵美九": 6,
+    "夜刀神十香": 7
 }
         audio_path = audio_upload
     elif audio_record is not None:
         audio_path = audio_record
     audio, sampling_rate = infer_tool.format_wav(audio_path, target_sample)
     duration = audio.shape[0] / sampling_rate
     o_audio, out_sr = infer_tool.infer(audio_path, spk_dict[sid], tran, net_g_ms, hubert_soft, feature_input)
     out_path = f"./out_temp.wav"
     with gr.Tabs():
         with gr.TabItem("Basic"):
             gr.Markdown(value="""
+            源码参考: (xiaolang/sovits_f0)[https://huggingface.co/spaces/xiaolang/sovits_f0/tree/main]
+            **音频格式为wav**
             转换效果取决于源音频语气、节奏是否与目标音色相近。
             源音频为女声时，**建议降3-6key**，**最后的输出误差越接近0，音准越高**
             若**只看见橙色**，说明蓝色曲线被覆盖，转换效果较好
             """)
+            speaker_id = gr.Dropdown(label="音色", choices=[spk_dict.keys], value=[spk_dict.values])
             record_input = gr.Audio(source="microphone", label="录制你的声音", type="filepath", elem_id="audio_inputs")
             upload_input = gr.Audio(source="upload", label="上传音频（长度小于45秒）", type="filepath",
                                     elem_id="audio_inputs")
             f0_image = gr.Image(label="f0曲线")
         vc_submit.click(vc_fn, [speaker_id, record_input, upload_input, vc_transform],
                         [out_message, out_audio, f0_image])
+    app.launch(server_name="0.0.0.0")