import time
import gradio as gr
import soundfile
import torch
import infer_tool
convert_cnt = [0]
dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_name = "1121_epochs.pth"
config_name = "config.json"
net_g_ms, hubert_soft, feature_input, hps_ms = infer_tool.load_model(model_name, config_name)
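# Objects returned by infer_tool.load_model (roles inferred from how they are
# used below): net_g_ms is the synthesis network, hubert_soft the content
# encoder, feature_input the f0/feature extractor, hps_ms the parsed config.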
# Read parameters from the loaded config
target_sample = hps_ms.data.sampling_rate
spk_dict = {
    "鸢一折纸": 0,
    "时崎狂三": 1,
    "冰芽川四糸乃": 2,
    "五河琴里": 3,
    "八舞夕弦": 4,
    "八舞耶俱矢": 5,
    "诱宵美九": 6,
    "夜刀神十香": 7
}
def vc_fn(sid, audio_record, audio_upload, tran):
    print(sid, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    # Prefer an uploaded file; fall back to the microphone recording
    if audio_upload is not None:
        audio_path = audio_upload
    elif audio_record is not None:
        audio_path = audio_record
    else:
        return "Please upload or record an audio clip first", None, None
    audio, sampling_rate = infer_tool.format_wav(audio_path, target_sample)
    duration = audio.shape[0] / sampling_rate
    o_audio, out_sr = infer_tool.infer(audio_path, spk_dict[sid], tran, net_g_ms, hubert_soft, feature_input)
    out_path = "./out_temp.wav"
    soundfile.write(out_path, o_audio, target_sample)
    infer_tool.f0_plt(audio_path, out_path, tran, hubert_soft, feature_input)
    mistake, var = infer_tool.calc_error(audio_path, out_path, tran, feature_input)
    message = (
        "Per-segment error reference: 0.3 is excellent, around 0.5 is reasonable, a few values of 0.8-1 are acceptable.\n"
        "If the deviation is too large, adjust the transpose value; if it stays large after several attempts, the song is outside this singer's range.\n"
        f"Semitone deviation: {mistake}\nSemitone variance: {var}"
    )
    return message, (target_sample, o_audio), gr.Image.update("temp.jpg")
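# Gradio UI: a speaker dropdown, microphone and upload inputs, a transpose box,
# and three outputs (status message, converted audio, f0 curve plot).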
app = gr.Blocks()
with app:
    with gr.Tabs():
        with gr.TabItem("Basic"):
            gr.Markdown(value="""
            Source code reference: [xiaolang/sovits_f0](https://huggingface.co/spaces/xiaolang/sovits_f0/tree/main)
            **Audio must be in wav format.**
            Conversion quality depends on how close the source audio's tone and rhythm are to the target voice.
            For a female source voice, **lowering by 3-6 semitones is recommended**; **the closer the final output error is to 0, the better the pitch accuracy**.
            For a **low-pitched male source voice**, **raising by 3 semitones is recommended; check the f0 curve to decide**.
            The f0 curve shows off-pitch passages at a glance: blue is the input pitch, orange is the pitch of the synthesized audio.
            If you **only see orange**, the blue curve is covered by it and the conversion tracks the input well.
            """)
            speaker_id = gr.Dropdown(label="Speaker", choices=list(spk_dict.keys()), value=list(spk_dict.keys())[0])
            record_input = gr.Audio(source="microphone", label="Record your voice", type="filepath", elem_id="audio_inputs")
            upload_input = gr.Audio(source="upload", label="Upload audio (shorter than 180 seconds)", type="filepath",
                                    elem_id="audio_inputs")
            vc_transform = gr.Number(label="Transpose (integer number of semitones, positive or negative; +12 is one octave up)", value=0)
            vc_submit = gr.Button("Convert", variant="primary")
            out_message = gr.Textbox(label="Output Message")
            out_audio = gr.Audio(label="Output Audio")
            f0_image = gr.Image(label="f0 curve")
            vc_submit.click(vc_fn, [speaker_id, record_input, upload_input, vc_transform],
                            [out_message, out_audio, f0_image])
app.launch(share=True)
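# Running locally (a sketch; assumes this file is saved as app.py and that
# infer_tool.py, 1121_epochs.pth and config.json sit in the same directory):
#   pip install "gradio<4" soundfile torch   # gr.Audio(source=...) / gr.Image.update are Gradio 3.x APIs
#   python app.py
# Then open the printed URL, pick a speaker, record or upload a wav clip,
# set the transpose value, and press Convert.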