Commit
·
c6116e3
1
Parent(s):
382185c
Refactor config.py to update CosyVoice TTS imports
Browse files
- tabs/audios/events.py +4 -4
- tabs/audios/load_models.py +7 -18
tabs/audios/events.py
CHANGED
@@ -155,7 +155,7 @@ def gen_audio(text, mode, sft_speaker = None, speaker_audio = None, voice_instru
|
|
155 |
if not sft_speaker:
|
156 |
raise gr.Error('Please select a speaker')
|
157 |
|
158 |
-
for i, j in enumerate(
|
159 |
tts_text=text,
|
160 |
spk_id=sft_speaker,
|
161 |
)):
|
@@ -168,7 +168,7 @@ def gen_audio(text, mode, sft_speaker = None, speaker_audio = None, voice_instru
|
|
168 |
if not speaker_audio_file:
|
169 |
raise gr.Error('Please upload an audio')
|
170 |
|
171 |
-
for i, j in enumerate(
|
172 |
tts_text=text,
|
173 |
prompt_text=voice_instructions,
|
174 |
prompt_speech_16k=prompt_speech_16k,
|
@@ -182,7 +182,7 @@ def gen_audio(text, mode, sft_speaker = None, speaker_audio = None, voice_instru
|
|
182 |
if not speaker_audio_file:
|
183 |
raise gr.Error('Please upload an audio')
|
184 |
|
185 |
-
for i, j in enumerate(
|
186 |
tts_text=text,
|
187 |
prompt_speech_16k=prompt_speech_16k,
|
188 |
)):
|
@@ -195,7 +195,7 @@ def gen_audio(text, mode, sft_speaker = None, speaker_audio = None, voice_instru
|
|
195 |
if not voice_instructions:
|
196 |
raise gr.Error('Please enter voice instructions')
|
197 |
|
198 |
-
for i, j in enumerate(
|
199 |
tts_text=text,
|
200 |
spk_id=sft_speaker,
|
201 |
instruct_text=voice_instructions,
|
|
|
155 |
if not sft_speaker:
|
156 |
raise gr.Error('Please select a speaker')
|
157 |
|
158 |
+
for i, j in enumerate(cosyvoice_sft.inference_sft(
|
159 |
tts_text=text,
|
160 |
spk_id=sft_speaker,
|
161 |
)):
|
|
|
168 |
if not speaker_audio_file:
|
169 |
raise gr.Error('Please upload an audio')
|
170 |
|
171 |
+
for i, j in enumerate(cosyvoice.inference_zero_shot(
|
172 |
tts_text=text,
|
173 |
prompt_text=voice_instructions,
|
174 |
prompt_speech_16k=prompt_speech_16k,
|
|
|
182 |
if not speaker_audio_file:
|
183 |
raise gr.Error('Please upload an audio')
|
184 |
|
185 |
+
for i, j in enumerate(cosyvoice.inference_cross_lingual(
|
186 |
tts_text=text,
|
187 |
prompt_speech_16k=prompt_speech_16k,
|
188 |
)):
|
|
|
195 |
if not voice_instructions:
|
196 |
raise gr.Error('Please enter voice instructions')
|
197 |
|
198 |
+
for i, j in enumerate(cosyvoice_instruct.inference_instruct(
|
199 |
tts_text=text,
|
200 |
spk_id=sft_speaker,
|
201 |
instruct_text=voice_instructions,
|
tabs/audios/load_models.py
CHANGED
@@ -14,25 +14,14 @@ def init_sys():
|
|
14 |
# Load DeepFilterNet2
|
15 |
df_model, df_state, _ = init_df()
|
16 |
|
17 |
-
#
|
18 |
-
|
19 |
-
snapshot_download('iic/CosyVoice-300M-SFT', local_dir=f'{Config.MODEL_DOWNLOAD_DIR}/audios/CosyVoice-300M-SFT')
|
20 |
-
snapshot_download('iic/CosyVoice-300M-Instruct', local_dir=f'{Config.MODEL_DOWNLOAD_DIR}/audios/CosyVoice-300M-Instruct')
|
21 |
-
snapshot_download('iic/CosyVoice-ttsfrd', local_dir=f'{Config.MODEL_DOWNLOAD_DIR}/audios/CosyVoice-ttsfrd')
|
22 |
|
23 |
-
|
24 |
-
|
25 |
|
26 |
-
|
27 |
-
cv_sft = CosyVoice(f'{Config.MODEL_DOWNLOAD_DIR}/audios/CosyVoice-300M-SFT')
|
28 |
-
sft_speakers = cv_sft.list_avaliable_spks()
|
29 |
|
30 |
-
|
31 |
-
cv_vc = CosyVoice(f'{Config.MODEL_DOWNLOAD_DIR}/audios/CosyVoice-300M')
|
32 |
-
|
33 |
-
# Load CosyVoice Instruct
|
34 |
-
cv_instruct = CosyVoice(f'{Config.MODEL_DOWNLOAD_DIR}/audios/CosyVoice-300M-Instruct')
|
35 |
-
|
36 |
-
return device, df_model, df_state, cv_vc, cv_sft, sft_speakers, cv_instruct
|
37 |
|
38 |
-
device, df_model, df_state,
|
|
|
14 |
# Load DeepFilterNet2
|
15 |
df_model, df_state, _ = init_df()
|
16 |
|
17 |
+
# Load CosyVoice
|
18 |
+
cosyvoice= CosyVoice('FunAudioLLM/CosyVoice-300M')
|
|
|
|
|
|
|
19 |
|
20 |
+
cosyvoice_sft= CosyVoice('FunAudioLLM/CosyVoice-300M-SFT')
|
21 |
+
sft_speakers= cosyvoice_sft.list_avaliable_spks()
|
22 |
|
23 |
+
cosyvoice_instruct= CosyVoice('FunAudioLLM/CosyVoice-300M-Instruct')
|
|
|
|
|
24 |
|
25 |
+
return device, df_model, df_state, cosyvoice, cosyvoice_sft, sft_speakers, cosyvoice_instruct
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
+
device, df_model, df_state, cosyvoice, cosyvoice_sft, sft_speakers, cosyvoice_instruct = init_sys()
|