Spaces:

mantrakp
/

aai

Runtime error

barreloflube committed on Sep 17, 2024

Commit

c6116e3

1 Parent(s): 382185c

Refactor config.py to update CosyVoice TTS imports

Files changed (2) hide show

tabs/audios/events.py CHANGED Viewed

@@ -155,7 +155,7 @@ def gen_audio(text, mode, sft_speaker = None, speaker_audio = None, voice_instru
         if not sft_speaker:
             raise gr.Error('Please select a speaker')
-        for i, j in enumerate(cv_sft.inference_sft(
             tts_text=text,
             spk_id=sft_speaker,
         )):
@@ -168,7 +168,7 @@ def gen_audio(text, mode, sft_speaker = None, speaker_audio = None, voice_instru
         if not speaker_audio_file:
             raise gr.Error('Please upload an audio')
-        for i, j in enumerate(cv_vc.inference_zero_shot(
             tts_text=text,
             prompt_text=voice_instructions,
             prompt_speech_16k=prompt_speech_16k,
@@ -182,7 +182,7 @@ def gen_audio(text, mode, sft_speaker = None, speaker_audio = None, voice_instru
         if not speaker_audio_file:
             raise gr.Error('Please upload an audio')
-        for i, j in enumerate(cv_vc.inference_cross_lingual(
             tts_text=text,
             prompt_speech_16k=prompt_speech_16k,
         )):
@@ -195,7 +195,7 @@ def gen_audio(text, mode, sft_speaker = None, speaker_audio = None, voice_instru
         if not voice_instructions:
             raise gr.Error('Please enter voice instructions')
-        for i, j in enumerate(cv_instruct.inference_instruct(
             tts_text=text,
             spk_id=sft_speaker,
             instruct_text=voice_instructions,

         if not sft_speaker:
             raise gr.Error('Please select a speaker')
+        for i, j in enumerate(cosyvoice_sft.inference_sft(
             tts_text=text,
             spk_id=sft_speaker,
         )):
         if not speaker_audio_file:
             raise gr.Error('Please upload an audio')
+        for i, j in enumerate(cosyvoice.inference_zero_shot(
             tts_text=text,
             prompt_text=voice_instructions,
             prompt_speech_16k=prompt_speech_16k,
         if not speaker_audio_file:
             raise gr.Error('Please upload an audio')
+        for i, j in enumerate(cosyvoice.inference_cross_lingual(
             tts_text=text,
             prompt_speech_16k=prompt_speech_16k,
         )):
         if not voice_instructions:
             raise gr.Error('Please enter voice instructions')
+        for i, j in enumerate(cosyvoice_instruct.inference_instruct(
             tts_text=text,
             spk_id=sft_speaker,
             instruct_text=voice_instructions,

tabs/audios/load_models.py CHANGED Viewed

@@ -14,25 +14,14 @@ def init_sys():
     # Load DeepFilterNet2
     df_model, df_state, _ = init_df()
-    # Download CosyVoice models
-    snapshot_download('iic/CosyVoice-300M', local_dir=f'{Config.MODEL_DOWNLOAD_DIR}/audios/CosyVoice-300M')
-    snapshot_download('iic/CosyVoice-300M-SFT', local_dir=f'{Config.MODEL_DOWNLOAD_DIR}/audios/CosyVoice-300M-SFT')
-    snapshot_download('iic/CosyVoice-300M-Instruct', local_dir=f'{Config.MODEL_DOWNLOAD_DIR}/audios/CosyVoice-300M-Instruct')
-    snapshot_download('iic/CosyVoice-ttsfrd', local_dir=f'{Config.MODEL_DOWNLOAD_DIR}/audios/CosyVoice-ttsfrd')
-    # Add `tabs/audios/modules/CosyVoice/third_party/Matcha-TTS` to your `PYTHONPATH`
-    os.environ['PYTHONPATH'] = f'{os.path.dirname(__file__)}/modules/CosyVoice/third_party/Matcha-TTS:{os.environ.get("PYTHONPATH", "")}'
-    # Load CosyVoice SFT
-    cv_sft = CosyVoice(f'{Config.MODEL_DOWNLOAD_DIR}/audios/CosyVoice-300M-SFT')
-    sft_speakers = cv_sft.list_avaliable_spks()
-    # Load CosyVoice TTS
-    cv_vc = CosyVoice(f'{Config.MODEL_DOWNLOAD_DIR}/audios/CosyVoice-300M')
-    # Load CosyVoice Instruct
-    cv_instruct = CosyVoice(f'{Config.MODEL_DOWNLOAD_DIR}/audios/CosyVoice-300M-Instruct')
-    return device, df_model, df_state, cv_vc, cv_sft, sft_speakers, cv_instruct
-device, df_model, df_state, cv_vc, cv_sft, sft_speakers, cv_instruct = init_sys()

     # Load DeepFilterNet2
     df_model, df_state, _ = init_df()
+    # Load CosyVoice
+    cosyvoice= CosyVoice('FunAudioLLM/CosyVoice-300M')
+    cosyvoice_sft= CosyVoice('FunAudioLLM/CosyVoice-300M-SFT')
+    sft_speakers= cosyvoice_sft.list_avaliable_spks()
+    cosyvoice_instruct= CosyVoice('FunAudioLLM/CosyVoice-300M-Instruct')
+    return device, df_model, df_state, cosyvoice, cosyvoice_sft, sft_speakers, cosyvoice_instruct
+device, df_model, df_state, cosyvoice, cosyvoice_sft, sft_speakers, cosyvoice_instruct = init_sys()