Spaces:

lshzhm
/

DeepAudio-V1

Running

lshzhm committed on Mar 25

Commit

4427b01

1 Parent(s): 50fd409

gradio

Files changed (2) hide show

F5-TTS/src/f5_tts/infer/infer_cli_test.py CHANGED Viewed

@@ -203,6 +203,11 @@ parser.add_argument(
     type=str,
     default="",
 )
 parser.add_argument(
     "--txt",
     type=str,
@@ -446,7 +451,8 @@ if __name__ == "__main__":
             video, txt, wav = data
             video_p, txt_p, wav_p = data_p
-            v2a_audio = v2a_path + video.replace("/", "__").strip(".") + ".flac"
             #v2a_audio_p = v2a_path + video_p.replace("/", "__").strip(".") + ".flac"
             print(video, wav, v2a_audio, video_p, wav_p)

     type=str,
     default="",
 )
+parser.add_argument(
+    "--v2a_wav",
+    type=str,
+    default="",
+)
 parser.add_argument(
     "--txt",
     type=str,
             video, txt, wav = data
             video_p, txt_p, wav_p = data_p
+            #v2a_audio = v2a_path + video.replace("/", "__").strip(".") + ".flac"
+            v2a_audio = args.v2a_wav
             #v2a_audio_p = v2a_path + video_p.replace("/", "__").strip(".") + ".flac"
             print(video, wav, v2a_audio, video_p, wav_p)

app.py CHANGED Viewed

@@ -20,6 +20,17 @@ import requests
 import shutil
 import numpy as np
 log = logging.getLogger()
@@ -60,7 +71,7 @@ def video_to_audio_and_speech(video: gr.Video, prompt: str, text: str, audio_pro
     print("v2a command", command)
     os.system(command)
-    command = "python ./F5-TTS/src/f5_tts/infer/infer_cli_test.py --output_dir %s --start 0 --end 1 --ckpt_file ./F5-TTS/ckpts/v2c/v2c_s44.pt --v2a_path %s --wav_p %s --txt_p \"%s\" --video %s --txt \"%s\"" % (output_dir, output_dir, audio_p_path, text_prompt, video_save_path, text)
     print("v2s command", command)
     os.system(command)
     video_gen = output_dir + "/videos/gen/0001"

 import shutil
 import numpy as np
+from huggingface_hub import hf_hub_download
+model_path = "./F5-TTS/ckpts/v2c/"
+if not os.path.exists(model_path):
+    os.makedirs(model_path)
+file_path = hf_hub_download(repo_id="lshzhm/DeepAudio-V1", filename="v2c_s44.pt", local_dir=model_path)
+print(f"Model saved at: {file_path}")
 log = logging.getLogger()
     print("v2a command", command)
     os.system(command)
+    command = "python ./F5-TTS/src/f5_tts/infer/infer_cli_test.py --output_dir %s --start 0 --end 1 --ckpt_file ./F5-TTS/ckpts/v2c/v2c_s44.pt --v2a_path %s --wav_p %s --txt_p \"%s\" --video %s --v2a_wav %s --txt \"%s\"" % (output_dir, output_dir, audio_p_path, text_prompt, video_save_path, video_save_path[:-4]+".flac", text)
     print("v2s command", command)
     os.system(command)
     video_gen = output_dir + "/videos/gen/0001"