Spaces:

lalalic
/

xtts

Sleeping

App Files Files Community

lalalic committed on Mar 28, 2024

Commit

84202be

verified ·

1 Parent(s): b283951

Update xtts.py

Browse files

Files changed (1) hide show

xtts.py +47 -23

xtts.py CHANGED Viewed

@@ -14,7 +14,7 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
 # def upload_bytes(bytes, ext=".wav"):
 #     return bytes
-from qili import upload_bytes
 # if __name__ == "__main__":
 #     app = Flask(__name__)
 # else:
@@ -32,29 +32,35 @@ if not os.path.exists(sample_root):
 default_sample=f'{os.path.dirname(os.path.abspath(__file__))}/sample.wav', f'{sample_root}/sample.pt'
 ffmpeg=f'{os.path.dirname(os.path.abspath(__file__))}/ffmpeg'
 def predict(text, sample=None, language="zh"):
     global tts
     global model
     try:
-        if tts is None:
-            model_dir=os.environ.get("MODEL_DIR")
-            model_path=model_dir
-            config_path=f'{model_dir}/config.json'
-            vocoder_config_path=f'{model_dir}/vocab.json'
-            model_name="tts_models/multilingual/multi-dataset/xtts_v2"
-            logging.info(f"loading model {model_name} ...")
-            tts = TTS(
-                # model_name,
-                model_path=model_path,
-                config_path=config_path,
-                vocoder_config_path=vocoder_config_path,
-                progress_bar=True
-            )
-            model=tts.synthesizer.tts_model
-            #hack to use cache
-            model.__get_conditioning_latents=model.get_conditioning_latents
-            model.get_conditioning_latents=get_conditioning_latents
-            logging.info("model is ready")
         text= re.sub("([^\x00-\x7F]|\w)(\.|\。|\?)",r"\1 \2\2",text)
         wav = tts.tts(
             text,
@@ -143,7 +149,7 @@ def trim_sample_audio(speaker_wav):
 from flask import Flask, request
 app = Flask(__name__)
 @app.route("/tts")
-def convert():
     text = request.args.get('text')
     sample = request.args.get('sample')
     language = request.args.get('language')
@@ -158,11 +164,29 @@ def convert():
 # @app.get("/play")
 # def play(text: str=Query(None), sample: str=Query(None), language: str=Query('zh')):
-@app.route("/play")
-def play():
     url=convert()
     return playInHTML(url)
 # import gradio as gr
 # demo=gr.Interface(predict, inputs=["text", "text"], outputs=gr.Audio())
 # app = gr.mount_gradio_app(app, demo, path="/")

 # def upload_bytes(bytes, ext=".wav"):
 #     return bytes
+from qili import upload, upload_bytes
 # if __name__ == "__main__":
 #     app = Flask(__name__)
 # else:
 default_sample=f'{os.path.dirname(os.path.abspath(__file__))}/sample.wav', f'{sample_root}/sample.pt'
 ffmpeg=f'{os.path.dirname(os.path.abspath(__file__))}/ffmpeg'
+def get_tts():
+    global tts
+    global model
+    if tts is None:
+        model_dir=os.environ.get("MODEL_DIR")
+        model_path=model_dir
+        config_path=f'{model_dir}/config.json'
+        vocoder_config_path=f'{model_dir}/vocab.json'
+        model_name="tts_models/multilingual/multi-dataset/xtts_v2"
+        logging.info(f"loading model {model_name} ...")
+        tts = TTS(
+            # model_name,
+            model_path=model_path,
+            config_path=config_path,
+            vocoder_config_path=vocoder_config_path,
+            progress_bar=True
+        )
+        model=tts.synthesizer.tts_model
+        #hack to use cache
+        model.__get_conditioning_latents=model.get_conditioning_latents
+        model.get_conditioning_latents=get_conditioning_latents
+        logging.info("model is ready")
 def predict(text, sample=None, language="zh"):
     global tts
     global model
     try:
+        get_tts()
         text= re.sub("([^\x00-\x7F]|\w)(\.|\。|\?)",r"\1 \2\2",text)
         wav = tts.tts(
             text,
 from flask import Flask, request
 app = Flask(__name__)
 @app.route("/tts")
+def tts():
     text = request.args.get('text')
     sample = request.args.get('sample')
     language = request.args.get('language')
 # @app.get("/play")
 # def play(text: str=Query(None), sample: str=Query(None), language: str=Query('zh')):
+@app.route("/tts/play")
+def tts_play():
     url=convert()
     return playInHTML(url)
+@app.route("/clone")
+def clone():
+    source=request.args.get('source')
+    sample=request.args.get('sample')
+    get_tts()
+    output=tempfile.mktemp(suffix=".wav", delete=False)[1]
+    tts.voice_conversion_to_file(
+        source_wav=source,
+        target_wav=sample,
+        file_path=output
+    )
+    return upload(output)
+@app.route("/clone/play")
+def clone_play():
+    url=clone()
+    return playInHTML(url)
 # import gradio as gr
 # demo=gr.Interface(predict, inputs=["text", "text"], outputs=gr.Audio())
 # app = gr.mount_gradio_app(app, demo, path="/")