Spaces:

tykiww
/

TTS_Demo

Sleeping

File size: 1,576 Bytes

4110922
d33f32d
 
4110922
6afc51e
3a4510f
 
4110922
3a4510f
 
 
 
 
 
 
 
 
 
 
 
4110922
 
4af5dfa
4110922
3a4510f
4af5dfa
 
 
3a4510f
4af5dfa
4110922
0a5aead
 
3a4510f
0a5aead
 
3a4510f
 
 
0a5aead
 
4af5dfa
 
3a4510f
4af5dfa
 
 
 
 
75ff974
4af5dfa
 
 
 
 
 
 
 
 
 
 
3a4510f
 
 
 
 
4af5dfa
 
45fac78

import torch
from TTS.api import TTS
import gradio as gr
import os
import spaces
import yaml


def get_config():
    """Load the YAML configuration named by the CONFIG_PATH environment variable.

    The ``inference`` section and the path that was read are printed, then the
    whole parsed document is returned.

    Raises:
        KeyError: if ``CONFIG_PATH`` is not set, or the parsed mapping has no
            ``inference`` key.
    """
    # The location of the YAML file comes from the environment
    yaml_location = os.environ["CONFIG_PATH"]

    # Read and parse the document in one go
    with open(yaml_location, 'r') as stream:
        settings = yaml.safe_load(stream)

    # Echo what was loaded and where it came from
    inference_section = settings['inference']
    print(inference_section)
    print(yaml_location)
    return settings
    
    

# Agree to Terms of service
# os.environ["COQUI_TOS_AGREED"] = "1"

def init_TTS(config):
    """Build the TTS model named in the config and move it to a device.

    Args:
        config: mapping; ``config['inference']['model']`` is passed to ``TTS``.

    Returns:
        The result of ``TTS(...).to(device)``, where device is "cuda" when
        ``torch.cuda.is_available()`` is true and "cpu" otherwise.
    """
    # Pick the device first
    if torch.cuda.is_available():
        target_device = "cuda"
    else:
        target_device = "cpu"

    # Construct the model, then place it on the chosen device
    model_name = config['inference']['model']
    synthesizer = TTS(model_name)
    return synthesizer.to(target_device)


@spaces.GPU
def generate_speech(text, config):
    """Synthesize ``text`` to an audio file and return that file's path.

    Uses the module-level ``tts`` object (bound in this file's ``__main__``
    block) to perform the synthesis.

    Args:
        text: the text to speak.
        config: mapping whose ``inference`` section supplies ``file_path``
            (where the audio is written), ``speaker_wav`` and ``language``.

    Returns:
        ``config['inference']['file_path']``, the path passed to
        ``tts.tts_to_file``.
    """
    inference = config['inference']
    # Read the output path once so the value written to is the value returned
    # (the previous code returned an undefined name and raised NameError).
    file_path = inference['file_path']
    tts.tts_to_file(text=text,
                    file_path=file_path,
                    speaker_wav=inference['speaker_wav'],
                    language=inference['language'])
    return file_path



def main(config):
    """Build and launch the Gradio interface for speech synthesis.

    Args:
        config: the parsed configuration; it is forwarded to
            ``generate_speech`` on every request.

    Returns:
        0 once ``interface.launch()`` returns.
    """

    # The interface declares a single Textbox input, so Gradio calls the
    # handler with one argument. generate_speech also requires the config,
    # so bind it here instead of passing generate_speech directly.
    def synthesize(text):
        return generate_speech(text, config)

    # Create the Gradio interface
    interface = gr.Interface(
        fn=synthesize,
        inputs=[
            gr.Textbox(label="Enter your text")
        ],
        outputs="audio",
        title="Voice Synthesis with Coqui-XTTS",
        description="Synthesize speech using predefined target voice and language."
    )

    # Launch the interface
    interface.launch()
    return 0

if __name__ == "__main__":
    # Load the configuration first: both the model setup and the UI take it.
    config = get_config()
    # `tts` is bound at module level here; generate_speech refers to it by
    # this name, so it must exist before the interface starts serving.
    tts = init_TTS(config)
    # Build and launch the Gradio interface.
    main(config)