File size: 1,215 Bytes
873d0cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import soundfile as sf


# Module-level cache for the text-to-speech pipeline; stays None until
# synthesiser() builds it on first use.
synthesiser_ = None


def synthesiser():
    """Return the shared text-to-speech pipeline, creating it on first call.

    The pipeline is built with the ``transformers`` ``pipeline`` factory for
    the "text-to-speech" task and the "microsoft/speecht5_tts" checkpoint,
    then stored in the module-level ``synthesiser_`` so later calls reuse it.

    Returns:
        The cached pipeline object.
    """
    global synthesiser_
    # Imported lazily so merely importing this module does not pull in
    # transformers.
    from transformers import pipeline as build_pipeline

    if synthesiser_ is not None:
        return synthesiser_

    synthesiser_ = build_pipeline("text-to-speech", "microsoft/speecht5_tts")
    return synthesiser_


# Module-level cache for the x-vector dataset; stays None until
# embeddings_dataset() loads it on first use.
embeddings_dataset_ = None


def embeddings_dataset():
    """Return the shared x-vector dataset, loading it on first call.

    Loads the "validation" split of "Matthijs/cmu-arctic-xvectors" through
    ``datasets.load_dataset`` and keeps it in the module-level
    ``embeddings_dataset_`` so the load happens at most once per process.

    Returns:
        The cached dataset object.
    """
    global embeddings_dataset_
    # Imported lazily so merely importing this module does not pull in
    # datasets.
    from datasets import load_dataset as fetch_dataset

    if embeddings_dataset_ is not None:
        return embeddings_dataset_

    embeddings_dataset_ = fetch_dataset(
        "Matthijs/cmu-arctic-xvectors",
        split="validation",
    )
    return embeddings_dataset_


# Module-level cache for the speaker embedding tensor; stays None until
# speaker_embedding() computes it on first use.
speaker_embedding_ = None


def speaker_embedding():
    """Return the shared speaker embedding tensor, building it on first call.

    Takes the "xvector" field of record 7306 from ``embeddings_dataset()``,
    converts it to a ``torch`` tensor and prepends a dimension of size 1 with
    ``unsqueeze(0)``. The result is stored in the module-level
    ``speaker_embedding_`` and reused by later calls.

    Returns:
        The cached embedding tensor.
    """
    global speaker_embedding_
    # Imported lazily so merely importing this module does not pull in torch.
    import torch

    if speaker_embedding_ is not None:
        return speaker_embedding_

    record = embeddings_dataset()[7306]
    xvector = torch.tensor(record["xvector"])
    speaker_embedding_ = xvector.unsqueeze(0)
    return speaker_embedding_


def preload_tts_microsoft_local():
    """Eagerly build every lazily-cached resource used for local TTS.

    Calls ``synthesiser()``, ``embeddings_dataset()`` and
    ``speaker_embedding()`` in that order so their module-level caches are
    filled before the first synthesis request.
    """
    for warm_up in (synthesiser, embeddings_dataset, speaker_embedding):
        warm_up()


def tts_microsoft_local(text_chunk, location):
    """Synthesise speech for ``text_chunk`` and write it to ``location``.

    Runs the cached pipeline from ``synthesiser()`` on the text, passing the
    cached speaker embedding as the "speaker_embeddings" forward parameter,
    then writes the resulting audio with ``soundfile.write`` at the sampling
    rate reported by the pipeline.

    Args:
        text_chunk: Text handed to the text-to-speech pipeline.
        location: Output target passed as the first argument to
            ``soundfile.write`` (presumably a file path; confirm with callers).

    Returns:
        ``location``, unchanged.
    """
    tts_pipeline = synthesiser()
    forward_params = {"speaker_embeddings": speaker_embedding()}
    result = tts_pipeline(text_chunk, forward_params=forward_params)

    sf.write(location, result["audio"], samplerate=result["sampling_rate"])
    return location