File size: 1,697 Bytes
b996e0b
eb3a8c0
2230bd8
 
 
eb3a8c0
8bc1b1b
 
a6dfc58
 
f690a5a
 
 
 
 
 
a6dfc58
 
8bc1b1b
 
4bb082e
a6dfc58
9383bb1
b996e0b
f690a5a
 
b996e0b
 
 
8bc1b1b
e3064ab
f690a5a
 
 
 
2230bd8
09e6eb0
 
2230bd8
09e6eb0
f690a5a
8bc1b1b
1da77a0
 
a6dfc58
e3064ab
 
09e6eb0
1da77a0
2230bd8
1da77a0
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import gradio as gr
import torch
import torchaudio
import tempfile

import numpy as np
from nemo.collections.tts.models import FastPitchModel
from nemo.collections.tts.models import HifiGanModel
from nemo.collections.tts.models import MixerTTSModel

from transformers import pipeline


# Alternative NeMo Mixer-TTS-X models, currently disabled:
# spec_generator_2 = MixerTTSModel.from_pretrained("tts_en_lj_mixerttsx")
# model1 = HifiGanModel.from_pretrained(model_name="tts_en_lj_hifigan_ft_mixerttsx")

# Pretrained NeMo FastPitch and HiFi-GAN checkpoints, loaded once at import
# time and switched to eval mode.
# NOTE(review): generate_tts() below only calls `pipe`, so these two models
# are loaded but not used by the visible code -- confirm whether they can go.
spec_generator = FastPitchModel.from_pretrained("tts_en_fastpitch_multispeaker")
spec_generator.eval()
voc_model = HifiGanModel.from_pretrained(model_name="tts_en_hifitts_hifigan_ft_fastpitch")
voc_model.eval()

# Hugging Face text-to-speech pipeline (Bark small); this is the model that
# actually serves requests.
pipe = pipeline("text-to-speech", model="suno/bark-small")

def greet(name):
    """Return the greeting ``"Hello <name>!!"`` for the given name string."""
    prefix, suffix = "Hello ", "!!"
    return prefix + name + suffix

def generate_tts(text: str, speaker: int = 0):
    """Synthesize speech for ``text`` with the module-level ``pipe`` pipeline.

    Args:
        text: The text to synthesize.
        speaker: Speaker index supplied by the UI. It is accepted so the
            Gradio interface (which passes two inputs) keeps working, but it
            is currently ignored: the pipeline call does not take a speaker.

    Returns:
        A ``(sampling_rate, audio)`` tuple, the format expected by a
        ``gr.Audio(type="numpy")`` output component.
    """
    output = pipe(text)

    # The transformers text-to-audio pipeline documents its waveform as
    # channels-first, (nb_channels, audio_length), whereas Gradio's numpy
    # audio expects (samples,) or (samples, channels). Drop singleton axes
    # (the mono case) and, if several channels remain, move them last.
    audio = np.squeeze(np.asarray(output["audio"]))
    if audio.ndim == 2:
        audio = audio.T

    return (output["sampling_rate"], audio)

def run():
    """Build the Gradio interface around ``generate_tts`` and serve it.

    The app listens on all interfaces (``0.0.0.0``) on port 7860.
    """
    # Components are created in the same order as they appear in the UI.
    text_input = gr.Textbox(value="This is a test.", label="Text to Synthesize")
    speaker_input = gr.Slider(0, 10, step=1, label="Speaker")
    audio_output = gr.Audio(label="Output", type="numpy")

    app = gr.Interface(
        fn=generate_tts,
        inputs=[text_input, speaker_input],
        outputs=audio_output,
    )
    app.launch(server_name="0.0.0.0", server_port=7860)


# Start the web UI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    run()