kennethli319 committed on
Commit
ee819ef
·
1 Parent(s): a856e32

update tts

Browse files
Files changed (1) hide show
  1. app.py +6 -12
app.py CHANGED
@@ -7,6 +7,7 @@ import numpy as np
7
  from nemo.collections.tts.models import FastPitchModel
8
  from nemo.collections.tts.models import HifiGanModel
9
  from nemo.collections.tts.models import MixerTTSModel
 
10
 
11
  from transformers import pipeline
12
 
@@ -25,18 +26,11 @@ def greet(name):
25
 
26
  def generate_tts(text: str, speaker: int = 0):
27
  sr = 44100
28
- # parsed = spec_generator.parse(text)
29
- # spectrogram = spec_generator.generate_spectrogram(tokens=parsed, speaker=speaker)
30
- # audio = voc_model.convert_spectrogram_to_audio(spec=spectrogram)
31
- output = pipe(text)
32
-
33
- # with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
34
- # torchaudio.save(fp.name, audio.to('cpu'), sample_rate=sr)
35
-
36
- # return fp.name
37
- audio_arr = (output["audio"] * 32767).astype(np.int16)
38
-
39
- return (output["sampling_rate"], audio_arr)
40
 
41
  def run():
42
  demo = gr.Interface(
 
7
  from nemo.collections.tts.models import FastPitchModel
8
  from nemo.collections.tts.models import HifiGanModel
9
  from nemo.collections.tts.models import MixerTTSModel
10
+ from bark import SAMPLE_RATE, generate_audio, preload_models
11
 
12
  from transformers import pipeline
13
 
 
26
 
27
  def generate_tts(text: str, speaker: int = 0):
28
  sr = 44100
29
+ parsed = spec_generator.parse(text)
30
+ spectrogram = spec_generator.generate_spectrogram(tokens=parsed, speaker=speaker)
31
+ audio = voc_model.convert_spectrogram_to_audio(spec=spectrogram)
32
+
33
+ return (sr, audio.squeeze(0).cpu().numpy())
 
 
 
 
 
 
 
34
 
35
  def run():
36
  demo = gr.Interface(