Samuel L Meyers committed on
Commit
9dd385e
·
1 Parent(s): 8688550

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -11
app.py CHANGED
@@ -1,19 +1,20 @@
1
  import gradio as gr
2
- from transformers import AutoProcessor, BarkModel
3
- import scipy
4
  import torch
 
5
 
6
- processor = AutoProcessor.from_pretrained("suno/bark-small")
7
- model = BarkModel.from_pretrained("suno/bark-small")
 
 
8
 
9
  def greet(text):
10
- inputs = processor(
11
- text=[text],
12
- return_tensors="pt",
13
- )
14
- speech_values = model.generate(**inputs, do_sample=True, num_beams=5)
15
- scipy.io.wavfile.write("tmp.wav", rate=24000, data=speech_values.cpu().numpy().squeeze())
16
- return open("tmp.wav", "rb").read()
17
 
18
  iface = gr.Interface(fn=greet, inputs="text", outputs="audio")
19
  iface.launch()
 
1
  import gradio as gr
2
+ from transformers import VitsModel, AutoTokenizer
 
3
  import torch
4
+ import scipy.io.wavfile as wavfile
5
 
6
+ model = VitsModel.from_pretrained("facebook/mms-tts-eng")
7
+ tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
8
+
9
+ text = "some example text in the English language"
10
 
11
  def greet(text):
12
+ inputs = tokenizer(text, return_tensors="pt")
13
+ with torch.no_grad():
14
+ output = model(**inputs).waveform
15
+ out = output[0]
16
+ wavfile.write("tmp.wav", rate=16000, data=out)
17
+ return open("tmp.wav", "rb").read()
 
18
 
19
  iface = gr.Interface(fn=greet, inputs="text", outputs="audio")
20
  iface.launch()