Spaces:
Running
on
Zero
Running
on
Zero
from transformers import VitsModel, AutoTokenizer | |
import torch | |
import gradio as gr | |
import spaces | |
import numpy as np | |
# Inference device. Fixed to CUDA; no CPU fallback is attempted here.
device = "cuda"
print(f"Using device: {device}")

# Fetch the pretrained checkpoint first, then move it onto the target device.
model = VitsModel.from_pretrained("facebook/mms-tts-eng")
model = model.to(device)

# The tokenizer is loaded from the same repo id as the model.
tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
@spaces.GPU
def process_text(text):
    """Synthesize speech for *text* with the module-level VITS model.

    The function is wrapped in ``spaces.GPU`` so that, on a ZeroGPU Space,
    a GPU is attached while the call runs (the module already imports
    ``spaces`` for this purpose).

    Args:
        text: The text to convert to speech.

    Returns:
        A ``(sample_rate, waveform)`` tuple, where ``sample_rate`` comes from
        ``model.config.sampling_rate`` and ``waveform`` is the model output
        as a NumPy array with singleton dimensions squeezed out.  Returns
        ``None`` when *text* is empty or whitespace-only, so no audio is
        produced for blank input.
    """
    # Nothing to synthesize: skip the model rather than feed it an empty
    # token sequence.
    if not text or not text.strip():
        return None

    inputs = tokenizer(text, return_tensors="pt").to(device)

    # Inference only, so no autograd graph is needed.
    with torch.no_grad():
        waveform = model(**inputs).waveform

    # Move to host memory before converting; squeeze drops singleton dims.
    audio_numpy = waveform.cpu().numpy().squeeze()
    return (model.config.sampling_rate, audio_numpy)
# Sample prompts offered in the demo. Each row is a one-element list: one
# value per example row.
examples = [
    [sentence]
    for sentence in (
        "Hello, welcome to text-to-speech system!",
        "How amazing is artificial intelligence technology?",
        "The weather is beautiful today, isn't it?",
        "Learning new things makes life exciting.",
        "This audio was generated by artificial intelligence.",
    )
]
# Two-column layout: text entry, examples and the trigger button on the left,
# the generated audio on the right.
with gr.Blocks() as demo:
    # Page heading and short usage hint.
    gr.Markdown(value="## 🎤 MMS-TTS English Text-to-Speech System")
    gr.Markdown(value="Enter text below and convert it to natural sounding speech!")

    with gr.Row():
        # Left column: input controls.
        with gr.Column():
            input_text = gr.Textbox(
                label="Input Text",
                placeholder="Enter text here...",
            )
            gr.Examples(
                examples=examples,
                inputs=input_text,
                label="Example Texts",
            )
            submit_btn = gr.Button(value="Generate Speech")

        # Right column: synthesized result.
        with gr.Column():
            audio_output = gr.Audio(label="Generated Speech", type="numpy")

    # Event listeners must be registered while the Blocks context is open.
    submit_btn.click(process_text, input_text, audio_output)

# Start the web server for the assembled interface.
demo.launch()