# MMS-TTS / app.py
# Hugging Face Space by suayptalha
# Last commit: "Update app.py" (4281cb0, verified)
from transformers import VitsModel, AutoTokenizer
import torch
import gradio as gr
import spaces
import numpy as np
# Checkpoint id shared by the model weights and the matching tokenizer.
MODEL_ID = "facebook/mms-tts-eng"

# The device is fixed to CUDA; no CPU fallback is attempted here.
device = "cuda"
print(f"Using device: {device}")

# Load the VITS model once at import time and move it onto the device,
# then load the tokenizer from the same checkpoint.
model = VitsModel.from_pretrained(MODEL_ID)
model = model.to(device)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
@spaces.GPU
def process_text(text):
    """Synthesize speech for ``text`` with the module-level VITS model.

    Args:
        text: The text to convert to speech.

    Returns:
        A ``(sample_rate, waveform)`` tuple, where ``sample_rate`` is read
        from ``model.config.sampling_rate`` and ``waveform`` is the model's
        output moved to the CPU, converted to NumPy and squeezed. Returns
        ``None`` when ``text`` is empty or contains only whitespace.
    """
    # Blank input carries nothing to synthesize: skip tokenization and the
    # model call and hand back None, which the Audio output accepts as
    # "no audio".
    if not text or not text.strip():
        return None

    # Tokenize into PyTorch tensors and move them to the model's device.
    inputs = tokenizer(text, return_tensors="pt").to(device)

    # Inference only, so gradient tracking is disabled.
    with torch.no_grad():
        output = model(**inputs).waveform

    audio_numpy = output.cpu().numpy().squeeze()
    sample_rate = model.config.sampling_rate
    return (sample_rate, audio_numpy)
# Sample sentences passed to gr.Examples below. Each sentence is wrapped in
# its own one-element list, i.e. one example row with a single text value.
_EXAMPLE_SENTENCES = (
    "Hello, welcome to text-to-speech system!",
    "How amazing is artificial intelligence technology?",
    "The weather is beautiful today, isn't it?",
    "Learning new things makes life exciting.",
    "This audio was generated by artificial intelligence.",
)
examples = [[sentence] for sentence in _EXAMPLE_SENTENCES]
# Page layout: a heading and a short instruction, then one row with two
# columns. The first column holds the text box, the example prompts and the
# trigger button; the second column holds the audio output.
# NOTE(review): the nesting below was reconstructed from a paste that had
# lost its indentation -- confirm it matches the deployed layout.
with gr.Blocks() as demo:
    gr.Markdown("## 🎤 MMS-TTS English Text-to-Speech System")
    gr.Markdown("Enter text below and convert it to natural sounding speech!")

    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                label="Input Text",
                placeholder="Enter text here...",
            )
            gr.Examples(
                examples=examples,
                inputs=input_text,
                label="Example Texts",
            )
            submit_btn = gr.Button("Generate Speech")

        with gr.Column():
            audio_output = gr.Audio(label="Generated Speech", type="numpy")

    # Clicking the button passes the textbox value to process_text and
    # routes its return value to the audio component.
    submit_btn.click(process_text, inputs=input_text, outputs=audio_output)

# Launch the app when this module is executed.
demo.launch()