|
import gradio as gr |
|
import time |
|
from transformers import pipeline |
|
|
|
# Pipelines already built in this process, keyed by model id. Constructing a
# pipeline is the expensive step, so each model is built once and then reused
# instead of being rebuilt on every request. Entries are never evicted, so
# every model that has been requested stays resident in memory.
_PIPELINE_CACHE = {}


def _get_pipeline(model_name):
    """Return the "text-to-speech" pipeline for ``model_name``, building it on first use."""
    try:
        return _PIPELINE_CACHE[model_name]
    except KeyError:
        pipe = pipeline("text-to-speech", model=model_name)
        _PIPELINE_CACHE[model_name] = pipe
        return pipe


def tts_inference(text, model_name):
    """Synthesize speech for ``text`` with the selected model.

    Args:
        text: The sentence(s) to convert to speech.
        model_name: Model identifier handed to ``transformers.pipeline`` as
            its ``model`` argument.

    Returns:
        A ``(sampling_rate, audio)`` tuple. This is the order a
        ``gr.Audio(type="numpy")`` output consumes: sample rate first, then
        the waveform.

    Raises:
        gr.Error: If ``text`` is empty or contains only whitespace.
    """
    # Fail early with a message the UI can show instead of sending blank input
    # to the model.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to convert to speech.")

    # NOTE(review): "microsoft/speecht5_tts" appears to require speaker
    # embeddings (passed through the pipeline's forward params) — confirm
    # before relying on that entry.
    pipe = _get_pipeline(model_name)

    print('Processing...')
    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # wall-clock adjustments.
    started = time.perf_counter()
    output = pipe(text)
    elapsed = time.perf_counter() - started
    print(f"Took {round(elapsed)} seconds")

    audio = output["audio"]
    # NOTE(review): some models seem to return the waveform with a leading
    # batch axis of size 1; dropping size-1 axes keeps it from being read as
    # (samples, channels) — confirm against each model's output shape.
    if hasattr(audio, "squeeze"):
        audio = audio.squeeze()

    # Sample rate first, waveform second (the original returned them swapped).
    return (output["sampling_rate"], audio)
|
|
|
|
|
# Model identifiers offered in the "Select Model" dropdown; the chosen entry is
# passed to tts_inference as model_name.
available_models = [
    "microsoft/speecht5_tts",
    "facebook/mms-tts-eng",
    "suno/bark",
]
|
|
|
# Web UI: a text box and a model selector feed tts_inference, and its result is
# rendered by a numpy-typed audio output.
demo = gr.Interface(
    fn=tts_inference,
    inputs=[
        gr.Textbox(label="Enter text", placeholder="Type something to convert to speech..."),
        gr.Dropdown(available_models, label="Select Model"),
    ],
    outputs=gr.Audio(type="numpy", label="Generated Speech"),
    title="Hugging Face TTS Space",
    description="Enter text and generate speech using Hugging Face's text-to-speech models.",
)

# Start serving as soon as the script runs, exactly as the original chained
# .launch() call did.
demo.launch()
|
|