# NOTE: page-header residue captured with this file ("Spaces: Runtime error");
# it is not part of the program.
import os
import tempfile

import gradio as gr
import numpy as np
import torch
import torchaudio
from fairseq import checkpoint_utils
# Select the compute device: CUDA when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Usando {device} para la clonación de voz")

# Load the checkpoint ensemble, keep its first model, move it to the selected
# device and switch it to evaluation mode.
# NOTE(review): the checkpoint is referenced by an HTTPS URL; confirm that
# fairseq can resolve remote paths in this environment and that the URL is
# still valid, since a failure here aborts the app at start-up.
models, cfg, task = checkpoint_utils.load_model_ensemble_and_task(
    ["https://dl.fbaipublicfiles.com/vits/model.pt"]
)
model = models[0].to(device)
model.eval()
def clone_voice(reference_audio, text):
    """Synthesize ``text`` using the voice found in ``reference_audio``.

    Args:
        reference_audio: Either a filesystem path to the reference recording
            or a file-like object exposing the path through ``.name``.
        text: The text to synthesize.

    Returns:
        Path of a temporary ``.wav`` file holding the generated audio. The
        file is created with ``delete=False``; removing it is the caller's
        responsibility.

    Raises:
        ValueError: If ``reference_audio`` is ``None``.
    """
    if reference_audio is None:
        raise ValueError("reference_audio is required")

    target_sample_rate = 22050

    # The UI layer may hand over a temp-file object or a plain path string
    # depending on its version; a string has no ``.name`` and is used as-is.
    audio_path = getattr(reference_audio, "name", reference_audio)

    waveform, sample_rate = torchaudio.load(audio_path)

    # Average the channels down to mono, then resample to the target rate.
    waveform = waveform.mean(dim=0)
    waveform = torchaudio.transforms.Resample(sample_rate, target_sample_rate)(waveform)

    # Add a leading dimension and move the tensor to the model's device.
    waveform = waveform.unsqueeze(0).to(device)

    # Inference only: disabling autograd avoids building a graph and yields
    # an output tensor that can be written to disk directly.
    # NOTE(review): assumes the loaded model exposes get_speaker_embedding()
    # and synthesize() -- confirm against the checkpoint's model class.
    with torch.no_grad():
        speaker_embedding = model.get_speaker_embedding(waveform)
        synthesized_waveform = model.synthesize(text, speaker_embedding)

    # Bring the result back to the CPU so it can be saved.
    synthesized_waveform = synthesized_waveform.cpu()

    # torchaudio.save expects a 2-D tensor; promote a bare 1-D signal.
    if synthesized_waveform.dim() == 1:
        synthesized_waveform = synthesized_waveform.unsqueeze(0)

    # Reserve a temp path and close the handle before writing, so the file
    # descriptor is not leaked and the path can be reopened by torchaudio.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as output_file:
        output_path = output_file.name
    torchaudio.save(output_path, synthesized_waveform, target_sample_rate)
    return output_path
# Build the Gradio interface: a reference-audio input plus a text box, wired
# to clone_voice, with the generated audio as output.
# NOTE(review): newer Gradio releases only accept type="numpy" or
# type="filepath" for gr.Audio; switching to "filepath" requires the callback
# to accept a path string rather than a file object -- confirm the installed
# Gradio version before changing it.
interface = gr.Interface(
    fn=clone_voice,
    inputs=[gr.Audio(type="file"), gr.Textbox(label="Texto a sintetizar")],
    outputs=gr.Audio(label="Voz Clonada"),
    title="Clonación de Voz con GPU",
    description="Sube un audio de referencia y escribe un texto para clonarlo con aceleración en GPU (si está disponible).",
)

# Start the web app.
interface.launch()