Spaces:
Sleeping
Sleeping
# voice_cloner.py
from transformers import BarkModel, AutoProcessor | |
import torchaudio | |
import torch | |
import os | |
def clone_and_generate_text(text, reference_audio_path, language="English", emotion="Neutral"):
    """Synthesize ``text`` with the ``suno/bark`` model and save it as a WAV file.

    Args:
        text: The text to be spoken.
        reference_audio_path: Path to a reference recording. The file is loaded
            only to confirm that it is readable audio; it is NOT used to
            condition the model. The voice always comes from the fixed
            ``v2/en_speaker_9`` preset.
        language: Accepted for interface compatibility; currently unused.
        emotion: Accepted for interface compatibility; currently unused.

    Returns:
        The path of the written file, always ``"output_voice.wav"`` (relative
        to the current working directory; an existing file is overwritten).

    Raises:
        Whatever ``torchaudio.load`` raises when the reference file is missing
        or cannot be decoded.
    """
    # Fail fast on a bad reference file before paying for the model load.
    # NOTE(review): the decoded audio is discarded -- actual voice cloning from
    # the reference recording is not implemented here.
    torchaudio.load(reference_audio_path)

    # NOTE: processor and model are re-created on every call; callers that
    # invoke this repeatedly pay the full load cost each time.
    processor = AutoProcessor.from_pretrained("suno/bark")
    model = BarkModel.from_pretrained("suno/bark")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)

    inputs = processor(
        text=text,
        voice_preset="v2/en_speaker_9",  # generic fallback voice
        return_tensors="pt",
    ).to(device)

    with torch.no_grad():
        speech = model.generate(**inputs)

    # Write the file at the rate the model actually generates at. Hard-coding
    # a different rate (previously 22050) makes playback slow and pitched down.
    sample_rate = model.generation_config.sample_rate

    output_path = "output_voice.wav"
    torchaudio.save(output_path, speech.cpu(), sample_rate)
    return output_path