cloningapp / voice_cloner.py
meraj12's picture
Update voice_cloner.py
a9bed7f verified
# voice_cloner.py
from transformers import BarkModel, AutoProcessor
import torchaudio
import torch
import os
def clone_and_generate_text(text, reference_audio_path, language="English", emotion="Neutral"):
    """Synthesize *text* with Bark and write the result to a WAV file.

    NOTE(review): despite the name, no voice cloning happens here. The
    reference audio is only checked for readability; the speech is generated
    with the fixed ``v2/en_speaker_9`` Bark preset.

    Args:
        text: Text to be spoken.
        reference_audio_path: Path to a reference recording. It is loaded so
            that an unreadable or missing file still raises, but it does not
            influence the generated voice.
        language: Currently unused; accepted for interface compatibility.
        emotion: Currently unused; accepted for interface compatibility.

    Returns:
        The path of the written file, always ``"output_voice.wav"`` relative
        to the current working directory (overwritten on every call).
    """
    # Fail fast on a bad reference file before paying for the model load.
    # The decoded samples are intentionally discarded: nothing downstream
    # consumes them.
    torchaudio.load(reference_audio_path)

    processor = AutoProcessor.from_pretrained("suno/bark")
    model = BarkModel.from_pretrained("suno/bark")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)

    inputs = processor(
        text=text,
        voice_preset="v2/en_speaker_9",  # generic fallback voice
        return_tensors="pt",
    ).to(device)

    with torch.no_grad():
        speech = model.generate(**inputs)

    # Use the rate the model actually generates at instead of a hard-coded
    # 22050 Hz, which made the saved audio play back slow and low-pitched.
    # 24 kHz is Bark's documented rate and serves as the fallback.
    sample_rate = getattr(model.generation_config, "sample_rate", 24000)

    output_path = "output_voice.wav"
    torchaudio.save(output_path, speech.cpu(), sample_rate)
    return output_path