File size: 601 Bytes
084f005
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
from transformers import BarkModel, BarkProcessor
import torch

# You could switch to xtts-v2 or Tortoise if needed
def clone_and_generate_text(
    text: str,
    reference_audio_path: str,
    output_path: str = "clone_output.wav",
) -> str:
    """Synthesize *text* with Bark and write the result to a WAV file.

    Args:
        text: The text to speak.
        reference_audio_path: Currently unused. The voice is always the
            built-in ``v2/en_speaker_9`` preset; no cloning from the
            reference audio is performed here.
            TODO: wire this up if a cloning-capable backend is adopted.
        output_path: Destination path of the WAV file.

    Returns:
        ``output_path``, after the audio has been written.
    """
    # Imported locally so the block brings its own writer into scope; the
    # original referenced ``torchaudio`` without importing it (NameError).
    from scipy.io import wavfile

    # Fall back to CPU instead of crashing on machines without CUDA.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    processor = BarkProcessor.from_pretrained("suno/bark")
    model = BarkModel.from_pretrained("suno/bark").to(device)

    # Preprocess input
    inputs = processor(
        text=text,
        voice_preset="v2/en_speaker_9",
        return_tensors="pt",
    ).to(device)

    # Generate speech
    speech = model.generate(**inputs)

    # Save output. The sample rate is read from the model's generation config
    # rather than hard-coded, so the file plays back at the correct speed.
    sample_rate = model.generation_config.sample_rate
    # squeeze() drops the leading batch axis so a mono 1-D signal is written.
    waveform = speech.cpu().numpy().squeeze()
    wavfile.write(output_path, rate=sample_rate, data=waveform)
    return output_path