Spaces:
Build error
Build error
import os | |
import torch | |
import torchaudio | |
import spaces # Import spaces module for Zero-GPU | |
from tortoise.api import TextToSpeech | |
from tortoise.utils.audio import load_audio | |
# Shared Tortoise-TTS instance. It starts out unset and is populated by the
# first speech-generation call, then reused by later calls.
tts_model = None

# Make sure the folder that receives the generated WAV files is present.
os.makedirs(name="outputs", exist_ok=True)
# NOTE(review): the ZeroGPU `spaces.GPU` decorator that this function was meant
# to carry is not applied here. Confirm it supports coroutine functions before
# adding it.
async def generate_speech(text, voice_preset="random", voice_file_path=None):
    """Synthesize ``text`` with Tortoise-TTS and write the result to a WAV file.

    The module-level ``tts_model`` is created on the first call and reused by
    every later call.

    Args:
        text: The text to speak. Also used to derive the output file name.
        voice_preset: Forwarded as ``preset`` to ``tts_with_preset``. It is
            replaced by ``None`` when a usable ``voice_file_path`` is given.
        voice_file_path: Optional path to a reference recording. When the path
            exists, the recording is loaded at 22050 Hz and passed to the
            model as the only voice sample.

    Returns:
        A tuple ``(output_filename, (24000, audio))`` where ``audio`` is the
        generated tensor with its leading dimension squeezed, moved to the CPU.

    Raises:
        Exception: Any failure is printed and then re-raised unchanged.
    """
    global tts_model
    # Function-scope import keeps this block self-contained.
    import hashlib

    try:
        # Build the model lazily so importing this module stays cheap.
        if tts_model is None:
            print("Initializing Tortoise-TTS model...")
            tts_model = TextToSpeech(use_deepspeed=torch.cuda.is_available())
            print(f"Model initialized. Using device: {next(tts_model.autoregressive.parameters()).device}")

        # Process the reference voice recording if one was provided.
        voice_samples = None
        if voice_file_path and os.path.exists(voice_file_path):
            print(f"Loading voice from {voice_file_path}")
            loaded = load_audio(voice_file_path, 22050)
            # NOTE(review): upstream Tortoise `load_audio` is believed to
            # return a single waveform tensor, in which case a two-value
            # unpack fails. Accept either a bare tensor or a tuple whose first
            # item is the waveform.
            if isinstance(loaded, tuple):
                loaded = loaded[0]
            voice_samples = [loaded]
            voice_preset = None

        print(f"Generating speech for text: {text[:50]}...")

        # The built-in hash() of a str is salted per process and was reduced
        # to only 10000 buckets, so names changed between restarts and
        # different texts overwrote each other. A SHA-256 prefix is stable and
        # makes collisions negligible.
        digest = hashlib.sha256(text.encode("utf-8", "surrogatepass")).hexdigest()[:16]
        output_filename = f"outputs/tts_output_{digest}.wav"

        # NOTE(review): upstream `tts_with_preset` appears to accept only
        # quality presets ('ultra_fast', 'fast', 'standard', 'high_quality');
        # "random" or None may be rejected. Confirm against the installed
        # Tortoise version.
        gen = tts_model.tts_with_preset(
            text,
            voice_samples=voice_samples,
            preset=voice_preset,
        )

        # Convert once and reuse the tensor for both the file and the result.
        audio = gen.squeeze(0).cpu()
        torchaudio.save(output_filename, audio, 24000)
        print(f"Speech generated and saved to {output_filename}")

        return output_filename, (24000, audio)
    except Exception as e:
        # Log at this boundary, then let the caller decide how to handle it.
        print(f"Error generating speech: {str(e)}")
        raise