RSHVR committed on
Commit
60b8847
·
verified ·
1 Parent(s): 836c8e6

Create tts.py

Browse files
Files changed (1) hide show
  1. tts.py +52 -0
tts.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import torchaudio
4
+ import spaces # Import spaces module for Zero-GPU
5
+ from tortoise.api import TextToSpeech
6
+ from tortoise.utils.audio import load_audio
7
+
8
# Make sure the directory that receives generated WAV files exists.
os.makedirs("outputs", exist_ok=True)

# Process-wide Tortoise-TTS model; stays None until generate_speech()
# builds it on first use.
tts_model = None
13
+
14
@spaces.GPU  # Add spaces.GPU decorator for Zero-GPU support
async def generate_speech(text, voice_preset="random", voice_file_path=None):
    """Synthesize ``text`` with Tortoise-TTS and save the result as a WAV file.

    Args:
        text: The text to speak. Must be a non-blank string.
        voice_preset: Name of a built-in Tortoise voice. ``"random"`` (the
            default) or a falsy value lets Tortoise pick a random voice.
            Ignored when ``voice_file_path`` points to an existing file.
        voice_file_path: Optional path to a reference audio clip to clone.
            It is used only if the path exists on disk.

    Returns:
        A ``(output_filename, (24000, audio))`` tuple, where
        ``output_filename`` is the path of the WAV file written under
        ``outputs/`` and ``audio`` is the generated waveform moved to the CPU.

    Raises:
        ValueError: If ``text`` is empty or only whitespace.
        Exception: Any error raised while loading the model, loading the
            voice, or generating/saving audio is logged and re-raised.
    """
    global tts_model

    # Local imports keep this block self-contained without touching the
    # module-level import list.
    import hashlib

    from tortoise.utils.audio import load_voice

    try:
        if not isinstance(text, str) or not text.strip():
            raise ValueError("text must be a non-empty string")

        # Initialize the model lazily so importing this module stays cheap.
        if tts_model is None:
            print("Initializing Tortoise-TTS model...")
            tts_model = TextToSpeech(use_deepspeed=torch.cuda.is_available())
            print(f"Model initialized. Using device: {next(tts_model.autoregressive.parameters()).device}")

        # Resolve the speaker. With both values left as None, Tortoise falls
        # back to a random voice.
        voice_samples = None
        conditioning_latents = None
        if voice_file_path and os.path.exists(voice_file_path):
            print(f"Loading voice from {voice_file_path}")
            # load_audio returns a single tensor (not a (tensor, rate) pair),
            # so it must not be tuple-unpacked.
            voice_samples = [load_audio(voice_file_path, 22050)]
        elif voice_preset and voice_preset != "random":
            # Built-in voices are looked up by name; load_voice yields the
            # reference clips and/or precomputed conditioning latents.
            voice_samples, conditioning_latents = load_voice(voice_preset)

        # Generate speech
        print(f"Generating speech for text: {text[:50]}...")
        # hash() is salted per process, so use a stable digest to make the
        # file name reproducible across restarts. The 10000-name bound of the
        # original naming scheme is kept.
        digest = hashlib.sha256(text.encode("utf-8")).hexdigest()
        output_filename = f"outputs/tts_output_{int(digest, 16) % 10000}.wav"

        # NOTE: tts_with_preset's ``preset`` argument selects a *quality*
        # preset, not a voice, so the voice name must not be passed there.
        # Omitting it keeps the library's default quality preset.
        gen = tts_model.tts_with_preset(
            text,
            voice_samples=voice_samples,
            conditioning_latents=conditioning_latents,
        )

        # Drop the leading dimension and move to the CPU once, then reuse the
        # result for both saving and returning.
        audio = gen.squeeze(0).cpu()

        # Save the generated audio
        torchaudio.save(output_filename, audio, 24000)
        print(f"Speech generated and saved to {output_filename}")

        # Return the filename and audio data
        return output_filename, (24000, audio)

    except Exception as e:
        print(f"Error generating speech: {str(e)}")
        raise