RSHVR committed on
Commit
f3c69f5
·
verified ·
1 Parent(s): 9311026

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -73
app.py CHANGED
@@ -4,13 +4,10 @@ import gradio as gr
4
  import torch
5
  import torchaudio
6
  import spaces
7
- from fastapi import FastAPI, File, UploadFile, Form
8
- from fastapi.responses import FileResponse
9
  from tortoise.api import TextToSpeech
10
  from tortoise.utils.audio import load_audio
11
  import numpy as np
12
- import uvicorn
13
- from typing import Optional
14
  import uuid
15
  from pydub import AudioSegment
16
 
@@ -27,10 +24,7 @@ if torch.cuda.is_available():
27
  zero = zero.cuda()
28
  print(f"Zero tensor device: {zero.device}")
29
 
30
- # Initialize FastAPI
31
- app = FastAPI(title="Tortoise TTS API")
32
-
33
- # Initialize TTS (will be loaded on demand with Zero-GPU)
34
  tts = None
35
 
36
  # Available preset voice options
@@ -138,54 +132,6 @@ def tts_interface(text, audio_file, preset_voice, record_audio):
138
  else:
139
  return None, message
140
 
141
- # FastAPI endpoints
142
- @app.post("/api/tts_with_voice_file/")
143
- @spaces.GPU
144
- async def tts_with_voice_file(
145
- text: str = Form(...),
146
- voice_file: Optional[UploadFile] = File(None),
147
- preset_voice: Optional[str] = Form("random")
148
- ):
149
- """API endpoint for TTS with an uploaded voice file"""
150
- try:
151
- print(f"Processing with device: {zero.device}")
152
-
153
- voice_sample_path = None
154
- if voice_file:
155
- # Save uploaded file temporarily
156
- temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(voice_file.filename)[1])
157
- temp_file.write(await voice_file.read())
158
- temp_file.close()
159
- voice_sample_path = temp_file.name
160
-
161
- output_path, message = generate_tts_with_voice(text, voice_sample_path, preset_voice)
162
-
163
- if output_path:
164
- return FileResponse(output_path, media_type="audio/wav", filename="tts_output.wav")
165
- else:
166
- return {"status": "error", "message": message}
167
- except Exception as e:
168
- return {"status": "error", "message": f"Failed to process: {str(e)}"}
169
-
170
- @app.post("/api/tts_with_preset/")
171
- @spaces.GPU
172
- async def tts_with_preset(
173
- text: str = Form(...),
174
- preset_voice: str = Form("random")
175
- ):
176
- """API endpoint for TTS with a preset voice"""
177
- try:
178
- print(f"Processing with device: {zero.device}")
179
-
180
- output_path, message = generate_tts_with_voice(text, preset_voice=preset_voice)
181
-
182
- if output_path:
183
- return FileResponse(output_path, media_type="audio/wav", filename="tts_output.wav")
184
- else:
185
- return {"status": "error", "message": message}
186
- except Exception as e:
187
- return {"status": "error", "message": f"Failed to process: {str(e)}"}
188
-
189
  # Create Gradio interface
190
  with gr.Blocks(title="Tortoise TTS with Voice Cloning") as demo:
191
  gr.Markdown("# Tortoise Text-to-Speech with Voice Cloning")
@@ -229,26 +175,17 @@ with gr.Blocks(title="Tortoise TTS with Voice Cloning") as demo:
229
  outputs=[output_audio, output_message]
230
  )
231
 
232
- gr.Markdown("### API Endpoints")
233
  gr.Markdown("""
234
- This app also provides API endpoints:
235
 
236
- 1. **Voice File TTS** - `/api/tts_with_voice_file/`
237
- - POST request with:
238
- - `text`: Text to convert to speech (required)
239
- - `voice_file`: Audio file for voice cloning (optional)
240
- - `preset_voice`: Name of preset voice (optional, defaults to "random")
241
 
242
- 2. **Preset Voice TTS** - `/api/tts_with_preset/`
243
- - POST request with:
244
- - `text`: Text to convert to speech (required)
245
- - `preset_voice`: Name of preset voice (required)
246
-
247
- Both endpoints return a WAV file with the generated speech.
248
  """)
249
 
250
- # Mount the Gradio app to FastAPI
251
- app = gr.mount_gradio_app(app, demo, path="/")
252
-
253
  if __name__ == "__main__":
254
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
4
  import torch
5
  import torchaudio
6
  import spaces
7
+ from huggingface_hub import snapshot_download
 
8
  from tortoise.api import TextToSpeech
9
  from tortoise.utils.audio import load_audio
10
  import numpy as np
 
 
11
  import uuid
12
  from pydub import AudioSegment
13
 
 
24
  zero = zero.cuda()
25
  print(f"Zero tensor device: {zero.device}")
26
 
27
+ # Initialize Tortoise TTS (will be loaded on demand with Zero-GPU)
 
 
 
28
  tts = None
29
 
30
  # Available preset voice options
 
132
  else:
133
  return None, message
134
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  # Create Gradio interface
136
  with gr.Blocks(title="Tortoise TTS with Voice Cloning") as demo:
137
  gr.Markdown("# Tortoise Text-to-Speech with Voice Cloning")
 
175
  outputs=[output_audio, output_message]
176
  )
177
 
178
+ gr.Markdown("### About This App")
179
  gr.Markdown("""
180
+ This app uses Tortoise-TTS to generate high-quality speech from text.
181
 
182
+ You can:
183
+ - Enter any text you want to be spoken
184
+ - Upload or record a voice sample for voice cloning
185
+ - Or select from pre-defined voice presets
 
186
 
187
+ The app runs on Hugging Face Spaces with Zero-GPU optimization.
 
 
 
 
 
188
  """)
189
 
 
 
 
190
  if __name__ == "__main__":
191
+ demo.launch()