TaiYouWeb committed on
Commit
db3663c
·
verified ·
1 Parent(s): 5ca847f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -27
app.py CHANGED
@@ -1,49 +1,41 @@
1
  import io
2
  import os
3
  import tempfile
4
- from typing import List, Optional
5
 
6
  import TTS.api
7
  import torch
8
  from pydub import AudioSegment
9
-
10
- from fastapi import FastAPI, File, Form, UploadFile, HTTPException
11
- from fastapi.responses import StreamingResponse, Response
12
 
13
  import config
14
 
15
  device = "cuda" if torch.cuda.is_available() else "cpu"
16
 
17
  models = {}
18
-
19
  for id, model in config.models.items():
20
  models[id] = TTS.api.TTS(model).to(device)
21
 
22
- class SynthesizeResponse(Response):
23
- media_type = 'audio/wav'
24
 
25
- app = FastAPI()
26
-
27
- @app.post('/tts', response_class=SynthesizeResponse)
28
- async def synthesize(
29
- text: str = Form('Hello,World!'),
30
- speaker_wavs: List[UploadFile] = File(None),
31
- speaker_idx: str = Form('Ana Florence'),
32
- language: str = Form('ja'),
33
- temperature: float = Form(0.65),
34
- length_penalty: float = Form(1.0),
35
- repetition_penalty: float = Form(2.0),
36
- top_k: int = Form(50),
37
- top_p: float = Form(0.8),
38
- speed: float = Form(1.0),
39
- enable_text_splitting: bool = Form(True)
40
- ) -> StreamingResponse:
41
  temp_files = []
42
  try:
43
  if speaker_wavs:
44
  # Process each uploaded file
45
  for speaker_wav in speaker_wavs:
46
- speaker_wav_bytes = await speaker_wav.read()
47
  # Convert the uploaded audio file to a WAV format using pydub
48
  try:
49
  audio = AudioSegment.from_file(io.BytesIO(speaker_wav_bytes))
@@ -51,7 +43,7 @@ async def synthesize(
51
  audio.export(wav_buffer, format="wav")
52
  wav_buffer.seek(0) # Reset buffer position to the beginning
53
  except Exception as e:
54
- raise HTTPException(status_code=400, detail=f"Error processing audio file: {e}")
55
 
56
  temp_wav_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
57
  temp_wav_file.write(wav_buffer.read())
@@ -87,9 +79,36 @@ async def synthesize(
87
  speed=speed,
88
  enable_text_splitting=enable_text_splitting
89
  )
 
90
  output_buffer.seek(0)
91
- return StreamingResponse(output_buffer, media_type="audio/wav")
 
92
  finally:
93
  for temp_file in temp_files:
94
  if isinstance(temp_file, str) and os.path.exists(temp_file):
95
- os.remove(temp_file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import io
2
  import os
3
  import tempfile
4
+ from typing import List
5
 
6
  import TTS.api
7
  import torch
8
  from pydub import AudioSegment
9
+ import gradio as gr # Gradio库
 
 
10
 
11
  import config
12
 
13
  device = "cuda" if torch.cuda.is_available() else "cpu"
14
 
15
  models = {}
 
16
  for id, model in config.models.items():
17
  models[id] = TTS.api.TTS(model).to(device)
18
 
 
 
19
 
20
+ def synthesize_tts(
21
+ text: str = 'Hello, World!',
22
+ speaker_wavs: List[gr.File] = None,
23
+ speaker_idx: str = 'Ana Florence',
24
+ language: str = 'ja',
25
+ temperature: float = 0.65,
26
+ length_penalty: float = 1.0,
27
+ repetition_penalty: float = 2.0,
28
+ top_k: int = 50,
29
+ top_p: float = 0.8,
30
+ speed: float = 1.0,
31
+ enable_text_splitting: bool = True,
32
+ ):
 
 
 
33
  temp_files = []
34
  try:
35
  if speaker_wavs:
36
  # Process each uploaded file
37
  for speaker_wav in speaker_wavs:
38
+ speaker_wav_bytes = speaker_wav.read()
39
  # Convert the uploaded audio file to a WAV format using pydub
40
  try:
41
  audio = AudioSegment.from_file(io.BytesIO(speaker_wav_bytes))
 
43
  audio.export(wav_buffer, format="wav")
44
  wav_buffer.seek(0) # Reset buffer position to the beginning
45
  except Exception as e:
46
+ return f"Error processing audio file: {e}"
47
 
48
  temp_wav_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
49
  temp_wav_file.write(wav_buffer.read())
 
79
  speed=speed,
80
  enable_text_splitting=enable_text_splitting
81
  )
82
+
83
  output_buffer.seek(0)
84
+ return output_buffer.read()
85
+
86
  finally:
87
  for temp_file in temp_files:
88
  if isinstance(temp_file, str) and os.path.exists(temp_file):
89
+ os.remove(temp_file)
90
+
91
+
92
+ # Build the Gradio interface
93
+ inputs = [
94
+ gr.Textbox(value="Hello, World!", label="Text to Synthesize"),
95
+ gr.File(file_types=["audio"], label="Speaker WAV files (optional)", optional=True, multiple=True),
96
+ gr.Textbox(value="Ana Florence", label="Speaker Index"),
97
+ gr.Textbox(value="ja", label="Language"),
98
+ gr.Slider(0, 1, value=0.65, step=0.01, label="Temperature"),
99
+ gr.Slider(0.5, 2, value=1.0, step=0.1, label="Length Penalty"),
100
+ gr.Slider(1, 10, value=2.0, step=0.1, label="Repetition Penalty"),
101
+ gr.Slider(1, 100, value=50, step=1, label="Top-K"),
102
+ gr.Slider(0, 1, value=0.8, step=0.01, label="Top-P"),
103
+ gr.Slider(0.5, 2, value=1.0, step=0.01, label="Speed"),
104
+ gr.Checkbox(value=True, label="Enable Text Splitting")
105
+ ]
106
+
107
+ outputs = gr.Audio(label="Generated Speech")
108
+
109
+ gr.Interface(
110
+ fn=synthesize_tts,
111
+ inputs=inputs,
112
+ outputs=outputs,
113
+ title="Text-to-Speech Synthesis with Gradio"
114
+ ).launch()