tbdavid2019 committed · verified
Commit b6500b8 · 1 Parent(s): 9a473ed

Update app.py

Files changed (1)
  1. app.py +61 -97
app.py CHANGED
@@ -5,12 +5,8 @@ from tempfile import NamedTemporaryFile
 import time
 import gradio as gr
 from openai import OpenAI
-from pydub import AudioSegment
 
-# 安裝必要的庫
-# pip install openai gradio pydub
-
-# 標準音頻模型和聲音選項
+# 标准音频模型和声音选项
 STANDARD_AUDIO_MODELS = [
     "tts-1",
     "tts-1-hd",
@@ -24,7 +20,7 @@ STANDARD_VOICES = [
     "shimmer",
 ]
 
-# 合併連續相同說話者的文本
+# 优化脚本处理 - 合并相同说话者连续文本
 def optimize_script(script):
     lines = [line.strip() for line in script.splitlines() if line.strip()]
     optimized = []
@@ -39,32 +35,30 @@ def optimize_script(script):
             speaker = "speaker-2"
             text = line.split(":", 1)[1].strip()
         else:
-            speaker = "speaker-1"  # 預設使用說話者1
+            speaker = "speaker-1"  # 默认使用说话者1
             text = line
 
-        # 如果說話者變了,保存之前的文本並開始新的
+        # 如果说话者变了,保存之前的文本并开始新的
         if speaker != current_speaker and current_text:
             optimized.append((current_speaker, current_text))
             current_text = text
             current_speaker = speaker
         else:
-            # 相同說話者,合併文本(加空格)
+            # 相同说话者,合并文本(加空格)
             if current_text:
                 current_text += " " + text
             else:
                 current_text = text
             current_speaker = speaker
 
-    # 添加最後一個說話者的文本
+    # 添加最后一个说话者的文本
     if current_text:
         optimized.append((current_speaker, current_text))
 
     return optimized
 
 def get_mp3(text: str, voice: str, audio_model: str, audio_api_key: str) -> bytes:
-    """
-    使用 OpenAI TTS API 生成音頻
-    """
+    """使用 OpenAI TTS API 生成音频"""
     client = OpenAI(api_key=audio_api_key)
     try:
         with client.audio.speech.with_streaming_response.create(
@@ -87,61 +81,41 @@ def generate_audio_from_script(
     speaker1_voice: str = "onyx",
     speaker2_voice: str = "nova",
 ) -> tuple[bytes, str]:
-    """
-    從腳本生成音頻,支援兩個說話者,並優化 API 調用
-    """
+    """从脚本生成音频,支持两个说话者,并优化 API 调用"""
+    combined_audio = b""
     status_log = []
-    optimized_script = optimize_script(script)
-
-    # 使用 pydub 處理音頻
-    final_audio = AudioSegment.silent(duration=0)
 
-    client = OpenAI(api_key=audio_api_key)
+    # 优化脚本处理
+    optimized_script = optimize_script(script)
 
+    # 处理每一段
     for speaker, text in optimized_script:
         voice_to_use = speaker1_voice if speaker == "speaker-1" else speaker2_voice
         status_log.append(f"[{speaker}] {text}")
 
         try:
-            # 調用 API 生成音頻
-            with client.audio.speech.with_streaming_response.create(
-                model=audio_model,
-                voice=voice_to_use,
-                input=text,
-            ) as response:
-                with NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
-                    for chunk in response.iter_bytes():
-                        temp_file.write(chunk)
-                    temp_file.flush()
-
-                # 使用 pydub 加載和合併音頻
-                segment = AudioSegment.from_file(temp_file.name, format="mp3")
-                final_audio += segment
-
-                # 刪除臨時文件
-                os.unlink(temp_file.name)
-
+            # 生成这一段的音频
+            audio_chunk = get_mp3(
+                text,
+                voice_to_use,
+                audio_model,
+                audio_api_key
+            )
+            combined_audio += audio_chunk
         except Exception as e:
-            status_log.append(f"[錯誤] 無法生成音頻: {str(e)}")
-
-    # 將最終音頻轉換為 bytes
-    output_buffer = io.BytesIO()
-    final_audio.export(output_buffer, format="mp3")
-    output_buffer.seek(0)
+            status_log.append(f"[错误] 无法生成音频: {str(e)}")
 
-    return output_buffer.read(), "\n".join(status_log)
+    return combined_audio, "\n".join(status_log)
 
 def save_audio_file(audio_data: bytes) -> str:
-    """
-    將音頻數據保存為臨時檔案
-    """
+    """将音频数据保存为临时文件"""
     temp_dir = Path("./temp_audio")
     temp_dir.mkdir(exist_ok=True)
-    # 清理舊檔案
+    # 清理旧文件
     for old_file in temp_dir.glob("*.mp3"):
-        if old_file.stat().st_mtime < (time.time() - 24*60*60):  # 24小時前的檔案
+        if old_file.stat().st_mtime < (time.time() - 24*60*60):  # 24小时前的文件
             old_file.unlink()
-    # 創建新的臨時檔案
+    # 创建新的临时文件
     temp_file = NamedTemporaryFile(
         dir=temp_dir,
         delete=False,
@@ -151,23 +125,40 @@ def save_audio_file(audio_data: bytes) -> str:
     temp_file.close()
     return temp_file.name
 
-# Gradio 介面
+def process_and_save_audio(script, api_key, model, voice1, voice2):
+    """处理音频生成并保存文件"""
+    try:
+        audio_data, status_log = generate_audio_from_script(
+            script,
+            api_key,
+            model,
+            voice1,
+            voice2
+        )
+        audio_path = save_audio_file(audio_data)
+        return audio_path, status_log
+    except Exception as e:
+        error_message = f"生成音频时发生错误: {str(e)}"
+        print(error_message)
+        return None, error_message
+
+# Gradio 界面
 def create_gradio_interface():
     with gr.Blocks(title="TTS Generator", css="""
         #header { text-align: center; margin-bottom: 20px; }
     """) as demo:
-        gr.Markdown("# 語音合成器 | TTS Generator", elem_id="header")
+        gr.Markdown("# 语音合成器 | TTS Generator", elem_id="header")
         with gr.Row():
             with gr.Column(scale=1):
-                # 輸入區
+                # 输入区
                 script_input = gr.Textbox(
-                    label="輸入腳本 | Input Script",
-                    placeholder="""請貼上腳本內容,格式如下:
-speaker-1: 歡迎來到 David888 Podcast,我是 David...
+                    label="输入脚本 | Input Script",
+                    placeholder="""请粘贴脚本内容,格式如下:
+speaker-1: 欢迎来到 David888 Podcast,我是 David...
 speaker-2: 大家好,我是 Cordelia...
-沒有標記說話者的行會預設使用說話者1的聲音。
+没有标记说话者的行会默认使用说话者1的声音。
 
-提示:為提高效率,相同說話者的多行文字將自動合併處理。""",
+提示:为提高效率,相同说话者的多行文字将自动合并处理。""",
                     lines=20
                 )
                 api_key = gr.Textbox(
@@ -176,74 +167,47 @@ speaker-2: 大家好,我是 Cordelia...
                 )
                 with gr.Row():
                     audio_model = gr.Dropdown(
-                        label="音頻模型 | Audio Model",
+                        label="音频模型 | Audio Model",
                         choices=STANDARD_AUDIO_MODELS,
                         value="tts-1"
                     )
                     speaker1_voice = gr.Dropdown(
-                        label="說話者1聲音 | Speaker 1 Voice",
+                        label="说话者1声音 | Speaker 1 Voice",
                         choices=STANDARD_VOICES,
                         value="onyx"
                     )
                     speaker2_voice = gr.Dropdown(
-                        label="說話者2聲音 | Speaker 2 Voice",
+                        label="说话者2声音 | Speaker 2 Voice",
                         choices=STANDARD_VOICES,
                         value="nova"
                     )
-                generate_button = gr.Button("生成音頻 | Generate Audio")
+                generate_button = gr.Button("生成音频 | Generate Audio")
             with gr.Column(scale=1):
-                # 輸出區
+                # 输出区
                 audio_output = gr.Audio(
-                    label="生成的音頻 | Generated Audio",
+                    label="生成的音频 | Generated Audio",
                     type="filepath"
                 )
                 status_output = gr.Textbox(
-                    label="生成狀態 | Generation Status",
+                    label="生成状态 | Generation Status",
                     lines=20,
                     show_copy_button=True
                 )
-
-        # 進度條
-        progress_bar = gr.Progress()
 
-        # 事件處理
+        # 事件处理
         generate_button.click(
-            fn=lambda script, key, model, v1, v2, p: process_and_save_audio(
-                script, key, model, v1, v2, p
-            ),
+            fn=process_and_save_audio,
             inputs=[
                 script_input,
                 api_key,
                 audio_model,
                 speaker1_voice,
-                speaker2_voice,
+                speaker2_voice
             ],
             outputs=[audio_output, status_output]
        )
     return demo
 
-def process_and_save_audio(script, api_key, model, voice1, voice2, progress=gr.Progress()):
-    """
-    處理音頻生成並保存檔案
-    """
-    try:
-        progress(0, desc="準備處理腳本...")
-        audio_data, status_log = generate_audio_from_script(
-            script,
-            api_key,
-            model,
-            voice1,
-            voice2
-        )
-        progress(0.9, desc="保存音頻文件...")
-        audio_path = save_audio_file(audio_data)
-        progress(1.0, desc="完成!")
-        return audio_path, status_log
-    except Exception as e:
-        error_message = f"生成音頻時發生錯誤: {str(e)}"
-        print(error_message)
-        return None, error_message
-
 if __name__ == "__main__":
     demo = create_gradio_interface()
     demo.launch()
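
A minimal sketch (not part of the commit) of the grouping step the new byte-concatenation approach relies on: it runs a short two-speaker script through optimize_script and prints the merged segments, so each speaker turn maps to one get_mp3() call whose raw MP3 bytes are appended to combined_audio. It assumes app.py is importable from the working directory and that gradio and openai are installed, since importing the module pulls both in.

# Standalone usage sketch (not part of the commit): optimize_script() merges
# consecutive lines from the same speaker, so each speaker turn becomes a
# single TTS request instead of one request per line.
from app import optimize_script  # assumes app.py is on the path; importing it needs gradio/openai installed

script = """Welcome to the show, this opening line has no speaker tag.
This untagged line gets merged with the previous one.
speaker-2: Hi everyone, I'm Cordelia...
speaker-2: And this line is appended to the previous segment."""

for speaker, text in optimize_script(script):
    print(f"{speaker}: {text}")

# Expected output -- two merged segments rather than four API calls:
# speaker-1: Welcome to the show, this opening line has no speaker tag. This untagged line gets merged with the previous one.
# speaker-2: Hi everyone, I'm Cordelia... And this line is appended to the previous segment.

Dropping pydub means the result is a sequence of independently encoded MP3 streams rather than one re-encoded file; most players handle such concatenated MP3 data played back to back, though some may report an imprecise total duration.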