tbdavid2019 committed on
Commit
0172aae
·
1 Parent(s): 9d571d9
Files changed (2) hide show
  1. app.py +219 -0
  2. requirements.txt +9 -0
app.py ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import os
3
+ from pathlib import Path
4
+ from tempfile import NamedTemporaryFile
5
+ import time
6
+ import gradio as gr
7
+ from openai import OpenAI
8
+
9
# Standard audio model and voice options (used as dropdown choices in the UI).
STANDARD_AUDIO_MODELS = ["tts-1", "tts-1-hd"]

STANDARD_VOICES = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"]
23
+
24
def get_mp3(text: str, voice: str, audio_model: str, audio_api_key: str) -> bytes:
    """Generate speech audio for ``text`` with the OpenAI TTS API.

    Args:
        text: The text to be spoken.
        voice: Voice name forwarded to the API.
        audio_model: TTS model name forwarded to the API.
        audio_api_key: API key used to construct the OpenAI client.

    Returns:
        The complete audio payload, assembled from the streamed response.

    Raises:
        Exception: Any error raised while requesting or streaming the audio
            is printed and then re-raised unchanged.
    """
    tts_client = OpenAI(api_key=audio_api_key)
    request_args = {"model": audio_model, "voice": voice, "input": text}

    try:
        with tts_client.audio.speech.with_streaming_response.create(
            **request_args
        ) as streamed:
            # Drain the stream while the response is still open.
            return b"".join(streamed.iter_bytes())
    except Exception as err:
        print(f"Error generating audio: {err}")
        raise
43
+
44
def generate_audio_from_script(
    script: str,
    audio_api_key: str,
    audio_model: str = "tts-1",
    speaker1_voice: str = "onyx",
    speaker2_voice: str = "nova",
) -> tuple[bytes, str]:
    """Generate audio from a script, supporting two speakers.

    Each non-blank line is synthesized separately via ``get_mp3``. A line
    starting with ``speaker-1:`` or ``speaker-2:`` (case-insensitive) uses the
    matching voice and has the marker removed; any other line is spoken with
    the speaker 1 voice. Lines that contain no text once the marker is
    removed are skipped, so no request is made with empty input.

    Args:
        script: The script text, one utterance per line. ``None`` is treated
            as an empty script.
        audio_api_key: API key forwarded to ``get_mp3``.
        audio_model: TTS model name forwarded to ``get_mp3``.
        speaker1_voice: Voice for ``speaker-1:`` lines and unmarked lines.
        speaker2_voice: Voice for ``speaker-2:`` lines.

    Returns:
        A ``(audio, status)`` tuple: the per-line audio chunks concatenated in
        script order, and a newline-joined log with one entry per spoken line
        plus one entry per failed line.
    """
    # (marker prefix, voice, label shown in the status log)
    speaker_markers = (
        ("speaker-1:", speaker1_voice, "說話者1"),
        ("speaker-2:", speaker2_voice, "說話者2"),
    )
    audio_chunks: list[bytes] = []
    status_log: list[str] = []

    for raw_line in (script or "").splitlines():
        line = raw_line.strip()
        if not line:
            continue

        # Unmarked lines default to speaker 1.
        voice_to_use, label, text_to_speak = speaker1_voice, "說話者1", line
        lowered = line.lower()
        for prefix, voice, marker_label in speaker_markers:
            if lowered.startswith(prefix):
                voice_to_use, label = voice, marker_label
                text_to_speak = line.split(":", 1)[1].strip()
                break

        # A bare marker carries nothing to speak; treat it like a blank line.
        if not text_to_speak:
            continue

        status_log.append(f"[{label}] {text_to_speak}")

        try:
            audio_chunks.append(
                get_mp3(text_to_speak, voice_to_use, audio_model, audio_api_key)
            )
        except Exception as e:
            # Best effort: record the failure and keep going with later lines.
            status_log.append(f"[錯誤] 無法生成音頻: {e}")

    # Join once at the end instead of re-copying the buffer on every line.
    return b"".join(audio_chunks), "\n".join(status_log)
94
+
95
def save_audio_file(audio_data: bytes) -> str:
    """Save audio data to a new ``.mp3`` file under ``./temp_audio``.

    Before writing, ``.mp3`` files in that directory whose modification time
    is more than 24 hours old are removed. Cleanup is best-effort: a file that
    cannot be inspected or removed is skipped rather than aborting the save.

    Args:
        audio_data: Raw audio bytes to write.

    Returns:
        The path of the newly written file. The file is not deleted
        automatically when closed.
    """
    temp_dir = Path("./temp_audio")
    temp_dir.mkdir(exist_ok=True)

    # Remove files older than 24 hours.
    cutoff = time.time() - 24 * 60 * 60
    for old_file in temp_dir.glob("*.mp3"):
        try:
            if old_file.stat().st_mtime < cutoff:
                old_file.unlink()
        except OSError:
            # The file may have vanished or be locked; stale-file cleanup
            # must never prevent saving the fresh audio.
            continue

    # The context manager closes the handle even if the write fails;
    # delete=False keeps the file on disk for the caller.
    with NamedTemporaryFile(dir=temp_dir, delete=False, suffix=".mp3") as temp_file:
        temp_file.write(audio_data)

    return temp_file.name
118
+
119
# Gradio interface
def create_gradio_interface():
    """Build the Gradio Blocks UI for the TTS generator.

    The layout has an input column (script, API key, model and voice
    dropdowns, generate button) and an output column (audio player, status
    log). Clicking the button calls ``process_and_save_audio`` with the five
    input values and routes its two results to the audio and status outputs.

    Returns:
        The assembled ``gr.Blocks`` app; it is not launched here.
    """
    # NOTE(review): the nesting of the ``with`` blocks and the whitespace
    # inside the two multi-line strings below were reconstructed from a
    # flattened diff; confirm against the original file.
    page_css = "\n#header { text-align: center; margin-bottom: 20px; }\n"
    script_placeholder = (
        "請貼上腳本內容,格式如下:\n"
        "\n"
        "speaker-1: 歡迎來到 David888 Podcast,我是 David...\n"
        "speaker-2: 大家好,我是 Cordelia...\n"
        "\n"
        "沒有標記說話者的行會預設使用說話者1的聲音。"
    )

    with gr.Blocks(title="TTS Generator", css=page_css) as demo:
        gr.Markdown("# 語音合成器 | TTS Generator", elem_id="header")

        with gr.Row():
            # Input area
            with gr.Column(scale=1):
                script_box = gr.Textbox(
                    label="輸入腳本 | Input Script",
                    placeholder=script_placeholder,
                    lines=20,
                )
                key_box = gr.Textbox(label="OpenAI API Key", type="password")

                with gr.Row():
                    model_picker = gr.Dropdown(
                        label="音頻模型 | Audio Model",
                        choices=STANDARD_AUDIO_MODELS,
                        value="tts-1",
                    )
                    voice1_picker = gr.Dropdown(
                        label="說話者1聲音 | Speaker 1 Voice",
                        choices=STANDARD_VOICES,
                        value="onyx",
                    )
                    voice2_picker = gr.Dropdown(
                        label="說話者2聲音 | Speaker 2 Voice",
                        choices=STANDARD_VOICES,
                        value="nova",
                    )

                run_button = gr.Button("生成音頻 | Generate Audio")

            # Output area
            with gr.Column(scale=1):
                audio_player = gr.Audio(
                    label="生成的音頻 | Generated Audio",
                    type="filepath",
                )
                status_box = gr.Textbox(
                    label="生成狀態 | Generation Status",
                    lines=20,
                    show_copy_button=True,
                )

        # Event handling. The lambda defers the lookup of
        # process_and_save_audio until the button is actually clicked.
        click_inputs = [script_box, key_box, model_picker, voice1_picker, voice2_picker]
        click_outputs = [audio_player, status_box]
        run_button.click(
            fn=lambda script, key, model, v1, v2: process_and_save_audio(
                script, key, model, v1, v2
            ),
            inputs=click_inputs,
            outputs=click_outputs,
        )

    return demo
195
+
196
def process_and_save_audio(script, api_key, model, voice1, voice2):
    """Generate audio for a script and save it to a file.

    Args:
        script: Script text to synthesize.
        api_key: OpenAI API key forwarded to the generator.
        model: TTS model name.
        voice1: Voice for speaker 1.
        voice2: Voice for speaker 2.

    Returns:
        A ``(audio_path, status)`` pair. ``audio_path`` is ``None`` when the
        script is empty, when no audio was produced, or when an error
        occurred; ``status`` then carries the explanation (or the per-line
        log when one is available). Errors are reported through ``status``
        rather than raised.
    """
    # Nothing to synthesize: answer immediately without touching the API.
    if not script or not script.strip():
        return None, "請先輸入腳本內容 | Please enter a script first."

    try:
        audio_data, status_log = generate_audio_from_script(
            script,
            api_key,
            model,
            voice1,
            voice2,
        )

        # Every line may have failed; do not hand a zero-byte file to the
        # audio player. Surface the per-line log instead.
        if not audio_data:
            return None, status_log or "未生成任何音頻 | No audio was generated."

        audio_path = save_audio_file(audio_data)
        return audio_path, status_log

    except Exception as e:
        error_message = f"生成音頻時發生錯誤: {str(e)}"
        print(error_message)
        return None, error_message
216
+
217
# Script entry point: build the UI and start the Gradio server only when this
# file is executed directly, not when it is imported.
if __name__ == "__main__":
    demo = create_gradio_interface()
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ pandas
3
+ openai
4
+ loguru
5
+ promptic
6
+ tenacity
7
+ PyMuPDF
8
+ ebooklib
9
+ bs4