Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -140,6 +140,16 @@ def detect_silence(audio, threshold=0.01, min_silence_len=1000):
|
|
140 |
silent_regions.append((silent_start, len(audio)))
|
141 |
return silent_regions
|
142 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
143 |
def generate_audio(script_output, voice1, voice2, num_hosts, temperature, top_p, repetition_penalty, max_new_tokens):
|
144 |
try:
|
145 |
paragraphs = script_output.split('\n\n') # Split by double newline
|
@@ -192,6 +202,9 @@ def generate_audio(script_output, voice1, voice2, num_hosts, temperature, top_p,
|
|
192 |
|
193 |
paragraph_audio = paragraph_audio.cpu().numpy().flatten()
|
194 |
|
|
|
|
|
|
|
195 |
# Normalize audio to [-1, 1] range
|
196 |
paragraph_audio = paragraph_audio / np.max(np.abs(paragraph_audio))
|
197 |
|
@@ -199,6 +212,9 @@ def generate_audio(script_output, voice1, voice2, num_hosts, temperature, top_p,
|
|
199 |
|
200 |
final_audio = np.concatenate(audio_samples)
|
201 |
|
|
|
|
|
|
|
202 |
# Convert to 16-bit PCM
|
203 |
final_audio = (final_audio * 32767).astype(np.int16)
|
204 |
|
@@ -207,6 +223,9 @@ def generate_audio(script_output, voice1, voice2, num_hosts, temperature, top_p,
|
|
207 |
sf.write(buffer, final_audio, 24000, format='WAV', subtype='PCM_16')
|
208 |
buffer.seek(0)
|
209 |
|
|
|
|
|
|
|
210 |
return buffer
|
211 |
except Exception as e:
|
212 |
logger.error(f"Error generating speech: {str(e)}")
|
@@ -323,27 +342,31 @@ def combined_callback(generate_script_clicks, generate_audio_clicks, advanced_se
|
|
323 |
logger.error(f"Error generating podcast script: {str(e)}")
|
324 |
return f"Error: {str(e)}", dash.no_update, dash.no_update, dash.no_update, "", ""
|
325 |
|
326 |
-
|
327 |
-
|
328 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
329 |
|
330 |
-
|
|
|
331 |
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
download_link
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
-
|
343 |
-
download_link
|
344 |
-
]), dash.no_update, dash.no_update, "", ""
|
345 |
-
else:
|
346 |
-
return dash.no_update, html.Div("Error generating audio"), dash.no_update, dash.no_update, "", ""
|
347 |
|
348 |
elif trigger_id == "advanced-settings-toggle":
|
349 |
return dash.no_update, dash.no_update, not is_advanced_open, dash.no_update, "", ""
|
|
|
140 |
silent_regions.append((silent_start, len(audio)))
|
141 |
return silent_regions
|
142 |
|
143 |
+
import logging
|
144 |
+
import numpy as np
|
145 |
+
import torch
|
146 |
+
import soundfile as sf
|
147 |
+
import io
|
148 |
+
from tqdm import tqdm
|
149 |
+
|
150 |
+
logging.basicConfig(level=logging.INFO)
|
151 |
+
logger = logging.getLogger(__name__)
|
152 |
+
|
153 |
def generate_audio(script_output, voice1, voice2, num_hosts, temperature, top_p, repetition_penalty, max_new_tokens):
|
154 |
try:
|
155 |
paragraphs = script_output.split('\n\n') # Split by double newline
|
|
|
202 |
|
203 |
paragraph_audio = paragraph_audio.cpu().numpy().flatten()
|
204 |
|
205 |
+
# Log audio statistics
|
206 |
+
logger.info(f"Paragraph {i+1} audio shape: {paragraph_audio.shape}, min: {np.min(paragraph_audio)}, max: {np.max(paragraph_audio)}")
|
207 |
+
|
208 |
# Normalize audio to [-1, 1] range
|
209 |
paragraph_audio = paragraph_audio / np.max(np.abs(paragraph_audio))
|
210 |
|
|
|
212 |
|
213 |
final_audio = np.concatenate(audio_samples)
|
214 |
|
215 |
+
# Log final audio statistics
|
216 |
+
logger.info(f"Final audio shape: {final_audio.shape}, min: {np.min(final_audio)}, max: {np.max(final_audio)}")
|
217 |
+
|
218 |
# Convert to 16-bit PCM
|
219 |
final_audio = (final_audio * 32767).astype(np.int16)
|
220 |
|
|
|
223 |
sf.write(buffer, final_audio, 24000, format='WAV', subtype='PCM_16')
|
224 |
buffer.seek(0)
|
225 |
|
226 |
+
# Log buffer size
|
227 |
+
logger.info(f"Audio buffer size: {buffer.getbuffer().nbytes} bytes")
|
228 |
+
|
229 |
return buffer
|
230 |
except Exception as e:
|
231 |
logger.error(f"Error generating speech: {str(e)}")
|
|
|
342 |
logger.error(f"Error generating podcast script: {str(e)}")
|
343 |
return f"Error: {str(e)}", dash.no_update, dash.no_update, dash.no_update, "", ""
|
344 |
|
345 |
+
elif trigger_id == "generate-audio-btn":
|
346 |
+
if not script_output.strip():
|
347 |
+
return dash.no_update, html.Div("No audio generated yet."), dash.no_update, dash.no_update, "", ""
|
348 |
+
|
349 |
+
audio_buffer = generate_audio(script_output, voice1, voice2, num_hosts, temperature, top_p, repetition_penalty, max_new_tokens)
|
350 |
+
|
351 |
+
if audio_buffer is not None:
|
352 |
+
# Convert to base64 for audio playback
|
353 |
+
audio_base64 = base64.b64encode(audio_buffer.getvalue()).decode('utf-8')
|
354 |
+
src = f"data:audio/wav;base64,{audio_base64}"
|
355 |
|
356 |
+
# Log audio file size
|
357 |
+
logger.info(f"Generated audio file size: {len(audio_base64)} bytes")
|
358 |
|
359 |
+
# Create a download link for the audio
|
360 |
+
download_link = html.A("Download Audio", href=src, download="generated_audio.wav")
|
361 |
+
|
362 |
+
return dash.no_update, html.Div([
|
363 |
+
html.Audio(src=src, controls=True),
|
364 |
+
html.Br(),
|
365 |
+
download_link
|
366 |
+
]), dash.no_update, dash.no_update, "", ""
|
367 |
+
else:
|
368 |
+
logger.error("Failed to generate audio")
|
369 |
+
return dash.no_update, html.Div("Error generating audio"), dash.no_update, dash.no_update, "", ""
|
|
|
|
|
|
|
|
|
370 |
|
371 |
elif trigger_id == "advanced-settings-toggle":
|
372 |
return dash.no_update, dash.no_update, not is_advanced_open, dash.no_update, "", ""
|