bluenevus committed on
Commit
1428109
·
verified ·
1 Parent(s): a88e595

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -19
app.py CHANGED
@@ -140,6 +140,16 @@ def detect_silence(audio, threshold=0.01, min_silence_len=1000):
140
  silent_regions.append((silent_start, len(audio)))
141
  return silent_regions
142
 
 
 
 
 
 
 
 
 
 
 
143
  def generate_audio(script_output, voice1, voice2, num_hosts, temperature, top_p, repetition_penalty, max_new_tokens):
144
  try:
145
  paragraphs = script_output.split('\n\n') # Split by double newline
@@ -192,6 +202,9 @@ def generate_audio(script_output, voice1, voice2, num_hosts, temperature, top_p,
192
 
193
  paragraph_audio = paragraph_audio.cpu().numpy().flatten()
194
 
 
 
 
195
  # Normalize audio to [-1, 1] range
196
  paragraph_audio = paragraph_audio / np.max(np.abs(paragraph_audio))
197
 
@@ -199,6 +212,9 @@ def generate_audio(script_output, voice1, voice2, num_hosts, temperature, top_p,
199
 
200
  final_audio = np.concatenate(audio_samples)
201
 
 
 
 
202
  # Convert to 16-bit PCM
203
  final_audio = (final_audio * 32767).astype(np.int16)
204
 
@@ -207,6 +223,9 @@ def generate_audio(script_output, voice1, voice2, num_hosts, temperature, top_p,
207
  sf.write(buffer, final_audio, 24000, format='WAV', subtype='PCM_16')
208
  buffer.seek(0)
209
 
 
 
 
210
  return buffer
211
  except Exception as e:
212
  logger.error(f"Error generating speech: {str(e)}")
@@ -323,27 +342,31 @@ def combined_callback(generate_script_clicks, generate_audio_clicks, advanced_se
323
  logger.error(f"Error generating podcast script: {str(e)}")
324
  return f"Error: {str(e)}", dash.no_update, dash.no_update, dash.no_update, "", ""
325
 
326
- elif trigger_id == "generate-audio-btn":
327
- if not script_output.strip():
328
- return dash.no_update, html.Div("No audio generated yet."), dash.no_update, dash.no_update, "", ""
 
 
 
 
 
 
 
329
 
330
- audio_buffer = generate_audio(script_output, voice1, voice2, num_hosts, temperature, top_p, repetition_penalty, max_new_tokens)
 
331
 
332
- if audio_buffer is not None:
333
- # Convert to base64 for audio playback
334
- audio_base64 = base64.b64encode(audio_buffer.getvalue()).decode('utf-8')
335
- src = f"data:audio/wav;base64,{audio_base64}"
336
-
337
- # Create a download link for the audio
338
- download_link = html.A("Download Audio", href=src, download="generated_audio.wav")
339
-
340
- return dash.no_update, html.Div([
341
- html.Audio(src=src, controls=True),
342
- html.Br(),
343
- download_link
344
- ]), dash.no_update, dash.no_update, "", ""
345
- else:
346
- return dash.no_update, html.Div("Error generating audio"), dash.no_update, dash.no_update, "", ""
347
 
348
  elif trigger_id == "advanced-settings-toggle":
349
  return dash.no_update, dash.no_update, not is_advanced_open, dash.no_update, "", ""
 
140
  silent_regions.append((silent_start, len(audio)))
141
  return silent_regions
142
 
143
+ import logging
144
+ import numpy as np
145
+ import torch
146
+ import soundfile as sf
147
+ import io
148
+ from tqdm import tqdm
149
+
150
+ logging.basicConfig(level=logging.INFO)
151
+ logger = logging.getLogger(__name__)
152
+
153
  def generate_audio(script_output, voice1, voice2, num_hosts, temperature, top_p, repetition_penalty, max_new_tokens):
154
  try:
155
  paragraphs = script_output.split('\n\n') # Split by double newline
 
202
 
203
  paragraph_audio = paragraph_audio.cpu().numpy().flatten()
204
 
205
+ # Log audio statistics
206
+ logger.info(f"Paragraph {i+1} audio shape: {paragraph_audio.shape}, min: {np.min(paragraph_audio)}, max: {np.max(paragraph_audio)}")
207
+
208
  # Normalize audio to [-1, 1] range
209
  paragraph_audio = paragraph_audio / np.max(np.abs(paragraph_audio))
210
 
 
212
 
213
  final_audio = np.concatenate(audio_samples)
214
 
215
+ # Log final audio statistics
216
+ logger.info(f"Final audio shape: {final_audio.shape}, min: {np.min(final_audio)}, max: {np.max(final_audio)}")
217
+
218
  # Convert to 16-bit PCM
219
  final_audio = (final_audio * 32767).astype(np.int16)
220
 
 
223
  sf.write(buffer, final_audio, 24000, format='WAV', subtype='PCM_16')
224
  buffer.seek(0)
225
 
226
+ # Log buffer size
227
+ logger.info(f"Audio buffer size: {buffer.getbuffer().nbytes} bytes")
228
+
229
  return buffer
230
  except Exception as e:
231
  logger.error(f"Error generating speech: {str(e)}")
 
342
  logger.error(f"Error generating podcast script: {str(e)}")
343
  return f"Error: {str(e)}", dash.no_update, dash.no_update, dash.no_update, "", ""
344
 
345
+ elif trigger_id == "generate-audio-btn":
346
+ if not script_output.strip():
347
+ return dash.no_update, html.Div("No audio generated yet."), dash.no_update, dash.no_update, "", ""
348
+
349
+ audio_buffer = generate_audio(script_output, voice1, voice2, num_hosts, temperature, top_p, repetition_penalty, max_new_tokens)
350
+
351
+ if audio_buffer is not None:
352
+ # Convert to base64 for audio playback
353
+ audio_base64 = base64.b64encode(audio_buffer.getvalue()).decode('utf-8')
354
+ src = f"data:audio/wav;base64,{audio_base64}"
355
 
356
+ # Log audio file size
357
+ logger.info(f"Generated audio file size: {len(audio_base64)} bytes")
358
 
359
+ # Create a download link for the audio
360
+ download_link = html.A("Download Audio", href=src, download="generated_audio.wav")
361
+
362
+ return dash.no_update, html.Div([
363
+ html.Audio(src=src, controls=True),
364
+ html.Br(),
365
+ download_link
366
+ ]), dash.no_update, dash.no_update, "", ""
367
+ else:
368
+ logger.error("Failed to generate audio")
369
+ return dash.no_update, html.Div("Error generating audio"), dash.no_update, dash.no_update, "", ""
 
 
 
 
370
 
371
  elif trigger_id == "advanced-settings-toggle":
372
  return dash.no_update, dash.no_update, not is_advanced_open, dash.no_update, "", ""