Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -15,6 +15,7 @@ from pydub import AudioSegment
|
|
15 |
from docx import Document
|
16 |
import PyPDF2
|
17 |
from tqdm import tqdm
|
|
|
18 |
|
19 |
# Initialize logging
|
20 |
logging.basicConfig(level=logging.INFO)
|
@@ -191,16 +192,22 @@ def generate_audio(script_output, voice1, voice2, num_hosts, temperature, top_p,
|
|
191 |
|
192 |
paragraph_audio = paragraph_audio.cpu().numpy().flatten()
|
193 |
|
194 |
-
|
195 |
-
|
196 |
-
paragraph_audio = paragraph_audio[:silences[-1][1]]
|
197 |
|
198 |
audio_samples.append(paragraph_audio)
|
199 |
|
200 |
final_audio = np.concatenate(audio_samples)
|
201 |
-
final_audio = np.int16(final_audio / np.max(np.abs(final_audio)) * 32767)
|
202 |
|
203 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
204 |
except Exception as e:
|
205 |
logger.error(f"Error generating speech: {str(e)}")
|
206 |
return None
|
@@ -320,11 +327,11 @@ def combined_callback(generate_script_clicks, generate_audio_clicks, advanced_se
|
|
320 |
if not script_output.strip():
|
321 |
return dash.no_update, html.Div("No audio generated yet."), dash.no_update, dash.no_update, "", ""
|
322 |
|
323 |
-
|
324 |
|
325 |
-
if
|
326 |
# Convert to base64 for audio playback
|
327 |
-
audio_base64 = base64.b64encode(
|
328 |
src = f"data:audio/wav;base64,{audio_base64}"
|
329 |
|
330 |
# Create a download link for the audio
|
|
|
15 |
from docx import Document
|
16 |
import PyPDF2
|
17 |
from tqdm import tqdm
|
18 |
+
import soundfile as sf
|
19 |
|
20 |
# Initialize logging
|
21 |
logging.basicConfig(level=logging.INFO)
|
|
|
192 |
|
193 |
paragraph_audio = paragraph_audio.cpu().numpy().flatten()
|
194 |
|
195 |
+
# Normalize audio to [-1, 1] range
|
196 |
+
paragraph_audio = paragraph_audio / np.max(np.abs(paragraph_audio))
|
|
|
197 |
|
198 |
audio_samples.append(paragraph_audio)
|
199 |
|
200 |
final_audio = np.concatenate(audio_samples)
|
|
|
201 |
|
202 |
+
# Convert to 16-bit PCM
|
203 |
+
final_audio = (final_audio * 32767).astype(np.int16)
|
204 |
+
|
205 |
+
# Save as WAV file in memory
|
206 |
+
buffer = io.BytesIO()
|
207 |
+
sf.write(buffer, final_audio, 24000, format='WAV', subtype='PCM_16')
|
208 |
+
buffer.seek(0)
|
209 |
+
|
210 |
+
return buffer
|
211 |
except Exception as e:
|
212 |
logger.error(f"Error generating speech: {str(e)}")
|
213 |
return None
|
|
|
327 |
if not script_output.strip():
|
328 |
return dash.no_update, html.Div("No audio generated yet."), dash.no_update, dash.no_update, "", ""
|
329 |
|
330 |
+
audio_buffer = generate_audio(script_output, voice1, voice2, num_hosts, temperature, top_p, repetition_penalty, max_new_tokens)
|
331 |
|
332 |
+
if audio_buffer is not None:
|
333 |
# Convert to base64 for audio playback
|
334 |
+
audio_base64 = base64.b64encode(audio_buffer.getvalue()).decode('utf-8')
|
335 |
src = f"data:audio/wav;base64,{audio_base64}"
|
336 |
|
337 |
# Create a download link for the audio
|