Bils committed on
Commit a92463e · verified · 1 Parent(s): a76d08f

Update app.py

Files changed (1)
  1. app.py +40 -30
app.py CHANGED
@@ -21,7 +21,7 @@ from TTS.api import TTS
 # Load Environment Variables
 # ---------------------------------------------------------------------
 load_dotenv()
-HF_TOKEN = os.getenv("HF_TOKEN") # Adjust if needed
+HF_TOKEN = os.getenv("HF_TOKEN")
 
 # ---------------------------------------------------------------------
 # Global Model Caches
@@ -66,7 +66,6 @@ def get_musicgen_model(model_key: str = "facebook/musicgen-large"):
 
     device = "cuda" if torch.cuda.is_available() else "cpu"
     model.to(device)
-
     MUSICGEN_MODELS[model_key] = (model, processor)
     return model, processor
 
@@ -175,7 +174,7 @@ def generate_voice(script: str, tts_model_name: str = "tts_models/en/ljspeech/ta
 
 
 # ---------------------------------------------------------------------
-# Music Generation Function (Using facebook/musicgen-large)
+# Music Generation Function
 # ---------------------------------------------------------------------
 @spaces.GPU(duration=100)
 def generate_music(prompt: str, audio_length: int):
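Note: the body of generate_music is not shown in this diff. For reviewers unfamiliar with MusicGen, here is a minimal sketch of how a prompt becomes a .wav via the transformers API; the token budget and output filename are illustrative and not taken from app.py.

```python
# Sketch only: text-to-music with MusicGen via transformers (not the exact app.py code).
import scipy.io.wavfile
from transformers import AutoProcessor, MusicgenForConditionalGeneration

processor = AutoProcessor.from_pretrained("facebook/musicgen-large")
model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-large")

inputs = processor(text=["upbeat synth bed for a radio promo"], padding=True, return_tensors="pt")
# MusicGen emits roughly 50 audio tokens per second; 256 tokens is an illustrative length.
audio_values = model.generate(**inputs, max_new_tokens=256)

sampling_rate = model.config.audio_encoder.sampling_rate
scipy.io.wavfile.write("musicgen_out.wav", rate=sampling_rate, data=audio_values[0, 0].numpy())
```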
@@ -209,13 +208,15 @@ def generate_music(prompt: str, audio_length: int):
 
 
 # ---------------------------------------------------------------------
-# Audio Blending Function with Ducking
+# Audio Blending with Duration Sync & Ducking
 # ---------------------------------------------------------------------
 @spaces.GPU(duration=100)
 def blend_audio(voice_path: str, music_path: str, ducking: bool, duck_level: int = 10):
     """
-    Blends two audio files (voice and music). If ducking=True,
-    the music is attenuated by 'duck_level' dB while the voice is playing.
+    Blends two audio files (voice and music).
+    1. If music < voice, loops the music until it meets/exceeds the voice duration.
+    2. If music > voice, trims music to the voice duration.
+    3. If ducking=True, the music is attenuated by 'duck_level' dB while the voice is playing.
     Returns the file path to the blended .wav file.
     """
     try:
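Worked example of the new docstring rules: a 30 s voice-over with a 12 s music bed loops the music to 36 s (rule 1), trims it back to 30 s (rule 2), and, with ducking enabled, attenuates that 30 s bed by duck_level dB before the voice is overlaid (rule 3).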
@@ -225,20 +226,27 @@ def blend_audio(voice_path: str, music_path: str, ducking: bool, duck_level: int
         voice = AudioSegment.from_wav(voice_path)
         music = AudioSegment.from_wav(music_path)
 
-        # If the voice is longer than the music, extend music with silence
-        if len(voice) > len(music):
-            extension = AudioSegment.silent(duration=(len(voice) - len(music)))
-            music = music + extension
+        voice_len = len(voice)  # in milliseconds
+        music_len = len(music)  # in milliseconds
+
+        # 1) If the music is shorter than the voice, loop it:
+        if music_len < voice_len:
+            looped_music = AudioSegment.empty()
+            # Keep appending until we exceed voice length
+            while len(looped_music) < voice_len:
+                looped_music += music
+            music = looped_music
 
+        # 2) If the music is longer than the voice, truncate it:
+        if len(music) > voice_len:
+            music = music[:voice_len]
+
+        # Now music and voice are the same length
         if ducking:
-            # Step 1: Reduce music by `duck_level` dB for the portion matching the voice duration
-            ducked_music_part = music[:len(voice)] - duck_level
-            # Overlay voice on top of the ducked music portion
-            voice_overlaid = ducked_music_part.overlay(voice)
-
-            # Step 2: Keep the rest of the music as-is
-            remainder = music[len(voice):]
-            final_audio = voice_overlaid + remainder
+            # Step 1: Reduce music dB while voice is playing
+            ducked_music = music - duck_level
+            # Step 2: Overlay voice on top of ducked music
+            final_audio = ducked_music.overlay(voice)
         else:
             # No ducking, just overlay
             final_audio = music.overlay(voice)
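The same loop, trim, and duck flow can be exercised on its own with pydub. The sketch below uses silent segments so it runs without any input files; the 30 s / 12 s durations and the 10 dB duck are illustrative.

```python
# Stand-alone check of the loop/trim/duck logic introduced above (pydub only).
from pydub import AudioSegment

voice = AudioSegment.silent(duration=30_000)  # 30 s stand-in for the voice-over
music = AudioSegment.silent(duration=12_000)  # 12 s stand-in for the music bed
duck_level = 10                               # dB attenuation while the voice plays

# 1) Loop the music until it covers the voice duration
looped = AudioSegment.empty()
while len(looped) < len(voice):
    looped += music
music = looped                                # 36 s after looping

# 2) Trim the music to the exact voice duration
music = music[:len(voice)]                    # back to 30 s

# 3) Duck the music, then overlay the voice
final_audio = (music - duck_level).overlay(voice)

assert len(final_audio) == len(voice)         # both are 30 s
final_audio.export("blended_demo.wav", format="wav")
```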
@@ -256,16 +264,18 @@ def blend_audio(voice_path: str, music_path: str, ducking: bool, duck_level: int
 # ---------------------------------------------------------------------
 with gr.Blocks() as demo:
     gr.Markdown("""
-    # 🎧 AI Promo Studio with Music, Voice Over & Audio Blending 🚀
-    Welcome to **AI Promo Studio**, your one-stop solution for creating stunning and professional radio promos with ease!
-    Whether you're a sound designer, radio producer, or content creator, our AI-driven tools, powered by advanced LLM Llama models, empower you to bring your vision to life in just a few steps.
-
-    **Workflow**:
-    1. **Generate Script**
-    2. **Generate Voice-Over**
-    3. **Generate Music**
-    4. **Blend** (Voice + Music) with optional ducking
-    """)
+    # 🎧 AI Promo Studio
+    Welcome to **AI Promo Studio**, your all-in-one solution for creating professional, engaging audio promos with minimal effort!
+
+    This next-generation platform uses powerful AI models to handle:
+    - **Script Generation**: Craft concise and impactful copy with LLaMA.
+    - **Voice Synthesis**: Convert text into natural-sounding voice-overs using Coqui TTS.
+    - **Music Production**: Generate custom music tracks with MusicGen Large for sound bed.
+    - **Seamless Blending**: Easily combine voice and music—loop or trim tracks to match your desired promo length, with optional ducking to keep the voice front and center.
+
+    Whether you’re a radio producer, podcaster, or content creator, **AI Promo Studio** streamlines your entire production pipeline—cutting hours of manual editing down to a few clicks.
+    """)
+
 
     with gr.Tabs():
         # Step 1: Generate Script
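The **Voice Synthesis** bullet above refers to Coqui TTS, which app.py already imports (`from TTS.api import TTS`, visible in the first hunk header). A minimal sketch of that API follows; the model name is an illustrative Coqui model, since generate_voice's default (`tts_models/en/ljspeech/ta…`) is truncated in this view.

```python
# Illustrative Coqui TTS call for the voice-over step (not the exact app.py code).
from TTS.api import TTS

# Example model; app.py's default model name is truncated in this diff view.
tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC")
tts.tts_to_file(text="Tonight at eight, only on your favorite station.", file_path="voice.wav")
```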
@@ -342,9 +352,9 @@ with gr.Blocks() as demo:
             outputs=[music_output],
         )
 
-        # Step 4: Blend Audio
+        # Step 4: Blend Audio (Loop/Trim + Ducking)
         with gr.Tab("Step 4: Blend Audio"):
-            gr.Markdown("Combine voice-over and music, optionally applying ducking.")
+            gr.Markdown("**Music** will be looped or trimmed to match **Voice** duration, then optionally ducked.")
             ducking_checkbox = gr.Checkbox(label="Enable Ducking?", value=True)
             duck_level_slider = gr.Slider(
                 label="Ducking Level (dB attenuation)",
 