Spaces: Running on Zero

Update app.py

app.py CHANGED
@@ -21,7 +21,7 @@ from TTS.api import TTS
 # Load Environment Variables
 # ---------------------------------------------------------------------
 load_dotenv()
-HF_TOKEN = os.getenv("HF_TOKEN")
+HF_TOKEN = os.getenv("HF_TOKEN")
 
 # ---------------------------------------------------------------------
 # Global Model Caches
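For context on the hunk above: the app reads its Hugging Face token from a `.env` file via python-dotenv. The sketch below shows one common way such a token is loaded and used; the `login()` call is an assumption added for illustration, since the diff only shows the `HF_TOKEN` assignment.

```python
# Minimal sketch (not taken from app.py): load HF_TOKEN from a local .env file
# and authenticate with the Hugging Face Hub. The login() step is an assumption;
# app.py may instead pass the token directly to from_pretrained().
import os

from dotenv import load_dotenv
from huggingface_hub import login

load_dotenv()                     # reads key=value pairs from a local .env file
HF_TOKEN = os.getenv("HF_TOKEN")  # None if the variable is not set

if HF_TOKEN:
    login(token=HF_TOKEN)         # needed for gated checkpoints such as LLaMA
```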
@@ -66,7 +66,6 @@ def get_musicgen_model(model_key: str = "facebook/musicgen-large"):
 
     device = "cuda" if torch.cuda.is_available() else "cpu"
     model.to(device)
-
     MUSICGEN_MODELS[model_key] = (model, processor)
     return model, processor
 
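The hunk above only removes a blank line, but it shows the caching pattern inside `get_musicgen_model`: load once, move to GPU when available, and store the `(model, processor)` pair in the global `MUSICGEN_MODELS` dict. Below is a hedged sketch of that pattern; the `from_pretrained` calls are assumptions based on the transformers MusicGen API, since the loading lines fall outside the hunk.

```python
# Sketch of the cache-and-load pattern suggested by the hunk; the exact loader
# calls in app.py are not visible in the diff, so the from_pretrained lines
# below are assumptions based on the public transformers MusicGen API.
import torch
from transformers import AutoProcessor, MusicgenForConditionalGeneration

MUSICGEN_MODELS = {}  # global cache: model_key -> (model, processor)

def get_musicgen_model(model_key: str = "facebook/musicgen-large"):
    # Return the cached pair if this checkpoint was already loaded.
    if model_key in MUSICGEN_MODELS:
        return MUSICGEN_MODELS[model_key]

    model = MusicgenForConditionalGeneration.from_pretrained(model_key)
    processor = AutoProcessor.from_pretrained(model_key)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    MUSICGEN_MODELS[model_key] = (model, processor)
    return model, processor
```

Caching matters here because the large checkpoint would otherwise be re-downloaded and re-initialized on every request, which is especially costly when GPU time is metered per call.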
@@ -175,7 +174,7 @@ def generate_voice(script: str, tts_model_name: str = "tts_models/en/ljspeech/ta
 
 
 # ---------------------------------------------------------------------
-# Music Generation Function
+# Music Generation Function
 # ---------------------------------------------------------------------
 @spaces.GPU(duration=100)
 def generate_music(prompt: str, audio_length: int):
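`generate_music` is wrapped in `@spaces.GPU(duration=100)`, which on a ZeroGPU Space requests a GPU allocation of roughly 100 seconds per call. A minimal sketch of the decorator pattern, with a placeholder body rather than the real MusicGen code:

```python
# Minimal ZeroGPU decorator sketch; the body is a placeholder stub, not the
# generate_music implementation from app.py.
import spaces
import torch

@spaces.GPU(duration=100)  # request a GPU for up to ~100 s for each call
def generate_music_stub(prompt: str, audio_length: int) -> str:
    # On ZeroGPU hardware, CUDA should be available inside the decorated call.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return f"would generate {audio_length}s of music for {prompt!r} on {device}"
```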
@@ -209,13 +208,15 @@ def generate_music(prompt: str, audio_length: int):
 
 
 # ---------------------------------------------------------------------
-# Audio Blending
+# Audio Blending with Duration Sync & Ducking
 # ---------------------------------------------------------------------
 @spaces.GPU(duration=100)
 def blend_audio(voice_path: str, music_path: str, ducking: bool, duck_level: int = 10):
     """
-    Blends two audio files (voice and music).
-
+    Blends two audio files (voice and music).
+    1. If music < voice, loops the music until it meets/exceeds the voice duration.
+    2. If music > voice, trims music to the voice duration.
+    3. If ducking=True, the music is attenuated by 'duck_level' dB while the voice is playing.
     Returns the file path to the blended .wav file.
     """
     try:
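Given the updated docstring, a call to `blend_audio` might look like the sketch below; the file paths are placeholders, not assets shipped with the Space.

```python
# Hypothetical call to the blend_audio function shown above; "voice.wav" and
# "music.wav" are placeholder paths used only for illustration.
blended_path = blend_audio(
    voice_path="voice.wav",
    music_path="music.wav",
    ducking=True,      # attenuate the music while the voice plays
    duck_level=10,     # dB of attenuation applied to the music bed
)
print(blended_path)    # path to the blended .wav returned by the function
```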
@@ -225,20 +226,27 @@ def blend_audio(voice_path: str, music_path: str, ducking: bool, duck_level: int
         voice = AudioSegment.from_wav(voice_path)
         music = AudioSegment.from_wav(music_path)
 
-
-
-
-
+        voice_len = len(voice) # in milliseconds
+        music_len = len(music) # in milliseconds
+
+        # 1) If the music is shorter than the voice, loop it:
+        if music_len < voice_len:
+            looped_music = AudioSegment.empty()
+            # Keep appending until we exceed voice length
+            while len(looped_music) < voice_len:
+                looped_music += music
+            music = looped_music
 
+        # 2) If the music is longer than the voice, truncate it:
+        if len(music) > voice_len:
+            music = music[:voice_len]
+
+        # Now music and voice are the same length
         if ducking:
-            # Step 1: Reduce music
-
-            # Overlay voice on top of
-
-
-            # Step 2: Keep the rest of the music as-is
-            remainder = music[len(voice):]
-            final_audio = voice_overlaid + remainder
+            # Step 1: Reduce music dB while voice is playing
+            ducked_music = music - duck_level
+            # Step 2: Overlay voice on top of ducked music
+            final_audio = ducked_music.overlay(voice)
         else:
             # No ducking, just overlay
             final_audio = music.overlay(voice)
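The new body leans on a few pydub behaviors: `len()` returns milliseconds, subtracting a number lowers gain by that many dB, slicing trims by time, and `overlay()` mixes two segments. Here is a self-contained sketch of the same loop/trim/duck technique; the file names are placeholders and the final `export()` is an assumption, since the hunk ends before the function writes its output file.

```python
# Self-contained sketch of the loop/trim/duck technique used in blend_audio;
# file names are placeholders, and the export step is an assumption (the hunk
# does not show how the function writes its result).
from pydub import AudioSegment

voice = AudioSegment.from_wav("voice.wav")
music = AudioSegment.from_wav("music.wav")

# Loop the music until it is at least as long as the voice (lengths are in ms).
looped = AudioSegment.empty()
while len(looped) < len(voice):
    looped += music
music = looped[: len(voice)]   # trim the loop to exactly the voice duration

ducked = music - 10            # pydub: subtracting N lowers gain by N dB
final = ducked.overlay(voice)  # mix the voice on top of the ducked music bed
final.export("blended.wav", format="wav")
```

This also illustrates why the commit drops the old `remainder`-based approach: once the bed is looped or trimmed to the voice length, a single `overlay()` covers the whole promo and there is no leftover tail to stitch back on.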
@@ -256,16 +264,18 @@ def blend_audio(voice_path: str, music_path: str, ducking: bool, duck_level: int
 # ---------------------------------------------------------------------
 with gr.Blocks() as demo:
     gr.Markdown("""
-
-
-
-
-
-
-
-
-
-
+# 🎧 AI Promo Studio
+Welcome to **AI Promo Studio**, your all-in-one solution for creating professional, engaging audio promos with minimal effort!
+
+This next-generation platform uses powerful AI models to handle:
+- **Script Generation**: Craft concise and impactful copy with LLaMA.
+- **Voice Synthesis**: Convert text into natural-sounding voice-overs using Coqui TTS.
+- **Music Production**: Generate custom music tracks with MusicGen Large for sound bed.
+- **Seamless Blending**: Easily combine voice and music—loop or trim tracks to match your desired promo length, with optional ducking to keep the voice front and center.
+
+Whether you’re a radio producer, podcaster, or content creator, **AI Promo Studio** streamlines your entire production pipeline—cutting hours of manual editing down to a few clicks.
+    """)
+
 
     with gr.Tabs():
         # Step 1: Generate Script
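The UI hunk above adds a Markdown intro inside `gr.Blocks` ahead of the step tabs. A stripped-down sketch of that layout, with placeholder tab contents rather than the real Step 1-4 components:

```python
# Stripped-down sketch of the Blocks/Tabs layout used by the Space; the tab
# bodies are placeholders, not the actual Step 1-4 components from app.py.
import gradio as gr

with gr.Blocks() as demo:
    gr.Markdown("# 🎧 AI Promo Studio")

    with gr.Tabs():
        with gr.Tab("Step 1: Generate Script"):
            gr.Markdown("Script generation components go here.")
        with gr.Tab("Step 4: Blend Audio"):
            gr.Markdown("Blending components go here.")

if __name__ == "__main__":
    demo.launch()
```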
@@ -342,9 +352,9 @@ with gr.Blocks() as demo:
                 outputs=[music_output],
             )
 
-        # Step 4: Blend Audio
+        # Step 4: Blend Audio (Loop/Trim + Ducking)
         with gr.Tab("Step 4: Blend Audio"):
-            gr.Markdown("
+            gr.Markdown("**Music** will be looped or trimmed to match **Voice** duration, then optionally ducked.")
             ducking_checkbox = gr.Checkbox(label="Enable Ducking?", value=True)
             duck_level_slider = gr.Slider(
                 label="Ducking Level (dB attenuation)",
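The final hunk only touches the Step 4 comment and Markdown line, but the tab presumably wires the checkbox and slider into `blend_audio`. The sketch below shows one plausible wiring; the audio pickers, button, output component, and the stubbed `blend_audio` are assumptions, since they sit outside the hunk.

```python
# Hypothetical wiring for the Step 4 tab; only the Markdown, checkbox, and
# slider lines appear in the diff. The audio pickers, button, output component,
# and the blend_audio stub below are assumptions for illustration.
import gradio as gr

def blend_audio(voice_path: str, music_path: str, ducking: bool, duck_level: int = 10) -> str:
    """Stand-in for the real blend_audio defined earlier in app.py."""
    return voice_path  # placeholder: the real function returns the blended .wav path

with gr.Blocks() as demo:
    with gr.Tabs():
        with gr.Tab("Step 4: Blend Audio"):
            gr.Markdown("**Music** will be looped or trimmed to match **Voice** duration, then optionally ducked.")
            voice_audio = gr.Audio(label="Voice Track", type="filepath")
            music_audio = gr.Audio(label="Music Track", type="filepath")
            ducking_checkbox = gr.Checkbox(label="Enable Ducking?", value=True)
            duck_level_slider = gr.Slider(
                label="Ducking Level (dB attenuation)",
                minimum=0, maximum=30, step=1, value=10,
            )
            blend_button = gr.Button("Blend Voice + Music")
            blended_output = gr.Audio(label="Final Promo", type="filepath")

            blend_button.click(
                fn=blend_audio,
                inputs=[voice_audio, music_audio, ducking_checkbox, duck_level_slider],
                outputs=[blended_output],
            )
```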