Spaces: Running on Zero

Update app.py

app.py CHANGED
@@ -21,7 +21,7 @@ from TTS.api import TTS
 # Load Environment Variables
 # ---------------------------------------------------------------------
 load_dotenv()
-HF_TOKEN = os.getenv("HF_TOKEN")
+HF_TOKEN = os.getenv("HF_TOKEN")
 
 # ---------------------------------------------------------------------
 # Global Model Caches
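For context on the hunk above: the app reads its Hugging Face token from a `.env` file via python-dotenv. The sketch below shows one common way such a token is loaded and used; the `login()` call is an assumption added for illustration, since the diff only shows the `HF_TOKEN` assignment.

```python
# Minimal sketch (not taken from app.py): load HF_TOKEN from a local .env file
# and authenticate with the Hugging Face Hub. The login() step is an assumption;
# app.py may instead pass the token directly to from_pretrained().
import os

from dotenv import load_dotenv
from huggingface_hub import login

load_dotenv()                     # reads key=value pairs from a local .env file
HF_TOKEN = os.getenv("HF_TOKEN")  # None if the variable is not set

if HF_TOKEN:
    login(token=HF_TOKEN)         # needed for gated checkpoints such as LLaMA
```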
@@ -66,7 +66,6 @@ def get_musicgen_model(model_key: str = "facebook/musicgen-large"):
 
     device = "cuda" if torch.cuda.is_available() else "cpu"
     model.to(device)
-
     MUSICGEN_MODELS[model_key] = (model, processor)
     return model, processor
 
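The hunk above only removes a blank line, but it shows the caching pattern inside `get_musicgen_model`: load once, move to GPU when available, and store the `(model, processor)` pair in the global `MUSICGEN_MODELS` dict. Below is a hedged sketch of that pattern; the `from_pretrained` calls are assumptions based on the transformers MusicGen API, since the loading lines fall outside the hunk.

```python
# Sketch of the cache-and-load pattern suggested by the hunk; the exact loader
# calls in app.py are not visible in the diff, so the from_pretrained lines
# below are assumptions based on the public transformers MusicGen API.
import torch
from transformers import AutoProcessor, MusicgenForConditionalGeneration

MUSICGEN_MODELS = {}  # global cache: model_key -> (model, processor)

def get_musicgen_model(model_key: str = "facebook/musicgen-large"):
    # Return the cached pair if this checkpoint was already loaded.
    if model_key in MUSICGEN_MODELS:
        return MUSICGEN_MODELS[model_key]

    model = MusicgenForConditionalGeneration.from_pretrained(model_key)
    processor = AutoProcessor.from_pretrained(model_key)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    MUSICGEN_MODELS[model_key] = (model, processor)
    return model, processor
```

Caching matters here because the large checkpoint would otherwise be re-downloaded and re-initialized on every request, which is especially costly when GPU time is metered per call.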
@@ -175,7 +174,7 @@ def generate_voice(script: str, tts_model_name: str = "tts_models/en/ljspeech/ta
 
 
 # ---------------------------------------------------------------------
-# Music Generation Function
+# Music Generation Function
 # ---------------------------------------------------------------------
 @spaces.GPU(duration=100)
 def generate_music(prompt: str, audio_length: int):
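`generate_music` is wrapped in `@spaces.GPU(duration=100)`, which on a ZeroGPU Space requests a GPU allocation of roughly 100 seconds per call. A minimal sketch of the decorator pattern, with a placeholder body rather than the real MusicGen code:

```python
# Minimal ZeroGPU decorator sketch; the body is a placeholder stub, not the
# generate_music implementation from app.py.
import spaces
import torch

@spaces.GPU(duration=100)  # request a GPU for up to ~100 s for each call
def generate_music_stub(prompt: str, audio_length: int) -> str:
    # On ZeroGPU hardware, CUDA should be available inside the decorated call.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return f"would generate {audio_length}s of music for {prompt!r} on {device}"
```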
@@ -209,13 +208,15 @@ def generate_music(prompt: str, audio_length: int):
 
 
 # ---------------------------------------------------------------------
-# Audio Blending
+# Audio Blending with Duration Sync & Ducking
 # ---------------------------------------------------------------------
 @spaces.GPU(duration=100)
 def blend_audio(voice_path: str, music_path: str, ducking: bool, duck_level: int = 10):
     """
-    Blends two audio files (voice and music).
-
+    Blends two audio files (voice and music).
+    1. If music < voice, loops the music until it meets/exceeds the voice duration.
+    2. If music > voice, trims music to the voice duration.
+    3. If ducking=True, the music is attenuated by 'duck_level' dB while the voice is playing.
     Returns the file path to the blended .wav file.
     """
     try:
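Given the updated docstring, a call to `blend_audio` might look like the sketch below; the file paths are placeholders, not assets shipped with the Space.

```python
# Hypothetical call to the blend_audio function shown above; "voice.wav" and
# "music.wav" are placeholder paths used only for illustration.
blended_path = blend_audio(
    voice_path="voice.wav",
    music_path="music.wav",
    ducking=True,      # attenuate the music while the voice plays
    duck_level=10,     # dB of attenuation applied to the music bed
)
print(blended_path)    # path to the blended .wav returned by the function
```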
@@ -225,20 +226,27 @@ def blend_audio(voice_path: str, music_path: str, ducking: bool, duck_level: int
         voice = AudioSegment.from_wav(voice_path)
         music = AudioSegment.from_wav(music_path)
 
-
-
-
-
+        voice_len = len(voice) # in milliseconds
+        music_len = len(music) # in milliseconds
+
+        # 1) If the music is shorter than the voice, loop it:
+        if music_len < voice_len:
+            looped_music = AudioSegment.empty()
+            # Keep appending until we exceed voice length
+            while len(looped_music) < voice_len:
+                looped_music += music
+            music = looped_music
 
+        # 2) If the music is longer than the voice, truncate it:
+        if len(music) > voice_len:
+            music = music[:voice_len]
+
+        # Now music and voice are the same length
         if ducking:
-            # Step 1: Reduce music
-
-            # Overlay voice on top of
-
-
-            # Step 2: Keep the rest of the music as-is
-            remainder = music[len(voice):]
-            final_audio = voice_overlaid + remainder
+            # Step 1: Reduce music dB while voice is playing
+            ducked_music = music - duck_level
+            # Step 2: Overlay voice on top of ducked music
+            final_audio = ducked_music.overlay(voice)
         else:
             # No ducking, just overlay
             final_audio = music.overlay(voice)
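The new body leans on a few pydub behaviors: `len()` returns milliseconds, subtracting a number lowers gain by that many dB, slicing trims by time, and `overlay()` mixes two segments. Here is a self-contained sketch of the same loop/trim/duck technique; the file names are placeholders and the final `export()` is an assumption, since the hunk ends before the function writes its output file.

```python
# Self-contained sketch of the loop/trim/duck technique used in blend_audio;
# file names are placeholders, and the export step is an assumption (the hunk
# does not show how the function writes its result).
from pydub import AudioSegment

voice = AudioSegment.from_wav("voice.wav")
music = AudioSegment.from_wav("music.wav")

# Loop the music until it is at least as long as the voice (lengths are in ms).
looped = AudioSegment.empty()
while len(looped) < len(voice):
    looped += music
music = looped[: len(voice)]   # trim the loop to exactly the voice duration

ducked = music - 10            # pydub: subtracting N lowers gain by N dB
final = ducked.overlay(voice)  # mix the voice on top of the ducked music bed
final.export("blended.wav", format="wav")
```

This also illustrates why the commit drops the old `remainder`-based approach: once the bed is looped or trimmed to the voice length, a single `overlay()` covers the whole promo and there is no leftover tail to stitch back on.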
@@ -256,16 +264,18 @@ def blend_audio(voice_path: str, music_path: str, ducking: bool, duck_level: int
 # ---------------------------------------------------------------------
 with gr.Blocks() as demo:
     gr.Markdown("""
-
-
-
-
-
-
-
-
-
-
+# 🎧 AI Promo Studio
+Welcome to **AI Promo Studio**, your all-in-one solution for creating professional, engaging audio promos with minimal effort!
+
+This next-generation platform uses powerful AI models to handle:
+- **Script Generation**: Craft concise and impactful copy with LLaMA.
+- **Voice Synthesis**: Convert text into natural-sounding voice-overs using Coqui TTS.
+- **Music Production**: Generate custom music tracks with MusicGen Large for sound bed.
+- **Seamless Blending**: Easily combine voice and music—loop or trim tracks to match your desired promo length, with optional ducking to keep the voice front and center.
+
+Whether you’re a radio producer, podcaster, or content creator, **AI Promo Studio** streamlines your entire production pipeline—cutting hours of manual editing down to a few clicks.
+    """)
+
 
     with gr.Tabs():
         # Step 1: Generate Script
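The UI hunk above adds a Markdown intro inside `gr.Blocks` ahead of the step tabs. A stripped-down sketch of that layout, with placeholder tab contents rather than the real Step 1-4 components:

```python
# Stripped-down sketch of the Blocks/Tabs layout used by the Space; the tab
# bodies are placeholders, not the actual Step 1-4 components from app.py.
import gradio as gr

with gr.Blocks() as demo:
    gr.Markdown("# 🎧 AI Promo Studio")

    with gr.Tabs():
        with gr.Tab("Step 1: Generate Script"):
            gr.Markdown("Script generation components go here.")
        with gr.Tab("Step 4: Blend Audio"):
            gr.Markdown("Blending components go here.")

if __name__ == "__main__":
    demo.launch()
```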
@@ -342,9 +352,9 @@ with gr.Blocks() as demo:
                 outputs=[music_output],
             )
 
-        # Step 4: Blend Audio
+        # Step 4: Blend Audio (Loop/Trim + Ducking)
         with gr.Tab("Step 4: Blend Audio"):
-            gr.Markdown("
+            gr.Markdown("**Music** will be looped or trimmed to match **Voice** duration, then optionally ducked.")
             ducking_checkbox = gr.Checkbox(label="Enable Ducking?", value=True)
             duck_level_slider = gr.Slider(
                 label="Ducking Level (dB attenuation)",
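The final hunk only touches the Step 4 comment and Markdown line, but the tab presumably wires the checkbox and slider into `blend_audio`. The sketch below shows one plausible wiring; the audio pickers, button, output component, and the stubbed `blend_audio` are assumptions, since they sit outside the hunk.

```python
# Hypothetical wiring for the Step 4 tab; only the Markdown, checkbox, and
# slider lines appear in the diff. The audio pickers, button, output component,
# and the blend_audio stub below are assumptions for illustration.
import gradio as gr

def blend_audio(voice_path: str, music_path: str, ducking: bool, duck_level: int = 10) -> str:
    """Stand-in for the real blend_audio defined earlier in app.py."""
    return voice_path  # placeholder: the real function returns the blended .wav path

with gr.Blocks() as demo:
    with gr.Tabs():
        with gr.Tab("Step 4: Blend Audio"):
            gr.Markdown("**Music** will be looped or trimmed to match **Voice** duration, then optionally ducked.")
            voice_audio = gr.Audio(label="Voice Track", type="filepath")
            music_audio = gr.Audio(label="Music Track", type="filepath")
            ducking_checkbox = gr.Checkbox(label="Enable Ducking?", value=True)
            duck_level_slider = gr.Slider(
                label="Ducking Level (dB attenuation)",
                minimum=0, maximum=30, step=1, value=10,
            )
            blend_button = gr.Button("Blend Voice + Music")
            blended_output = gr.Audio(label="Final Promo", type="filepath")

            blend_button.click(
                fn=blend_audio,
                inputs=[voice_audio, music_audio, ducking_checkbox, duck_level_slider],
                outputs=[blended_output],
            )
```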