Spaces:

Bils
/

AIPromoStudio

Running on Zero

App Files Community

Bils committed on Jan 12

Commit

e564c8e

verified ·

1 Parent(s): 9fc7b58

Update app.py

Browse files

Files changed (1) hide show

app.py +71 -74

app.py CHANGED Viewed

@@ -35,25 +35,19 @@ def generate_script(user_prompt: str, model_id: str, token: str, duration: int):
         llama_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
         system_prompt = (
-            "You are an expert radio imaging producer specializing in sound design and music.\n"
-            "---\n"
-            f"Based on the user's concept and the selected duration of {duration} seconds, craft a concise, engaging promo script.\n"
-            "---\n"
-            "Ensure the script fits within the time limit and suggest a matching music style that complements the theme."
         )
         combined_prompt = f"{system_prompt}\nUser concept: {user_prompt}\nRefined script and music suggestion:"
-        result = llama_pipeline(combined_prompt, max_new_tokens=500, do_sample=True, temperature=0.9)
-        generated_text = result[0]["generated_text"]
-        if "Refined script and music suggestion:" in generated_text:
-            parts = generated_text.split("Refined script and music suggestion:", 1)[-1].strip()
-            if "Music Style:" in parts:
-                script, music_suggestion = parts.split("Music Style:", 1)
-                return script.strip(), music_suggestion.strip()
-            else:
-                return parts.strip(), "No specific music suggestion found."
-        return "Error: Could not parse the script.", None
     except Exception as e:
         return f"Error generating script: {e}", None
@@ -61,9 +55,10 @@ def generate_script(user_prompt: str, model_id: str, token: str, duration: int):
 # Voice-Over Generation Function
 # ---------------------------------------------------------------------
 @spaces.GPU(duration=300)
-def generate_voice(script: str):
     try:
-        tts_model = "coqui/xtts-en-ljspeech-v2"
         processor = AutoProcessor.from_pretrained(tts_model)
         model = AutoModelForCausalLM.from_pretrained(tts_model)
@@ -125,64 +120,66 @@ def blend_audio(voice_path: str, music_path: str, ducking: bool):
 # ---------------------------------------------------------------------
 with gr.Blocks() as demo:
     gr.Markdown("""
-        # 🎧 AI Promo Studio with Pages 🚀
-        Follow a step-by-step process to create amazing promos with AI.
     """)
-    with gr.Tabs():
-        # Step 1: Script Generation
-        with gr.Tab("Step 1: Generate Script"):
-            user_prompt = gr.Textbox(label="Promo Idea", placeholder="E.g., A 30-second promo for a morning show.")
-            llama_model_id = gr.Textbox(label="Llama Model ID", value="meta-llama/Meta-Llama-3-8B-Instruct")
-            duration = gr.Slider(label="Duration (seconds)", minimum=15, maximum=60, step=15, value=30)
-            generate_script_button = gr.Button("Generate Script")
-            script_output = gr.Textbox(label="Generated Script")
-            music_suggestion_output = gr.Textbox(label="Music Suggestion")
-            generate_script_button.click(
-                fn=generate_script,
-                inputs=[user_prompt, llama_model_id, hf_token, duration],
-                outputs=[script_output, music_suggestion_output],
-            )
-        # Step 2: Voice Generation
-        with gr.Tab("Step 2: Generate Voice"):
-            script_input = gr.Textbox(label="Script for Voice", interactive=False)
-            generate_voice_button = gr.Button("Generate Voice")
-            voice_output = gr.Audio(label="Generated Voice", type="filepath")
-            generate_voice_button.click(
-                fn=generate_voice,
-                inputs=[script_input],
-                outputs=[voice_output],
-            )
-        # Step 3: Music Generation
-        with gr.Tab("Step 3: Generate Music"):
-            music_prompt_input = gr.Textbox(label="Music Suggestion Prompt", interactive=False)
-            audio_length = gr.Slider(label="Music Length (tokens)", minimum=128, maximum=1024, step=64, value=512)
-            generate_music_button = gr.Button("Generate Music")
-            music_output = gr.Audio(label="Generated Music", type="filepath")
-            generate_music_button.click(
-                fn=generate_music,
-                inputs=[music_prompt_input, audio_length],
-                outputs=[music_output],
-            )
-        # Step 4: Blend Audio
-        with gr.Tab("Step 4: Blend Audio"):
-            voice_path = gr.Audio(label="Voice File", type="filepath")
-            music_path = gr.Audio(label="Music File", type="filepath")
-            ducking = gr.Checkbox(label="Enable Ducking", value=True)
-            blend_button = gr.Button("Blend Audio")
-            final_output = gr.Audio(label="Final Promo Audio", type="filepath")
-            blend_button.click(
-                fn=blend_audio,
-                inputs=[voice_path, music_path, ducking],
-                outputs=[final_output],
-            )
     gr.Markdown("""
         <hr>

         llama_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
         system_prompt = (
+            f"You are an expert radio imaging producer specializing in sound design and music. "
+            f"Based on the user's concept and the selected duration of {duration} seconds, craft a concise, engaging promo script. "
+            f"Ensure the script fits within the time limit and suggest a matching music style that complements the theme."
         )
         combined_prompt = f"{system_prompt}\nUser concept: {user_prompt}\nRefined script and music suggestion:"
+        result = llama_pipeline(combined_prompt, max_new_tokens=200, do_sample=True, temperature=0.9)
+        generated_text = result[0]["generated_text"].split("Refined script and music suggestion:")[-1].strip()
+        if "Music Suggestion:" in generated_text:
+            script, music_suggestion = generated_text.split("Music Suggestion:")
+            return script.strip(), music_suggestion.strip()
+        return generated_text, "No specific music suggestion found."
     except Exception as e:
         return f"Error generating script: {e}", None
 # Voice-Over Generation Function
 # ---------------------------------------------------------------------
 @spaces.GPU(duration=300)
+def generate_voice(script: str, speaker: str):
     try:
+        # Replace with your chosen TTS model
+        tts_model = "coqui/XTTS-v2"
         processor = AutoProcessor.from_pretrained(tts_model)
         model = AutoModelForCausalLM.from_pretrained(tts_model)
 # ---------------------------------------------------------------------
 with gr.Blocks() as demo:
     gr.Markdown("""
+        # 🎧 AI Promo Studio with Step-by-Step Script, Voice, Music, and Mixing 🚀
+        Generate and mix radio promos effortlessly with AI tools!
     """)
+    with gr.Row():
+        user_prompt = gr.Textbox(label="Promo Idea", placeholder="E.g., A 30-second promo for a morning show.")
+        llama_model_id = gr.Textbox(label="Llama Model ID", value="meta-llama/Meta-Llama-3-8B-Instruct")
+        duration = gr.Slider(label="Duration (seconds)", minimum=15, maximum=60, step=15, value=30)
+        audio_length = gr.Slider(label="Music Length (tokens)", minimum=128, maximum=1024, step=64, value=512)
+        speaker = gr.Textbox(label="Voice Style (optional)", placeholder="E.g., male, female, or neutral.")
+        ducking = gr.Checkbox(label="Enable Ducking", value=True)
+    generate_script_button = gr.Button("Generate Script")
+    script_output = gr.Textbox(label="Generated Script")
+    music_suggestion_output = gr.Textbox(label="Music Suggestion")
+    generate_voice_button = gr.Button("Generate Voice")
+    voice_output = gr.Audio(label="Generated Voice", type="filepath")
+    generate_music_button = gr.Button("Generate Music")
+    music_output = gr.Audio(label="Generated Music", type="filepath")
+    blend_button = gr.Button("Blend Audio")
+    final_output = gr.Audio(label="Final Promo Audio", type="filepath")
+    def step_generate_script(user_prompt, model_id, duration):
+        return generate_script(user_prompt, model_id, hf_token, duration)
+    def step_generate_voice(script, speaker):
+        return generate_voice(script, speaker)
+    def step_generate_music(music_suggestion, audio_length):
+        return generate_music(music_suggestion, audio_length)
+    def step_blend_audio(voice_path, music_path, ducking):
+        return blend_audio(voice_path, music_path, ducking)
+    generate_script_button.click(
+        fn=lambda user_prompt, model_id, duration: generate_script(user_prompt, model_id, hf_token, duration),
+        inputs=[user_prompt, llama_model_id, duration],
+        outputs=[script_output, music_suggestion_output],
+    )
+    generate_voice_button.click(
+        fn=step_generate_voice,
+        inputs=[script_output, speaker],
+        outputs=[voice_output],
+    )
+    generate_music_button.click(
+        fn=step_generate_music,
+        inputs=[music_suggestion_output, audio_length],
+        outputs=[music_output],
+    )
+    blend_button.click(
+        fn=step_blend_audio,
+        inputs=[voice_output, music_output, ducking],
+        outputs=[final_output],
+    )
     gr.Markdown("""
         <hr>