Bils committed on
Commit 8b6a33e · verified · 1 Parent(s): e7b189b

Update app.py

Files changed (1)
app.py +68 -58
app.py CHANGED
@@ -11,13 +11,13 @@ from transformers import (
 from scipy.io.wavfile import write
 import tempfile
 from dotenv import load_dotenv
-import spaces
+import spaces  # Assumes Hugging Face Spaces library supports `@spaces.GPU`
 
 # Load environment variables (e.g., Hugging Face token)
 load_dotenv()
 hf_token = os.getenv("HF_TOKEN")
 
-# Globals for Lazy Loading
+# Globals for lazy loading
 llama_pipeline = None
 musicgen_model = None
 musicgen_processor = None
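
The `spaces` import enables Hugging Face ZeroGPU, where a GPU is attached only while a decorated function runs. A minimal sketch of the pattern in isolation (assuming the `spaces` package that ships on ZeroGPU Spaces; `describe_device` is a hypothetical example function, not part of app.py):

import spaces
import torch

@spaces.GPU(duration=300)  # GPU is allocated only for the lifetime of this call
def describe_device() -> str:
    # On ZeroGPU hardware, CUDA is visible only inside @spaces.GPU-decorated functions
    return torch.cuda.get_device_name(0) if torch.cuda.is_available() else "cpu"
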
@@ -25,12 +25,14 @@ musicgen_processor = None
 # ---------------------------------------------------------------------
 # Load Llama 3 Model with Zero GPU (Lazy Loading)
 # ---------------------------------------------------------------------
-@spaces.GPU(duration=120)
+@spaces.GPU(duration=300)  # Increased duration to 300 seconds
 def load_llama_pipeline_zero_gpu(model_id: str, token: str):
     global llama_pipeline
     if llama_pipeline is None:
         try:
+            print("Starting model loading...")
             tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=token)
+            print("Tokenizer loaded.")
             model = AutoModelForCausalLM.from_pretrained(
                 model_id,
                 use_auth_token=token,
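
A side note on the loading calls above: recent transformers releases deprecate `use_auth_token` in favor of `token`. A hedged equivalent, assuming transformers >= 4.34:

tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    token=token,  # replaces the deprecated use_auth_token argument
    device_map="auto",
    trust_remote_code=True,
)
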
@@ -38,56 +40,63 @@ def load_llama_pipeline_zero_gpu(model_id: str, token: str):
                 device_map="auto",  # Automatically handles GPU allocation
                 trust_remote_code=True
             )
+            print("Model loaded. Initializing pipeline...")
             llama_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
+            print("Pipeline initialized successfully.")
         except Exception as e:
-            return f"Error loading Llama pipeline: {e}"
+            print(f"Error loading Llama pipeline: {e}")
+            return str(e)
     return llama_pipeline
 
-# ---------------------------------------------------------------------
-# Load MusicGen Model (Lazy Loading)
-# ---------------------------------------------------------------------
-@spaces.GPU(duration=120)
-def load_musicgen_model():
-    global musicgen_model, musicgen_processor
-    if musicgen_model is None or musicgen_processor is None:
-        try:
-            musicgen_model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
-            musicgen_processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
-        except Exception as e:
-            return None, f"Error loading MusicGen model: {e}"
-    return musicgen_model, musicgen_processor
-
 # ---------------------------------------------------------------------
 # Generate Radio Script
 # ---------------------------------------------------------------------
-def generate_script(user_input: str, llama_pipeline):
+def generate_script(user_input: str, pipeline_llama):
     try:
         system_prompt = (
             "You are a top-tier radio imaging producer using Llama 3. "
             "Take the user's concept and craft a short, creative promo script."
         )
         combined_prompt = f"{system_prompt}\nUser concept: {user_input}\nRefined script:"
-        result = llama_pipeline(combined_prompt, max_new_tokens=200, do_sample=True, temperature=0.9)
+        result = pipeline_llama(combined_prompt, max_new_tokens=200, do_sample=True, temperature=0.9)
         return result[0]['generated_text'].split("Refined script:")[-1].strip()
     except Exception as e:
         return f"Error generating script: {e}"
 
+# ---------------------------------------------------------------------
+# Load MusicGen Model (Lazy Loading)
+# ---------------------------------------------------------------------
+@spaces.GPU(duration=300)
+def load_musicgen_model():
+    global musicgen_model, musicgen_processor
+    if musicgen_model is None or musicgen_processor is None:
+        try:
+            print("Loading MusicGen model...")
+            musicgen_model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
+            musicgen_processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
+            print("MusicGen model loaded successfully.")
+        except Exception as e:
+            print(f"Error loading MusicGen model: {e}")
+            return None, str(e)
+    return musicgen_model, musicgen_processor
+
 # ---------------------------------------------------------------------
 # Generate Audio
 # ---------------------------------------------------------------------
-@spaces.GPU(duration=120)
+@spaces.GPU(duration=300)
 def generate_audio(prompt: str, audio_length: int):
-    mg_model, mg_processor = load_musicgen_model()
-    if mg_model is None or isinstance(mg_processor, str):
-        return mg_processor
-
+    global musicgen_model, musicgen_processor
+    if musicgen_model is None or musicgen_processor is None:
+        musicgen_model, musicgen_processor = load_musicgen_model()
+    if isinstance(musicgen_model, str):
+        return musicgen_model
     try:
-        mg_model.to("cuda")  # Move the model to GPU
-        inputs = mg_processor(text=[prompt], padding=True, return_tensors="pt")
-        outputs = mg_model.generate(**inputs, max_new_tokens=audio_length)
-        mg_model.to("cpu")  # Return the model to CPU
+        musicgen_model.to("cuda")  # Move the model to GPU
+        inputs = musicgen_processor(text=[prompt], padding=True, return_tensors="pt")
+        outputs = musicgen_model.generate(**inputs, max_new_tokens=audio_length)
+        musicgen_model.to("cpu")  # Return the model to CPU
 
-        sr = mg_model.config.audio_encoder.sampling_rate
+        sr = musicgen_model.config.audio_encoder.sampling_rate
         audio_data = outputs[0, 0].cpu().numpy()
         normalized_audio = (audio_data / max(abs(audio_data)) * 32767).astype("int16")
 
@@ -101,16 +110,19 @@ def generate_audio(prompt: str, audio_length: int):
 # Gradio Interface
 # ---------------------------------------------------------------------
 def radio_imaging_script(user_prompt, llama_model_id):
-    llama_pipeline = load_llama_pipeline_zero_gpu(llama_model_id, hf_token)
-    if isinstance(llama_pipeline, str):
-        return llama_pipeline
+    # Load Llama 3 Pipeline with Zero GPU
+    pipeline_llama = load_llama_pipeline_zero_gpu(llama_model_id, hf_token)
+    if isinstance(pipeline_llama, str):
+        return pipeline_llama
 
     # Generate Script
-    script = generate_script(user_prompt, llama_pipeline)
+    script = generate_script(user_prompt, pipeline_llama)
     return script
 
 def radio_imaging_audio(script, audio_length):
-    return generate_audio(script, audio_length)
+    # Generate Audio
+    audio_data = generate_audio(script, audio_length)
+    return audio_data
 
 # ---------------------------------------------------------------------
 # Interface
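
Since load failures are returned as plain strings, a failed model load is rendered in the script textbox as if it were output. An alternative sketch using Gradio's built-in error reporting (gr.Error is part of the Gradio API; the handler otherwise mirrors the one above):

def radio_imaging_script(user_prompt, llama_model_id):
    pipeline_llama = load_llama_pipeline_zero_gpu(llama_model_id, hf_token)
    if isinstance(pipeline_llama, str):
        # Surface the failure as a UI error instead of fake script output
        raise gr.Error(f"Model loading failed: {pipeline_llama}")
    return generate_script(user_prompt, pipeline_llama)
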
@@ -120,31 +132,29 @@ with gr.Blocks() as demo:
 
     # Script Generation Section
     with gr.Row():
-        with gr.Column():
-            gr.Markdown("## Step 1: Generate the Promo Script")
-            user_prompt = gr.Textbox(label="Enter your promo idea", placeholder="E.g., A 15-second hype jingle for a morning talk show.")
-            llama_model_id = gr.Textbox(label="Llama 3 Model ID", value="meta-llama/Meta-Llama-3-70B")
-            generate_script_button = gr.Button("Generate Promo Script")
-            script_output = gr.Textbox(label="Generated Script", interactive=False)
-
-            generate_script_button.click(
-                fn=radio_imaging_script,
-                inputs=[user_prompt, llama_model_id],
-                outputs=script_output
-            )
+        gr.Markdown("## Step 1: Generate the Promo Script")
+        user_prompt = gr.Textbox(label="Enter your promo idea", placeholder="E.g., A 15-second hype jingle for a morning talk show.")
+        llama_model_id = gr.Textbox(label="Llama 3 Model ID", value="meta-llama/Meta-Llama-3-70B")
+        generate_script_button = gr.Button("Generate Promo Script")
+        script_output = gr.Textbox(label="Generated Script", interactive=False)
+
+        generate_script_button.click(
+            fn=radio_imaging_script,
+            inputs=[user_prompt, llama_model_id],
+            outputs=script_output
+        )
 
     # Audio Generation Section
     with gr.Row():
-        with gr.Column():
-            gr.Markdown("## Step 2: Generate the Sound")
-            audio_length = gr.Slider(label="Audio Length (tokens)", minimum=128, maximum=1024, step=64, value=512)
-            generate_audio_button = gr.Button("Generate Sound from Script")
-            audio_output = gr.Audio(label="Generated Audio", type="filepath")
-
-            generate_audio_button.click(
-                fn=radio_imaging_audio,
-                inputs=[script_output, audio_length],
-                outputs=audio_output
-            )
+        gr.Markdown("## Step 2: Generate the Sound")
+        audio_length = gr.Slider(label="Audio Length (tokens)", minimum=128, maximum=1024, step=64, value=512)
+        generate_audio_button = gr.Button("Generate Sound from Script")
+        audio_output = gr.Audio(label="Generated Audio", type="filepath")
+
+        generate_audio_button.click(
+            fn=radio_imaging_audio,
+            inputs=[script_output, audio_length],
+            outputs=audio_output
+        )
 
 demo.launch(debug=True)
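
One layout consequence of dropping the gr.Column() wrappers: children of gr.Row() are laid out horizontally, so each step's widgets now sit side by side instead of stacked. If vertical stacking was intended, the column can be kept without changing anything else, as in this sketch:

with gr.Row():
    with gr.Column():  # stacks the Step 1 widgets vertically inside the row
        gr.Markdown("## Step 1: Generate the Promo Script")
        user_prompt = gr.Textbox(label="Enter your promo idea")
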
 