Bils committed
Commit e7b189b · verified · 1 Parent(s): a8c9cb5

Update app.py

Files changed (1)
  app.py +48 -42
app.py CHANGED
@@ -11,62 +11,76 @@ from transformers import (
 from scipy.io.wavfile import write
 import tempfile
 from dotenv import load_dotenv
-import spaces  # Assumes Hugging Face Spaces library supports `@spaces.GPU`
+import spaces

 # Load environment variables (e.g., Hugging Face token)
 load_dotenv()
 hf_token = os.getenv("HF_TOKEN")

+# Globals for Lazy Loading
+llama_pipeline = None
+musicgen_model = None
+musicgen_processor = None
+
 # ---------------------------------------------------------------------
-# Load Llama 3 Model with Zero GPU
+# Load Llama 3 Model with Zero GPU (Lazy Loading)
 # ---------------------------------------------------------------------
 @spaces.GPU(duration=120)
 def load_llama_pipeline_zero_gpu(model_id: str, token: str):
-    try:
-        tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=token)
-        model = AutoModelForCausalLM.from_pretrained(
-            model_id,
-            use_auth_token=token,
-            torch_dtype=torch.float16,
-            device_map="auto",  # Automatically handles GPU allocation
-            trust_remote_code=True
-        )
-        return pipeline("text-generation", model=model, tokenizer=tokenizer)
-    except Exception as e:
-        return str(e)
+    global llama_pipeline
+    if llama_pipeline is None:
+        try:
+            tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=token)
+            model = AutoModelForCausalLM.from_pretrained(
+                model_id,
+                use_auth_token=token,
+                torch_dtype=torch.float16,
+                device_map="auto",  # Automatically handles GPU allocation
+                trust_remote_code=True
+            )
+            llama_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
+        except Exception as e:
+            return f"Error loading Llama pipeline: {e}"
+    return llama_pipeline
+
+# ---------------------------------------------------------------------
+# Load MusicGen Model (Lazy Loading)
+# ---------------------------------------------------------------------
+@spaces.GPU(duration=120)
+def load_musicgen_model():
+    global musicgen_model, musicgen_processor
+    if musicgen_model is None or musicgen_processor is None:
+        try:
+            musicgen_model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
+            musicgen_processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
+        except Exception as e:
+            return None, f"Error loading MusicGen model: {e}"
+    return musicgen_model, musicgen_processor

 # ---------------------------------------------------------------------
 # Generate Radio Script
 # ---------------------------------------------------------------------
-def generate_script(user_input: str, pipeline_llama):
+def generate_script(user_input: str, llama_pipeline):
     try:
         system_prompt = (
             "You are a top-tier radio imaging producer using Llama 3. "
             "Take the user's concept and craft a short, creative promo script."
         )
         combined_prompt = f"{system_prompt}\nUser concept: {user_input}\nRefined script:"
-        result = pipeline_llama(combined_prompt, max_new_tokens=200, do_sample=True, temperature=0.9)
+        result = llama_pipeline(combined_prompt, max_new_tokens=200, do_sample=True, temperature=0.9)
         return result[0]['generated_text'].split("Refined script:")[-1].strip()
     except Exception as e:
         return f"Error generating script: {e}"

-# ---------------------------------------------------------------------
-# Load MusicGen Model
-# ---------------------------------------------------------------------
-@spaces.GPU(duration=120)
-def load_musicgen_model():
-    try:
-        model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
-        processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
-        return model, processor
-    except Exception as e:
-        return None, str(e)
-
 # ---------------------------------------------------------------------
 # Generate Audio
 # ---------------------------------------------------------------------
 @spaces.GPU(duration=120)
-def generate_audio(prompt: str, audio_length: int, mg_model, mg_processor):
+def generate_audio(prompt: str, audio_length: int):
+    mg_model, mg_processor = load_musicgen_model()
+    if mg_model is None or isinstance(mg_processor, str):
+        return mg_processor
+
     try:
         mg_model.to("cuda")  # Move the model to GPU
         inputs = mg_processor(text=[prompt], padding=True, return_tensors="pt")
@@ -87,24 +101,16 @@ def generate_audio(prompt: str, audio_length: int, mg_model, mg_processor):
 # Gradio Interface
 # ---------------------------------------------------------------------
 def radio_imaging_script(user_prompt, llama_model_id):
-    # Load Llama 3 Pipeline with Zero GPU
-    pipeline_llama = load_llama_pipeline_zero_gpu(llama_model_id, hf_token)
-    if isinstance(pipeline_llama, str):
-        return pipeline_llama
+    llama_pipeline = load_llama_pipeline_zero_gpu(llama_model_id, hf_token)
+    if isinstance(llama_pipeline, str):
+        return llama_pipeline

     # Generate Script
-    script = generate_script(user_prompt, pipeline_llama)
+    script = generate_script(user_prompt, llama_pipeline)
     return script

 def radio_imaging_audio(script, audio_length):
-    # Load MusicGen
-    mg_model, mg_processor = load_musicgen_model()
-    if isinstance(mg_processor, str):
-        return mg_processor
-
-    # Generate Audio
-    audio_data = generate_audio(script, audio_length, mg_model, mg_processor)
-    return audio_data
+    return generate_audio(script, audio_length)

 # ---------------------------------------------------------------------
 # Interface
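
The core change in this commit is the lazy-loading pattern: model handles are cached in module-level globals so that repeated Gradio callbacks reuse them instead of reloading weights on every request. A minimal, standalone sketch of that pattern, with build_pipeline as a hypothetical stand-in for the real Llama/MusicGen loaders:

    # Minimal sketch of the lazy-loading pattern adopted in this commit.
    # build_pipeline is a hypothetical stand-in for the actual
    # AutoModelForCausalLM / MusicGen weight-loading code.
    _cached_pipeline = None  # module-level cache, populated on first use

    def build_pipeline():
        # Pretend this is the expensive weight-loading step.
        return {"model": "loaded"}

    def get_pipeline():
        global _cached_pipeline
        if _cached_pipeline is None:      # only the first call pays the cost
            _cached_pipeline = build_pipeline()
        return _cached_pipeline          # later calls return the cached object

One caveat worth verifying on ZeroGPU hardware: functions decorated with @spaces.GPU can execute in a separate GPU-attached process, so whether a global mutated inside the decorated function actually persists across calls depends on how the Space isolates those processes.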
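
The diff cuts off inside generate_audio, but the surviving imports (scipy.io.wavfile.write, tempfile) suggest how the function presumably finishes: decode MusicGen's generated tokens into a waveform and write it to a temporary .wav file. The following is a hedged sketch of that tail end, not the committed code; render_wav is a hypothetical name, and the ~50-tokens-per-second rate and sampling-rate lookup follow the standard transformers MusicGen API:

    import tempfile

    import torch
    from scipy.io.wavfile import write
    from transformers import AutoProcessor, MusicgenForConditionalGeneration

    model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
    processor = AutoProcessor.from_pretrained("facebook/musicgen-small")

    def render_wav(prompt: str, audio_length: int) -> str:
        """Generate roughly audio_length seconds of audio; return a wav path."""
        device = "cuda" if torch.cuda.is_available() else "cpu"
        model.to(device)
        inputs = processor(text=[prompt], padding=True, return_tensors="pt").to(device)
        # MusicGen emits roughly 50 audio tokens per second of output.
        tokens = model.generate(**inputs, max_new_tokens=int(audio_length * 50))
        rate = model.config.audio_encoder.sampling_rate  # 32 kHz for musicgen-small
        waveform = tokens[0, 0].cpu().numpy()  # (batch, channel, samples) -> mono float32
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
            write(f.name, rate, waveform)  # scipy accepts float32 in [-1, 1]
            return f.name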