Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -23,7 +23,14 @@ from transformers import (
|
|
23 |
from TTS.api import TTS
|
24 |
|
25 |
# Diffusers for sound design generation
|
26 |
-
from diffusers import DiffusionPipeline
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
|
28 |
# ---------------------------------------------------------------------
|
29 |
# Setup Logging and Environment Variables
|
@@ -100,13 +107,11 @@ def get_tts_model(model_name: str = "tts_models/en/ljspeech/tacotron2-DDC"):
|
|
100 |
def get_sound_design_pipeline(model_name: str, token: str):
|
101 |
"""
|
102 |
Returns a cached DiffusionPipeline for sound design if available;
|
103 |
-
otherwise, it loads and caches the pipeline using the
|
104 |
"""
|
105 |
if model_name in SOUND_DESIGN_PIPELINES:
|
106 |
return SOUND_DESIGN_PIPELINES[model_name]
|
107 |
-
|
108 |
-
from diffusers import AudioLDMPipeline
|
109 |
-
pipe = DiffusionPipeline.from_pretrained(model_name, pipeline_class=AudioLDMPipeline, use_auth_token=token)
|
110 |
SOUND_DESIGN_PIPELINES[model_name] = pipe
|
111 |
return pipe
|
112 |
|
@@ -221,7 +226,7 @@ def generate_music(prompt: str, audio_length: int):
|
|
221 |
@spaces.GPU(duration=200)
|
222 |
def generate_sound_design(prompt: str):
|
223 |
"""
|
224 |
-
Generates a sound design audio file based on the provided prompt using
|
225 |
Returns the file path to the generated .wav file.
|
226 |
"""
|
227 |
try:
|
|
|
23 |
from TTS.api import TTS
|
24 |
|
25 |
# Diffusers for sound design generation
|
26 |
+
from diffusers import DiffusionPipeline, AudioLDMPipeline
|
27 |
+
import diffusers
|
28 |
+
|
29 |
+
# Monkey-patch: Create a patched pipeline class so that any reference to AudioLDM2Pipeline is resolved correctly.
|
30 |
+
class PatchedAudioLDM2Pipeline(AudioLDMPipeline):
|
31 |
+
pass
|
32 |
+
|
33 |
+
setattr(diffusers, "AudioLDM2Pipeline", PatchedAudioLDM2Pipeline)
|
34 |
|
35 |
# ---------------------------------------------------------------------
|
36 |
# Setup Logging and Environment Variables
|
|
|
107 |
def get_sound_design_pipeline(model_name: str, token: str):
|
108 |
"""
|
109 |
Returns a cached DiffusionPipeline for sound design if available;
|
110 |
+
otherwise, it loads and caches the pipeline using the patched pipeline class.
|
111 |
"""
|
112 |
if model_name in SOUND_DESIGN_PIPELINES:
|
113 |
return SOUND_DESIGN_PIPELINES[model_name]
|
114 |
+
pipe = DiffusionPipeline.from_pretrained(model_name, pipeline_class=PatchedAudioLDM2Pipeline, use_auth_token=token)
|
|
|
|
|
115 |
SOUND_DESIGN_PIPELINES[model_name] = pipe
|
116 |
return pipe
|
117 |
|
|
|
226 |
@spaces.GPU(duration=200)
|
227 |
def generate_sound_design(prompt: str):
|
228 |
"""
|
229 |
+
Generates a sound design audio file based on the provided prompt using AudioLDM 2.
|
230 |
Returns the file path to the generated .wav file.
|
231 |
"""
|
232 |
try:
|