Update demos/musicgen_app.py
demos/musicgen_app.py  CHANGED  +74 -70
@@ -93,15 +93,16 @@ def make_waveform(*args, **kwargs):
     return out


-def load_model(version="facebook/musicgen-
+def load_model(version="facebook/musicgen-small"):
     global MODEL
-    print("Loading
+    print("Loading Musiversal musicgen-small")  # , version
     if MODEL is None or MODEL.name != version:
         # Clear PyTorch CUDA cache and delete model
         del MODEL
         torch.cuda.empty_cache()
         MODEL = None  # in case loading would crash
-        MODEL = MusicGen.get_pretrained(
+        MODEL = MusicGen.get_pretrained("/data")
+        print("Custom model loaded.")


 def load_diffusion():
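For reference, loading a finetuned checkpoint from a local directory and generating audio follows the standard audiocraft API; a minimal standalone sketch (only the "/data" path comes from this change, the prompt, duration, and output name are illustrative):

from audiocraft.models import MusicGen
from audiocraft.data.audio import audio_write

# Load the fine-tuned checkpoint from a local directory (the demo points this at "/data").
model = MusicGen.get_pretrained("/data")
model.set_generation_params(duration=10)  # seconds of audio per clip

# Generate one clip from a text prompt; output shape is [batch, channels, samples].
wav = model.generate(["warm electric piano chords, slow tempo"])
audio_write("sample", wav[0].cpu(), model.sample_rate, strategy="loudness")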
@@ -290,9 +291,10 @@ def ui_full(launch_kwargs):
         gr.Markdown(
             """
             # MusicGen
-            This is
+            This is a private demo of [MusicGen](https://github.com/facebookresearch/audiocraft),
             a simple and controllable model for music generation
-            presented at: ["Simple and Controllable Music Generation"](https://huggingface.co/papers/2306.05284)
+            presented at: ["Simple and Controllable Music Generation"](https://huggingface.co/papers/2306.05284).
+            This Space hosts **"facebook/musicgen-small"**. It has been finetuned on a proprietary keyboard dataset from [Musiversal](https://musiversal.com/).
             """
         )
         with gr.Row():
@@ -316,25 +318,25 @@ def ui_full(launch_kwargs):
             submit = gr.Button("Submit")
             # Adapted from https://github.com/rkfg/audiocraft/blob/long/app.py, MIT license.
             _ = gr.Button("Interrupt").click(fn=interrupt, queue=False)
-        with gr.Row():
-            model = gr.Radio(
-                [
-                    "facebook/musicgen-melody",
-                    "facebook/musicgen-medium",
-                    "facebook/musicgen-small",
-                    "facebook/musicgen-large",
-                    "facebook/musicgen-melody-large",
-                    "facebook/musicgen-stereo-small",
-                    "facebook/musicgen-stereo-medium",
-                    "facebook/musicgen-stereo-melody",
-                    "facebook/musicgen-stereo-large",
-                    "facebook/musicgen-stereo-melody-large",
-                ],
-                label="Model",
-                value="facebook/musicgen-stereo-melody",
-                interactive=True,
-            )
-            model_path = gr.Text(label="Model Path (custom models)")
+        # with gr.Row():
+        #     model = gr.Radio(
+        #         [
+        #             "facebook/musicgen-melody",
+        #             "facebook/musicgen-medium",
+        #             "facebook/musicgen-small",
+        #             "facebook/musicgen-large",
+        #             "facebook/musicgen-melody-large",
+        #             "facebook/musicgen-stereo-small",
+        #             "facebook/musicgen-stereo-medium",
+        #             "facebook/musicgen-stereo-melody",
+        #             "facebook/musicgen-stereo-large",
+        #             "facebook/musicgen-stereo-melody-large",
+        #         ],
+        #         label="Model",
+        #         value="facebook/musicgen-stereo-melody",
+        #         interactive=True,
+        #     )
+        #     model_path = gr.Text(label="Model Path (custom models)")
         with gr.Row():
             decoder = gr.Radio(
                 ["Default", "MultiBand_Diffusion"],
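With the model selector disabled and a single checkpoint served, a fixed label can stand in for the radio; a hypothetical Gradio sketch, not code from this commit (component names and labels are illustrative):

import gradio as gr

with gr.Blocks() as demo:
    with gr.Row():
        # Single hosted checkpoint: show it as static text instead of a gr.Radio selector.
        gr.Markdown("**Model:** facebook/musicgen-small (Musiversal finetune, loaded from /data)")
    with gr.Row():
        # The decoder choice stays interactive, mirroring the options kept in the demo.
        decoder = gr.Radio(
            ["Default", "MultiBand_Diffusion"],
            label="Decoder",
            value="Default",
            interactive=True,
        )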
@@ -375,8 +377,8 @@ def ui_full(launch_kwargs):
         ).then(
             predict_full,
             inputs=[
-                model,
-                model_path,
+                # model,
+                # model_path,
                 decoder,
                 text,
                 melody,
@@ -392,49 +394,49 @@ def ui_full(launch_kwargs):
             toggle_audio_src, radio, [melody], queue=False, show_progress=False
         )

-        gr.Examples(
-            fn=predict_full,
-            examples=[
-                [
-                    "An 80s driving pop song with heavy drums and synth pads in the background",
-                    "./assets/bach.mp3",
-                    "facebook/musicgen-stereo-melody",
-                    "Default",
-                ],
-                [
-                    "A cheerful country song with acoustic guitars",
-                    "./assets/bolero_ravel.mp3",
-                    "facebook/musicgen-stereo-melody",
-                    "Default",
-                ],
-                [
-                    "90s rock song with electric guitar and heavy drums",
-                    None,
-                    "facebook/musicgen-stereo-medium",
-                    "Default",
-                ],
-                [
-                    "a light and cheerly EDM track, with syncopated drums, aery pads, and strong emotions",
-                    "./assets/bach.mp3",
-                    "facebook/musicgen-stereo-melody",
-                    "Default",
-                ],
-                [
-                    "lofi slow bpm electro chill with organic samples",
-                    None,
-                    "facebook/musicgen-stereo-medium",
-                    "Default",
-                ],
-                [
-                    "Punk rock with loud drum and power guitar",
-                    None,
-                    "facebook/musicgen-stereo-medium",
-                    "MultiBand_Diffusion",
-                ],
-            ],
-            inputs=[text, melody, model, decoder],
-            outputs=[output],
-        )
+        # gr.Examples(
+        #     fn=predict_full,
+        #     examples=[
+        #         [
+        #             "An 80s driving pop song with heavy drums and synth pads in the background",
+        #             "./assets/bach.mp3",
+        #             "facebook/musicgen-stereo-melody",
+        #             "Default",
+        #         ],
+        #         [
+        #             "A cheerful country song with acoustic guitars",
+        #             "./assets/bolero_ravel.mp3",
+        #             "facebook/musicgen-stereo-melody",
+        #             "Default",
+        #         ],
+        #         [
+        #             "90s rock song with electric guitar and heavy drums",
+        #             None,
+        #             "facebook/musicgen-stereo-medium",
+        #             "Default",
+        #         ],
+        #         [
+        #             "a light and cheerly EDM track, with syncopated drums, aery pads, and strong emotions",
+        #             "./assets/bach.mp3",
+        #             "facebook/musicgen-stereo-melody",
+        #             "Default",
+        #         ],
+        #         [
+        #             "lofi slow bpm electro chill with organic samples",
+        #             None,
+        #             "facebook/musicgen-stereo-medium",
+        #             "Default",
+        #         ],
+        #         [
+        #             "Punk rock with loud drum and power guitar",
+        #             None,
+        #             "facebook/musicgen-stereo-medium",
+        #             "MultiBand_Diffusion",
+        #         ],
+        #     ],
+        #     inputs=[text, melody, model, decoder],
+        #     outputs=[output],
+        # )
         gr.Markdown(
             """
             ### More details
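If example prompts were re-enabled for the single-model setup, the rows would only need to populate the components still passed to predict_full (decoder, text, melody); a hypothetical, self-contained sketch with stub components, not part of this commit:

import gradio as gr

# Hypothetical: each example row covers [decoder, text prompt, melody path];
# the model column is gone because the Space serves a single checkpoint.
with gr.Blocks() as demo:
    text = gr.Text(label="Input Text")
    melody = gr.Audio(type="filepath", label="Melody Condition (optional)")
    decoder = gr.Radio(["Default", "MultiBand_Diffusion"], label="Decoder", value="Default")
    gr.Examples(
        examples=[
            ["Default", "An 80s driving pop song with heavy drums and synth pads in the background", "./assets/bach.mp3"],
            ["Default", "lofi slow bpm electro chill with organic samples", None],
            ["MultiBand_Diffusion", "Punk rock with loud drum and power guitar", None],
        ],
        inputs=[decoder, text, melody],
    )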
@@ -459,7 +461,7 @@ def ui_full(launch_kwargs):
             An overlap of 12 seconds is kept with the previously generated chunk, and 18 "new" seconds
             are generated each time.

-
+            There are 10 model variations:
             1. facebook/musicgen-melody -- a music generation model capable of generating music conditioned
             on text and melody inputs. **Note**, you can also use text only.
             2. facebook/musicgen-small -- a 300M transformer decoder conditioned on text only.
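The 12-second overlap with 18 "new" seconds per step described above amounts to 30-second windows sliding by 18 seconds; an illustrative sketch of that arithmetic, not the demo's implementation:

def chunk_schedule(total_seconds: float, window: float = 30.0, overlap: float = 12.0):
    """Return (start, end) windows for long generations: each window re-uses
    `overlap` seconds of the previous chunk and contributes `window - overlap`
    (here 18) new seconds."""
    stride = window - overlap  # 18 "new" seconds per step
    starts, t = [], 0.0
    while t + window < total_seconds:
        starts.append(t)
        t += stride
    starts.append(max(total_seconds - window, 0.0))  # last window ends at total_seconds
    return [(s, min(s + window, total_seconds)) for s in starts]

# e.g. a 60-second request -> [(0.0, 30.0), (18.0, 48.0), (30.0, 60.0)]
print(chunk_schedule(60))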
@@ -468,6 +470,8 @@ def ui_full(launch_kwargs):
             5. facebook/musicgen-melody-large -- a 3.3B transformer decoder conditioned on text and melody.
             6. facebook/musicgen-stereo-*: same as the previous models but fine-tuned to output stereo audio.

+            **This Space only provides Musiversal's finetuning of 'facebook/musicgen-small'.**
+
             We also present two ways of decoding the audio tokens
             1. Use the default GAN based compression model. It can suffer from artifacts especially
             for crashes, snares etc.
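For context on the two decoder options, a minimal sketch of the token-level split, assuming the MusicGen/MultiBandDiffusion helpers shipped with audiocraft (the prompt and duration are illustrative; "/data" is the checkpoint path used by this Space):

from audiocraft.models import MusicGen, MultiBandDiffusion

# Default path: the GAN-based (EnCodec) decoder built into the model.
model = MusicGen.get_pretrained("/data")
model.set_generation_params(duration=10)
wav, tokens = model.generate(["punchy analog keys over a slow groove"], return_tokens=True)

# Alternative path: re-decode the same tokens with MultiBand Diffusion,
# which the UI exposes as the "MultiBand_Diffusion" decoder option.
mbd = MultiBandDiffusion.get_mbd_musicgen()
wav_diffusion = mbd.tokens_to_wav(tokens)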