Update demos/musicgen_app.py
demos/musicgen_app.py  CHANGED  +74 -70
@@ -93,15 +93,16 @@ def make_waveform(*args, **kwargs):
     return out


-def load_model(version="facebook/musicgen-
+def load_model(version="facebook/musicgen-small"):
     global MODEL
-    print("Loading
+    print("Loading Musiversal musicgen-small")  # , version
     if MODEL is None or MODEL.name != version:
         # Clear PyTorch CUDA cache and delete model
         del MODEL
         torch.cuda.empty_cache()
         MODEL = None  # in case loading would crash
-        MODEL = MusicGen.get_pretrained(
+        MODEL = MusicGen.get_pretrained("/data")
+        print("Custom model loaded.")


 def load_diffusion():
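For reference, loading a finetuned checkpoint from a local directory and generating audio follows the standard audiocraft API; a minimal standalone sketch (only the "/data" path comes from this change, the prompt, duration, and output name are illustrative):

from audiocraft.models import MusicGen
from audiocraft.data.audio import audio_write

# Load the fine-tuned checkpoint from a local directory (the demo points this at "/data").
model = MusicGen.get_pretrained("/data")
model.set_generation_params(duration=10)  # seconds of audio per clip

# Generate one clip from a text prompt; output shape is [batch, channels, samples].
wav = model.generate(["warm electric piano chords, slow tempo"])
audio_write("sample", wav[0].cpu(), model.sample_rate, strategy="loudness")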
@@ -290,9 +291,10 @@ def ui_full(launch_kwargs):
         gr.Markdown(
             """
             # MusicGen
-            This is
+            This is a private demo of [MusicGen](https://github.com/facebookresearch/audiocraft),
             a simple and controllable model for music generation
-            presented at: ["Simple and Controllable Music Generation"](https://huggingface.co/papers/2306.05284)
+            presented at: ["Simple and Controllable Music Generation"](https://huggingface.co/papers/2306.05284).
+            This Space hosts **"facebook/musicgen-small"**. It has been finetuned on a proprietary keyboard dataset from [Musiversal](https://musiversal.com/).
             """
         )
         with gr.Row():
@@ -316,25 +318,25 @@ def ui_full(launch_kwargs):
             submit = gr.Button("Submit")
             # Adapted from https://github.com/rkfg/audiocraft/blob/long/app.py, MIT license.
             _ = gr.Button("Interrupt").click(fn=interrupt, queue=False)
-        with gr.Row():
-            model = gr.Radio(
-                [
-                    "facebook/musicgen-melody",
-                    "facebook/musicgen-medium",
-                    "facebook/musicgen-small",
-                    "facebook/musicgen-large",
-                    "facebook/musicgen-melody-large",
-                    "facebook/musicgen-stereo-small",
-                    "facebook/musicgen-stereo-medium",
-                    "facebook/musicgen-stereo-melody",
-                    "facebook/musicgen-stereo-large",
-                    "facebook/musicgen-stereo-melody-large",
-                ],
-                label="Model",
-                value="facebook/musicgen-stereo-melody",
-                interactive=True,
-            )
-            model_path = gr.Text(label="Model Path (custom models)")
+        # with gr.Row():
+        #     model = gr.Radio(
+        #         [
+        #             "facebook/musicgen-melody",
+        #             "facebook/musicgen-medium",
+        #             "facebook/musicgen-small",
+        #             "facebook/musicgen-large",
+        #             "facebook/musicgen-melody-large",
+        #             "facebook/musicgen-stereo-small",
+        #             "facebook/musicgen-stereo-medium",
+        #             "facebook/musicgen-stereo-melody",
+        #             "facebook/musicgen-stereo-large",
+        #             "facebook/musicgen-stereo-melody-large",
+        #         ],
+        #         label="Model",
+        #         value="facebook/musicgen-stereo-melody",
+        #         interactive=True,
+        #     )
+        #     model_path = gr.Text(label="Model Path (custom models)")
         with gr.Row():
             decoder = gr.Radio(
                 ["Default", "MultiBand_Diffusion"],
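With the model selector disabled and a single checkpoint served, a fixed label can stand in for the radio; a hypothetical Gradio sketch, not code from this commit (component names and labels are illustrative):

import gradio as gr

with gr.Blocks() as demo:
    with gr.Row():
        # Single hosted checkpoint: show it as static text instead of a gr.Radio selector.
        gr.Markdown("**Model:** facebook/musicgen-small (Musiversal finetune, loaded from /data)")
    with gr.Row():
        # The decoder choice stays interactive, mirroring the options kept in the demo.
        decoder = gr.Radio(
            ["Default", "MultiBand_Diffusion"],
            label="Decoder",
            value="Default",
            interactive=True,
        )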
@@ -375,8 +377,8 @@ def ui_full(launch_kwargs):
         ).then(
             predict_full,
             inputs=[
-                model,
-                model_path,
+                # model,
+                # model_path,
                 decoder,
                 text,
                 melody,
@@ -392,49 +394,49 @@ def ui_full(launch_kwargs):
             toggle_audio_src, radio, [melody], queue=False, show_progress=False
         )

-        gr.Examples(
-            fn=predict_full,
-            examples=[
-                [
-                    "An 80s driving pop song with heavy drums and synth pads in the background",
-                    "./assets/bach.mp3",
-                    "facebook/musicgen-stereo-melody",
-                    "Default",
-                ],
-                [
-                    "A cheerful country song with acoustic guitars",
-                    "./assets/bolero_ravel.mp3",
-                    "facebook/musicgen-stereo-melody",
-                    "Default",
-                ],
-                [
-                    "90s rock song with electric guitar and heavy drums",
-                    None,
-                    "facebook/musicgen-stereo-medium",
-                    "Default",
-                ],
-                [
-                    "a light and cheerly EDM track, with syncopated drums, aery pads, and strong emotions",
-                    "./assets/bach.mp3",
-                    "facebook/musicgen-stereo-melody",
-                    "Default",
-                ],
-                [
-                    "lofi slow bpm electro chill with organic samples",
-                    None,
-                    "facebook/musicgen-stereo-medium",
-                    "Default",
-                ],
-                [
-                    "Punk rock with loud drum and power guitar",
-                    None,
-                    "facebook/musicgen-stereo-medium",
-                    "MultiBand_Diffusion",
-                ],
-            ],
-            inputs=[text, melody, model, decoder],
-            outputs=[output],
-        )
+        # gr.Examples(
+        #     fn=predict_full,
+        #     examples=[
+        #         [
+        #             "An 80s driving pop song with heavy drums and synth pads in the background",
+        #             "./assets/bach.mp3",
+        #             "facebook/musicgen-stereo-melody",
+        #             "Default",
+        #         ],
+        #         [
+        #             "A cheerful country song with acoustic guitars",
+        #             "./assets/bolero_ravel.mp3",
+        #             "facebook/musicgen-stereo-melody",
+        #             "Default",
+        #         ],
+        #         [
+        #             "90s rock song with electric guitar and heavy drums",
+        #             None,
+        #             "facebook/musicgen-stereo-medium",
+        #             "Default",
+        #         ],
+        #         [
+        #             "a light and cheerly EDM track, with syncopated drums, aery pads, and strong emotions",
+        #             "./assets/bach.mp3",
+        #             "facebook/musicgen-stereo-melody",
+        #             "Default",
+        #         ],
+        #         [
+        #             "lofi slow bpm electro chill with organic samples",
+        #             None,
+        #             "facebook/musicgen-stereo-medium",
+        #             "Default",
+        #         ],
+        #         [
+        #             "Punk rock with loud drum and power guitar",
+        #             None,
+        #             "facebook/musicgen-stereo-medium",
+        #             "MultiBand_Diffusion",
+        #         ],
+        #     ],
+        #     inputs=[text, melody, model, decoder],
+        #     outputs=[output],
+        # )
         gr.Markdown(
             """
             ### More details
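If example prompts were re-enabled for the single-model setup, the rows would only need to populate the components still passed to predict_full (decoder, text, melody); a hypothetical, self-contained sketch with stub components, not part of this commit:

import gradio as gr

# Hypothetical: each example row covers [decoder, text prompt, melody path];
# the model column is gone because the Space serves a single checkpoint.
with gr.Blocks() as demo:
    text = gr.Text(label="Input Text")
    melody = gr.Audio(type="filepath", label="Melody Condition (optional)")
    decoder = gr.Radio(["Default", "MultiBand_Diffusion"], label="Decoder", value="Default")
    gr.Examples(
        examples=[
            ["Default", "An 80s driving pop song with heavy drums and synth pads in the background", "./assets/bach.mp3"],
            ["Default", "lofi slow bpm electro chill with organic samples", None],
            ["MultiBand_Diffusion", "Punk rock with loud drum and power guitar", None],
        ],
        inputs=[decoder, text, melody],
    )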
@@ -459,7 +461,7 @@ def ui_full(launch_kwargs):
             An overlap of 12 seconds is kept with the previously generated chunk, and 18 "new" seconds
             are generated each time.

-
+            There are 10 model variations:
             1. facebook/musicgen-melody -- a music generation model capable of generating music conditioned
             on text and melody inputs. **Note**, you can also use text only.
             2. facebook/musicgen-small -- a 300M transformer decoder conditioned on text only.
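The 12-second overlap with 18 "new" seconds per step described above amounts to 30-second windows sliding by 18 seconds; an illustrative sketch of that arithmetic, not the demo's implementation:

def chunk_schedule(total_seconds: float, window: float = 30.0, overlap: float = 12.0):
    """Return (start, end) windows for long generations: each window re-uses
    `overlap` seconds of the previous chunk and contributes `window - overlap`
    (here 18) new seconds."""
    stride = window - overlap  # 18 "new" seconds per step
    starts, t = [], 0.0
    while t + window < total_seconds:
        starts.append(t)
        t += stride
    starts.append(max(total_seconds - window, 0.0))  # last window ends at total_seconds
    return [(s, min(s + window, total_seconds)) for s in starts]

# e.g. a 60-second request -> [(0.0, 30.0), (18.0, 48.0), (30.0, 60.0)]
print(chunk_schedule(60))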
@@ -468,6 +470,8 @@ def ui_full(launch_kwargs):
             5. facebook/musicgen-melody-large -- a 3.3B transformer decoder conditioned on text and melody.
             6. facebook/musicgen-stereo-*: same as the previous models but fine-tuned to output stereo audio.

+            **This Space only provides Musiversal's finetuning of 'facebook/musicgen-small'.**
+
             We also present two ways of decoding the audio tokens
             1. Use the default GAN based compression model. It can suffer from artifacts especially
             for crashes, snares etc.
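For context on the two decoder options, a minimal sketch of the token-level split, assuming the MusicGen/MultiBandDiffusion helpers shipped with audiocraft (the prompt and duration are illustrative; "/data" is the checkpoint path used by this Space):

from audiocraft.models import MusicGen, MultiBandDiffusion

# Default path: the GAN-based (EnCodec) decoder built into the model.
model = MusicGen.get_pretrained("/data")
model.set_generation_params(duration=10)
wav, tokens = model.generate(["punchy analog keys over a slow groove"], return_tokens=True)

# Alternative path: re-decode the same tokens with MultiBand Diffusion,
# which the UI exposes as the "MultiBand_Diffusion" decoder option.
mbd = MultiBandDiffusion.get_mbd_musicgen()
wav_diffusion = mbd.tokens_to_wav(tokens)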