eusip committed on
Commit
0778d14
·
verified ·
1 Parent(s): c4a9815

Update demos/musicgen_app.py

Browse files
Files changed (1) hide show
  1. demos/musicgen_app.py +74 -70
demos/musicgen_app.py CHANGED
@@ -93,15 +93,16 @@ def make_waveform(*args, **kwargs):
93
  return out
94
 
95
 
96
- def load_model(version="facebook/musicgen-melody"):
97
  global MODEL
98
- print("Loading model", version)
99
  if MODEL is None or MODEL.name != version:
100
  # Clear PyTorch CUDA cache and delete model
101
  del MODEL
102
  torch.cuda.empty_cache()
103
  MODEL = None # in case loading would crash
104
- MODEL = MusicGen.get_pretrained(version)
 
105
 
106
 
107
  def load_diffusion():
@@ -290,9 +291,10 @@ def ui_full(launch_kwargs):
290
  gr.Markdown(
291
  """
292
  # MusicGen
293
- This is your private demo for [MusicGen](https://github.com/facebookresearch/audiocraft),
294
  a simple and controllable model for music generation
295
- presented at: ["Simple and Controllable Music Generation"](https://huggingface.co/papers/2306.05284)
 
296
  """
297
  )
298
  with gr.Row():
@@ -316,25 +318,25 @@ def ui_full(launch_kwargs):
316
  submit = gr.Button("Submit")
317
  # Adapted from https://github.com/rkfg/audiocraft/blob/long/app.py, MIT license.
318
  _ = gr.Button("Interrupt").click(fn=interrupt, queue=False)
319
- with gr.Row():
320
- model = gr.Radio(
321
- [
322
- "facebook/musicgen-melody",
323
- "facebook/musicgen-medium",
324
- "facebook/musicgen-small",
325
- "facebook/musicgen-large",
326
- "facebook/musicgen-melody-large",
327
- "facebook/musicgen-stereo-small",
328
- "facebook/musicgen-stereo-medium",
329
- "facebook/musicgen-stereo-melody",
330
- "facebook/musicgen-stereo-large",
331
- "facebook/musicgen-stereo-melody-large",
332
- ],
333
- label="Model",
334
- value="facebook/musicgen-stereo-melody",
335
- interactive=True,
336
- )
337
- model_path = gr.Text(label="Model Path (custom models)")
338
  with gr.Row():
339
  decoder = gr.Radio(
340
  ["Default", "MultiBand_Diffusion"],
@@ -375,8 +377,8 @@ def ui_full(launch_kwargs):
375
  ).then(
376
  predict_full,
377
  inputs=[
378
- model,
379
- model_path,
380
  decoder,
381
  text,
382
  melody,
@@ -392,49 +394,49 @@ def ui_full(launch_kwargs):
392
  toggle_audio_src, radio, [melody], queue=False, show_progress=False
393
  )
394
 
395
- gr.Examples(
396
- fn=predict_full,
397
- examples=[
398
- [
399
- "An 80s driving pop song with heavy drums and synth pads in the background",
400
- "./assets/bach.mp3",
401
- "facebook/musicgen-stereo-melody",
402
- "Default",
403
- ],
404
- [
405
- "A cheerful country song with acoustic guitars",
406
- "./assets/bolero_ravel.mp3",
407
- "facebook/musicgen-stereo-melody",
408
- "Default",
409
- ],
410
- [
411
- "90s rock song with electric guitar and heavy drums",
412
- None,
413
- "facebook/musicgen-stereo-medium",
414
- "Default",
415
- ],
416
- [
417
- "a light and cheerly EDM track, with syncopated drums, aery pads, and strong emotions",
418
- "./assets/bach.mp3",
419
- "facebook/musicgen-stereo-melody",
420
- "Default",
421
- ],
422
- [
423
- "lofi slow bpm electro chill with organic samples",
424
- None,
425
- "facebook/musicgen-stereo-medium",
426
- "Default",
427
- ],
428
- [
429
- "Punk rock with loud drum and power guitar",
430
- None,
431
- "facebook/musicgen-stereo-medium",
432
- "MultiBand_Diffusion",
433
- ],
434
- ],
435
- inputs=[text, melody, model, decoder],
436
- outputs=[output],
437
- )
438
  gr.Markdown(
439
  """
440
  ### More details
@@ -459,7 +461,7 @@ def ui_full(launch_kwargs):
459
  An overlap of 12 seconds is kept with the previously generated chunk, and 18 "new" seconds
460
  are generated each time.
461
 
462
- We present 10 model variations:
463
  1. facebook/musicgen-melody -- a music generation model capable of generating music conditioned
464
  on text and melody inputs. **Note**, you can also use text only.
465
  2. facebook/musicgen-small -- a 300M transformer decoder conditioned on text only.
@@ -468,6 +470,8 @@ def ui_full(launch_kwargs):
468
  5. facebook/musicgen-melody-large -- a 3.3B transformer decoder conditioned on text and melody.
469
  6. facebook/musicgen-stereo-*: same as the previous models but fine tuned to output stereo audio.
470
 
 
 
471
  We also present two ways of decoding the audio tokens
472
  1. Use the default GAN based compression model. It can suffer from artifacts especially
473
  for crashes, snares etc.
 
93
  return out
94
 
95
 
96
+ def load_model(version="facebook/musicgen-small"):
97
  global MODEL
98
+ print("Loading Musivesal musicgen-small") # , version
99
  if MODEL is None or MODEL.name != version:
100
  # Clear PyTorch CUDA cache and delete model
101
  del MODEL
102
  torch.cuda.empty_cache()
103
  MODEL = None # in case loading would crash
104
+ MODEL = MusicGen.get_pretrained("/data")
105
+ print("Custom model loaded.")
106
 
107
 
108
  def load_diffusion():
 
291
  gr.Markdown(
292
  """
293
  # MusicGen
294
+ This is a private demo of [MusicGen](https://github.com/facebookresearch/audiocraft),
295
  a simple and controllable model for music generation
296
+ presented at: ["Simple and Controllable Music Generation"](https://huggingface.co/papers/2306.05284).
297
+ This Space hosts **"facebook/musicgen-small"**. It has been finetuned on a proprietary keyboard dataset from [Musiversal](https://musiversal.com/).
298
  """
299
  )
300
  with gr.Row():
 
318
  submit = gr.Button("Submit")
319
  # Adapted from https://github.com/rkfg/audiocraft/blob/long/app.py, MIT license.
320
  _ = gr.Button("Interrupt").click(fn=interrupt, queue=False)
321
+ # with gr.Row():
322
+ # model = gr.Radio(
323
+ # [
324
+ # "facebook/musicgen-melody",
325
+ # "facebook/musicgen-medium",
326
+ # "facebook/musicgen-small",
327
+ # "facebook/musicgen-large",
328
+ # "facebook/musicgen-melody-large",
329
+ # "facebook/musicgen-stereo-small",
330
+ # "facebook/musicgen-stereo-medium",
331
+ # "facebook/musicgen-stereo-melody",
332
+ # "facebook/musicgen-stereo-large",
333
+ # "facebook/musicgen-stereo-melody-large",
334
+ # ],
335
+ # label="Model",
336
+ # value="facebook/musicgen-stereo-melody",
337
+ # interactive=True,
338
+ # )
339
+ # model_path = gr.Text(label="Model Path (custom models)")
340
  with gr.Row():
341
  decoder = gr.Radio(
342
  ["Default", "MultiBand_Diffusion"],
 
377
  ).then(
378
  predict_full,
379
  inputs=[
380
+ # model,
381
+ # model_path,
382
  decoder,
383
  text,
384
  melody,
 
394
  toggle_audio_src, radio, [melody], queue=False, show_progress=False
395
  )
396
 
397
+ # gr.Examples(
398
+ # fn=predict_full,
399
+ # examples=[
400
+ # [
401
+ # "An 80s driving pop song with heavy drums and synth pads in the background",
402
+ # "./assets/bach.mp3",
403
+ # "facebook/musicgen-stereo-melody",
404
+ # "Default",
405
+ # ],
406
+ # [
407
+ # "A cheerful country song with acoustic guitars",
408
+ # "./assets/bolero_ravel.mp3",
409
+ # "facebook/musicgen-stereo-melody",
410
+ # "Default",
411
+ # ],
412
+ # [
413
+ # "90s rock song with electric guitar and heavy drums",
414
+ # None,
415
+ # "facebook/musicgen-stereo-medium",
416
+ # "Default",
417
+ # ],
418
+ # [
419
+ # "a light and cheerly EDM track, with syncopated drums, aery pads, and strong emotions",
420
+ # "./assets/bach.mp3",
421
+ # "facebook/musicgen-stereo-melody",
422
+ # "Default",
423
+ # ],
424
+ # [
425
+ # "lofi slow bpm electro chill with organic samples",
426
+ # None,
427
+ # "facebook/musicgen-stereo-medium",
428
+ # "Default",
429
+ # ],
430
+ # [
431
+ # "Punk rock with loud drum and power guitar",
432
+ # None,
433
+ # "facebook/musicgen-stereo-medium",
434
+ # "MultiBand_Diffusion",
435
+ # ],
436
+ # ],
437
+ # inputs=[text, melody, model, decoder],
438
+ # outputs=[output],
439
+ # )
440
  gr.Markdown(
441
  """
442
  ### More details
 
461
  An overlap of 12 seconds is kept with the previously generated chunk, and 18 "new" seconds
462
  are generated each time.
463
 
464
+ There are 10 model variations:
465
  1. facebook/musicgen-melody -- a music generation model capable of generating music conditioned
466
  on text and melody inputs. **Note**, you can also use text only.
467
  2. facebook/musicgen-small -- a 300M transformer decoder conditioned on text only.
 
470
  5. facebook/musicgen-melody-large -- a 3.3B transformer decoder conditioned on text and melody.
471
  6. facebook/musicgen-stereo-*: same as the previous models but fine tuned to output stereo audio.
472
 
473
+ **This Space only provides Musiversal's finetuning of 'facebook/musicgen-small'.**
474
+
475
  We also present two ways of decoding the audio tokens
476
  1. Use the default GAN based compression model. It can suffer from artifacts especially
477
  for crashes, snares etc.