admin committed on
Commit 7ce0e34 · 1 Parent(s): 2176c40
Files changed (5)
  1. app.py +27 -64
  2. convert.py +6 -13
  3. generate.py +3 -9
  4. utils.py +43 -0
  5. xml2abc.py +1 -1
app.py CHANGED
@@ -2,67 +2,28 @@ import os
 import json
 import shutil
 import argparse
-import warnings
 import gradio as gr
 from generate import generate_music, get_args
-from utils import WEIGHTS_DIR, TEMP_DIR, EN_US
-
-EN2ZH = {
-    "Additional info & option": "附加信息及选项",
-    "Cite": "引用",
-    "Submit": "提交",
-    "Feedback": "反馈",
-    "The emotion you believe the generated result should belong to": "您所认为生成结果应该所属的情感",
-    "Staff": "五线谱",
-    "ABC notation": "ABC 记谱",
-    "Download MXL": "下载 MXL",
-    "Download MusicXML": "下载 MusicXML",
-    "Download PDF score": "下载 PDF 乐谱",
-    "Download MIDI": "下载 MIDI",
-    "Audio": "音频",
-    "Download template": "下载模板",
-    "Save template": "保存模板",
-    "Save": "保存",
-    "The emotion to which the current template belongs": "当前模板所属情感",
-    "Generate": "生成",
-    "Generate chords coming soon": "生成和声控制暂不可用",
-    "Volume in dB": "dB 音量调节",
-    "±12 octave": "±12 八度上下移",
-    "BPM tempo": "BPM 速度",
-    "Minor": "小调",
-    "Major": "大调",
-    "Mode": "大小调",
-    "Pitch SD": "音高标准差",
-    "Low": "低",
-    "High": "高",
-    "By feature control": "通过特征控制生成",
-    "By template": "通过模板生成",
-    "Arousal: reflects the calmness-intensity of the emotion": "唤醒度 反映情绪的 平静-激烈 程度",
-    "Valence: reflects negative-positive levels of emotion": "愉悦度 反映情绪的 消极-积极 程度",
-    "Video demo": "视频教程",
-    "Dataset": "数据集",
-    "Status": "状态栏",
-}
-
-
-def _L(en_txt: str):
-    return en_txt if EN_US else f"{en_txt} ({EN2ZH[en_txt]})"
+from utils import _L, WEIGHTS_DIR, TEMP_DIR, EN_US


 def infer_by_template(dataset: str, v: str, a: str, add_chord: bool):
     status = "Success"
     audio = midi = pdf = xml = mxl = tunes = jpg = None
-    emotion = "Q1"
-    if v == _L("Low") and a == _L("High"):
-        emotion = "Q2"
-
-    elif v == _L("Low") and a == _L("Low"):
-        emotion = "Q3"
-
-    elif v == _L("High") and a == _L("Low"):
-        emotion = "Q4"
-
-    try:
+    try:
+        emotion = "Q1"
+        if v == _L("Low") and a == _L("High"):
+            emotion = "Q2"
+
+        elif v == _L("Low") and a == _L("Low"):
+            emotion = "Q3"
+
+        elif v == _L("High") and a == _L("Low"):
+            emotion = "Q4"
+
+        if add_chord:
+            print("Chord generation comes soon!")
+
         parser = argparse.ArgumentParser()
         args = get_args(parser)
         args.template = True
@@ -89,17 +50,20 @@ def infer_by_features(
 ):
     status = "Success"
     audio = midi = pdf = xml = mxl = tunes = jpg = None
-    emotion = "Q1"
-    if mode == _L("Minor") and pitch_std == _L("High"):
-        emotion = "Q2"
-
-    elif mode == _L("Minor") and pitch_std == _L("Low"):
-        emotion = "Q3"
-
-    elif mode == _L("Major") and pitch_std == _L("Low"):
-        emotion = "Q4"
-
-    try:
+    try:
+        emotion = "Q1"
+        if mode == _L("Minor") and pitch_std == _L("High"):
+            emotion = "Q2"
+
+        elif mode == _L("Minor") and pitch_std == _L("Low"):
+            emotion = "Q3"
+
+        elif mode == _L("Major") and pitch_std == _L("Low"):
+            emotion = "Q4"
+
+        if add_chord:
+            print("Chord generation comes soon!")
+
         parser = argparse.ArgumentParser()
         args = get_args(parser)
         args.template = False
@@ -191,7 +155,6 @@ def save_template(label: str, pitch_std: str, mode: str, tempo: int, octave: int


 if __name__ == "__main__":
-    warnings.filterwarnings("ignore")
     with gr.Blocks() as demo:
         if EN_US:
             gr.Markdown(
@@ -317,10 +280,10 @@ if __name__ == "__main__":

         status_bar = gr.Textbox(label=_L("Status"), show_copy_button=True)
         with gr.Row():
-            mid_file = gr.File(label=_L("Download MIDI"), min_width=40)
-            pdf_file = gr.File(label=_L("Download PDF score"), min_width=40)
-            xml_file = gr.File(label=_L("Download MusicXML"), min_width=40)
-            mxl_file = gr.File(label=_L("Download MXL"), min_width=40)
+            mid_file = gr.File(label=_L("Download MIDI"), min_width=80)
+            pdf_file = gr.File(label=_L("Download PDF score"), min_width=80)
+            xml_file = gr.File(label=_L("Download MusicXML"), min_width=80)
+            mxl_file = gr.File(label=_L("Download MXL"), min_width=80)

         with gr.Row():
             abc_txt = gr.TextArea(
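
Both inference entry points reduce the two UI dropdowns to a four-quadrant (4Q) emotion label, with Q1 as the fall-through default. A minimal sketch of that mapping as a standalone helper (the name quadrant_from_axes is hypothetical, and the localized _L(...) comparisons from the diff are dropped for clarity):

# Sketch only: map valence/arousal dropdown values to a 4Q emotion label,
# mirroring infer_by_template in the diff above; not part of the commit.
def quadrant_from_axes(valence: str, arousal: str) -> str:
    if valence == "Low" and arousal == "High":
        return "Q2"
    elif valence == "Low" and arousal == "Low":
        return "Q3"
    elif valence == "High" and arousal == "Low":
        return "Q4"
    return "Q1"  # high valence / high arousal is the default fall-through


assert quadrant_from_axes("High", "High") == "Q1"
assert quadrant_from_axes("Low", "Low") == "Q3"

Moving this chain inside the try block, as the commit does, means a bad _L lookup or argparse failure is caught and reported through the status bar instead of crashing the handler.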
convert.py CHANGED
@@ -25,16 +25,13 @@ def xml2(xml_path: str, target_fmt: str):
 def pdf2img(pdf_path: str):
     output_path = pdf_path.replace(".pdf", ".jpg")
     doc = fitz.open(pdf_path)
-    # create a list for the page images
-    images = []
+    images = []  # create a list for the page images
     for page_number in range(doc.page_count):
         page = doc[page_number]
-        # render the page to an image
-        image = page.get_pixmap()
-        # append the image to the list
+        image = page.get_pixmap()  # render the page to an image
         images.append(
             Image.frombytes("RGB", [image.width, image.height], image.samples)
-        )
+        )  # append the image to the list
     # merge the images vertically
     merged_image = Image.new(
         "RGB", (images[0].width, sum(image.height for image in images))
@@ -45,8 +42,7 @@ def pdf2img(pdf_path: str):
         y_offset += image.height
     # save the merged image as JPG
     merged_image.save(output_path, "JPEG")
-    # close the PDF document
-    doc.close()
+    doc.close()  # close the PDF document
     return output_path


@@ -66,7 +62,6 @@ def xml2abc(input_xml_file: str):
         stdout=subprocess.PIPE,
         text=True,
     )
-
     if result.returncode == 0:
         return str(result.stdout).strip()

@@ -78,13 +73,11 @@ def transpose_octaves_abc(abc_notation: str, out_xml_file: str, offset=-12):
     if offset < 0:
         for part in score.parts:
             for measure in part.getElementsByClass(stream.Measure):
-                # check the clef of the current measure
-                if measure.clef:
+                if measure.clef:  # check the clef of the current measure
                     measure.clef = clef.BassClef()

     octaves_interval = interval.Interval(offset)
-    # iterate over the notes, shifting each by an octave
-    for note in score.recurse().notes:
+    for note in score.recurse().notes:  # iterate over the notes, shifting each by an octave
         note.transpose(octaves_interval, inPlace=True)

     score.write("musicxml", fp=out_xml_file)
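
pdf2img renders each page with PyMuPDF and pastes the resulting pixmaps onto one tall canvas. A self-contained sketch of the same vertical-stacking technique, assuming pymupdf and Pillow are installed (the name stack_pdf_pages is illustrative):

import fitz  # PyMuPDF
from PIL import Image


def stack_pdf_pages(pdf_path: str) -> str:
    """Render every page of pdf_path and stack them vertically into one JPEG."""
    out_path = pdf_path.replace(".pdf", ".jpg")
    doc = fitz.open(pdf_path)
    pixmaps = [doc[i].get_pixmap() for i in range(doc.page_count)]
    pages = [Image.frombytes("RGB", (p.width, p.height), p.samples) for p in pixmaps]
    # canvas is as wide as the first page and as tall as all pages combined
    canvas = Image.new("RGB", (pages[0].width, sum(p.height for p in pages)))
    y = 0
    for page in pages:
        canvas.paste(page, (0, y))
        y += page.height
    canvas.save(out_path, "JPEG")
    doc.close()
    return out_path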
generate.py CHANGED
@@ -1,9 +1,9 @@
 import re
 import os
-import shutil
 import time
 import torch
 import random
+import shutil
 import argparse
 import soundfile as sf
 from transformers import GPT2Config
@@ -157,7 +157,6 @@ def generate_music(
     )
     if tune == "":
         tokens = None
-
     else:
         prefix = patchilizer.decode(input_patches[0])
         remaining_tokens = prompt[len(prefix) :]
@@ -201,11 +200,9 @@ def generate_music(

         tunes += f"{tune}\n\n"
         print("\n")
-
     # fix tempo
     if fix_tempo != None:
         tempo = f"Q:{fix_tempo}\n"
-
     else:
         tempo = f"Q:{random.randint(88, 132)}\n"
         if emo == "Q1":
@@ -227,19 +224,16 @@ def generate_music(
     tunes = tunes.replace("K:none\n", f"K:{K_val}\n")

     tunes = tunes.replace(f"A:{emo}\n", tempo)
-    # fix mode: major/minor
-    mode = "major" if emo == "Q1" or emo == "Q4" else "minor"
+    mode = "major" if emo == "Q1" or emo == "Q4" else "minor"  # fix mode: major/minor
     if (mode == "major") and ("m" in K_val):
         tunes = tunes.replace(f"\nK:{K_val}\n", f"\nK:{K_val.split('m')[0]}\n")
-
     elif (mode == "minor") and (not "m" in K_val):
         tunes = tunes.replace(f"\nK:{K_val}\n", f"\nK:{K_val.replace('dor', '')}min\n")

     print("Generation time: {:.2f} seconds".format(time.time() - start_time))
     timestamp = time.strftime("%a_%d_%b_%Y_%H_%M_%S", time.localtime())
     try:
-        # fix avg_pitch (octave)
-        if fix_pitch != None:
+        if fix_pitch != None:  # fix avg_pitch (octave)
             if fix_pitch:
                 tunes, xml = transpose_octaves_abc(
                     tunes,
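
The K: fix-up in the last hunk forces the generated ABC key header to agree with the emotion-implied mode: Q1/Q4 imply major, so any minor suffix is cut at the first "m"; Q2/Q3 imply minor, so a "dor" marker is dropped and "min" appended. A sketch of that logic as a pure function (name and signature are hypothetical):

# Sketch of the K: header fix-up from the diff above, pulled out as a
# pure function for illustration; not part of the commit.
def fix_key_mode(tunes: str, K_val: str, emo: str) -> str:
    mode = "major" if emo in ("Q1", "Q4") else "minor"
    if mode == "major" and "m" in K_val:
        # e.g. "K:Amin" -> "K:A": drop everything from the first "m"
        return tunes.replace(f"\nK:{K_val}\n", f"\nK:{K_val.split('m')[0]}\n")
    elif mode == "minor" and "m" not in K_val:
        # e.g. "K:Ador" -> "K:Amin": strip a dorian marker, then mark minor
        return tunes.replace(f"\nK:{K_val}\n", f"\nK:{K_val.replace('dor', '')}min\n")
    return tunes


print(fix_key_mode("X:1\nK:Amin\nCDEF|\n", "Amin", "Q1"))  # K: line becomes "K:A"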
utils.py CHANGED
@@ -2,12 +2,15 @@ import os
 import sys
 import time
 import torch
+import warnings
 import requests
 import subprocess
 import modelscope
 import huggingface_hub
 from tqdm import tqdm

+warnings.filterwarnings("ignore")
+
 TEMP_DIR = "./__pycache__"
 EN_US = os.getenv("LANG") != "zh_CN.UTF-8"
 WEIGHTS_DIR = (
@@ -23,6 +26,46 @@ CHAR_NUM_LAYERS = 3  # Number of layers in the decoder
 PATCH_SAMPLING_BATCH_SIZE = 0  # Batch size for training patch, 0 for full context
 LOAD_FROM_CHECKPOINT = True  # Whether to load weights from a checkpoint
 SHARE_WEIGHTS = False  # Whether to share weights between the encoder and decoder
+EN2ZH = {
+    "Low": "低",
+    "High": "高",
+    "Cite": "引用",
+    "Save": "保存",
+    "Audio": "音频",
+    "Minor": "小调",
+    "Major": "大调",
+    "Mode": "大小调",
+    "Submit": "提交",
+    "Staff": "五线谱",
+    "Status": "状态栏",
+    "Feedback": "反馈",
+    "Generate": "生成",
+    "Dataset": "数据集",
+    "BPM tempo": "BPM 速度",
+    "Pitch SD": "音高标准差",
+    "Video demo": "视频教程",
+    "ABC notation": "ABC 记谱",
+    "Download MXL": "下载 MXL",
+    "Save template": "保存模板",
+    "Download MIDI": "下载 MIDI",
+    "By template": "通过模板生成",
+    "Volume in dB": "dB 音量调节",
+    "±12 octave": "±12 八度上下移",
+    "Download template": "下载模板",
+    "Download MusicXML": "下载 MusicXML",
+    "Download PDF score": "下载 PDF 乐谱",
+    "By feature control": "通过特征控制生成",
+    "Additional info & option": "附加信息及选项",
+    "Generate chords coming soon": "生成和声控制暂不可用",
+    "The emotion to which the current template belongs": "当前模板所属情感",
+    "Valence: reflects negative-positive levels of emotion": "愉悦度 反映情绪的 消极-积极 程度",
+    "Arousal: reflects the calmness-intensity of the emotion": "唤醒度 反映情绪的 平静-激烈 程度",
+    "The emotion you believe the generated result should belong to": "您所认为生成结果应该所属的情感",
+}
+
+
+def _L(en_txt: str):
+    return en_txt if EN_US else f"{en_txt} ({EN2ZH[en_txt]})"


 def download(filename: str, url: str):
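
The relocated _L helper is now the single localization shim the UI imports: it returns the English label unchanged when EN_US is true, otherwise it appends the Chinese translation from EN2ZH. A quick usage sketch, assuming the process runs with LANG=zh_CN.UTF-8 so that EN_US is False:

from utils import _L  # assumes LANG=zh_CN.UTF-8, so EN_US is False

print(_L("Generate"))       # "Generate (生成)"
print(_L("Download MIDI"))  # "Download MIDI (下载 MIDI)"

# Any label missing from EN2ZH raises KeyError, so every string passed
# to _L in app.py must have an entry in the table above.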
xml2abc.py CHANGED
@@ -2,7 +2,7 @@
 # coding=latin-1
 """
 Copyright (C) 2012-2018: W.G. Vree
-Contributions: M. Tarenskeen, N. Liberg, Paul Villiger, Janus Meuris, Larry Myerscough,
+Contributions: M. Tarenskeen, N. Liberg, Paul Villiger, Janus Meuris, Larry Myerscough,
 Dick Jackson, Jan Wybren de Jong, Mark Zealey.

 This program is free software; you can redistribute it and/or modify it under the terms of the