Spaces:
Running
Running
admin
committed on
Commit
·
7ce0e34
1
Parent(s):
2176c40
sync ms
Browse files
- app.py +27 -64
- convert.py +6 -13
- generate.py +3 -9
- utils.py +43 -0
- xml2abc.py +1 -1
app.py
CHANGED
@@ -2,67 +2,28 @@ import os
|
|
2 |
import json
|
3 |
import shutil
|
4 |
import argparse
|
5 |
-
import warnings
|
6 |
import gradio as gr
|
7 |
from generate import generate_music, get_args
|
8 |
-
from utils import WEIGHTS_DIR, TEMP_DIR, EN_US
|
9 |
-
|
10 |
-
EN2ZH = {
|
11 |
-
"Additional info & option": "附加信息及选项",
|
12 |
-
"Cite": "引用",
|
13 |
-
"Submit": "提交",
|
14 |
-
"Feedback": "反馈",
|
15 |
-
"The emotion you believe the generated result should belong to": "您所认为生成结果应该所属的情感",
|
16 |
-
"Staff": "五线谱",
|
17 |
-
"ABC notation": "ABC 记谱",
|
18 |
-
"Download MXL": "下载 MXL",
|
19 |
-
"Download MusicXML": "下载 MusicXML",
|
20 |
-
"Download PDF score": "下载 PDF 乐谱",
|
21 |
-
"Download MIDI": "下载 MIDI",
|
22 |
-
"Audio": "音频",
|
23 |
-
"Download template": "下载模板",
|
24 |
-
"Save template": "保存模板",
|
25 |
-
"Save": "保存",
|
26 |
-
"The emotion to which the current template belongs": "当前模板所属情感",
|
27 |
-
"Generate": "生成",
|
28 |
-
"Generate chords coming soon": "生成和声控制暂不可用",
|
29 |
-
"Volume in dB": "dB 音量调节",
|
30 |
-
"±12 octave": "±12 八度上下移",
|
31 |
-
"BPM tempo": "BPM 速度",
|
32 |
-
"Minor": "小调",
|
33 |
-
"Major": "大调",
|
34 |
-
"Mode": "大小调",
|
35 |
-
"Pitch SD": "音高标准差",
|
36 |
-
"Low": "低",
|
37 |
-
"High": "高",
|
38 |
-
"By feature control": "通过特征控制生成",
|
39 |
-
"By template": "通过模板生成",
|
40 |
-
"Arousal: reflects the calmness-intensity of the emotion": "唤醒度 反映情绪的 平静-激烈 程度",
|
41 |
-
"Valence: reflects negative-positive levels of emotion": "愉悦度 反映情绪的 消极-积极 程度",
|
42 |
-
"Video demo": "视频教程",
|
43 |
-
"Dataset": "数据集",
|
44 |
-
"Status": "状态栏",
|
45 |
-
}
|
46 |
-
|
47 |
-
|
48 |
-
def _L(en_txt: str):
|
49 |
-
return en_txt if EN_US else f"{en_txt} ({EN2ZH[en_txt]})"
|
50 |
|
51 |
|
52 |
def infer_by_template(dataset: str, v: str, a: str, add_chord: bool):
|
53 |
status = "Success"
|
54 |
audio = midi = pdf = xml = mxl = tunes = jpg = None
|
55 |
-
|
56 |
-
|
57 |
-
|
|
|
58 |
|
59 |
-
|
60 |
-
|
61 |
|
62 |
-
|
63 |
-
|
|
|
|
|
|
|
64 |
|
65 |
-
try:
|
66 |
parser = argparse.ArgumentParser()
|
67 |
args = get_args(parser)
|
68 |
args.template = True
|
@@ -89,17 +50,20 @@ def infer_by_features(
|
|
89 |
):
|
90 |
status = "Success"
|
91 |
audio = midi = pdf = xml = mxl = tunes = jpg = None
|
92 |
-
|
93 |
-
|
94 |
-
|
|
|
95 |
|
96 |
-
|
97 |
-
|
98 |
|
99 |
-
|
100 |
-
|
|
|
|
|
|
|
101 |
|
102 |
-
try:
|
103 |
parser = argparse.ArgumentParser()
|
104 |
args = get_args(parser)
|
105 |
args.template = False
|
@@ -191,7 +155,6 @@ def save_template(label: str, pitch_std: str, mode: str, tempo: int, octave: int
|
|
191 |
|
192 |
|
193 |
if __name__ == "__main__":
|
194 |
-
warnings.filterwarnings("ignore")
|
195 |
with gr.Blocks() as demo:
|
196 |
if EN_US:
|
197 |
gr.Markdown(
|
@@ -317,10 +280,10 @@ if __name__ == "__main__":
|
|
317 |
|
318 |
status_bar = gr.Textbox(label=_L("Status"), show_copy_button=True)
|
319 |
with gr.Row():
|
320 |
-
mid_file = gr.File(label=_L("Download MIDI"), min_width=
|
321 |
-
pdf_file = gr.File(label=_L("Download PDF score"), min_width=
|
322 |
-
xml_file = gr.File(label=_L("Download MusicXML"), min_width=
|
323 |
-
mxl_file = gr.File(label=_L("Download MXL"), min_width=
|
324 |
|
325 |
with gr.Row():
|
326 |
abc_txt = gr.TextArea(
|
|
|
2 |
import json
|
3 |
import shutil
|
4 |
import argparse
|
|
|
5 |
import gradio as gr
|
6 |
from generate import generate_music, get_args
|
7 |
+
from utils import _L, WEIGHTS_DIR, TEMP_DIR, EN_US
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
|
10 |
def infer_by_template(dataset: str, v: str, a: str, add_chord: bool):
|
11 |
status = "Success"
|
12 |
audio = midi = pdf = xml = mxl = tunes = jpg = None
|
13 |
+
try:
|
14 |
+
emotion = "Q1"
|
15 |
+
if v == _L("Low") and a == _L("High"):
|
16 |
+
emotion = "Q2"
|
17 |
|
18 |
+
elif v == _L("Low") and a == _L("Low"):
|
19 |
+
emotion = "Q3"
|
20 |
|
21 |
+
elif v == _L("High") and a == _L("Low"):
|
22 |
+
emotion = "Q4"
|
23 |
+
|
24 |
+
if add_chord:
|
25 |
+
print("Chord generation comes soon!")
|
26 |
|
|
|
27 |
parser = argparse.ArgumentParser()
|
28 |
args = get_args(parser)
|
29 |
args.template = True
|
|
|
50 |
):
|
51 |
status = "Success"
|
52 |
audio = midi = pdf = xml = mxl = tunes = jpg = None
|
53 |
+
try:
|
54 |
+
emotion = "Q1"
|
55 |
+
if mode == _L("Minor") and pitch_std == _L("High"):
|
56 |
+
emotion = "Q2"
|
57 |
|
58 |
+
elif mode == _L("Minor") and pitch_std == _L("Low"):
|
59 |
+
emotion = "Q3"
|
60 |
|
61 |
+
elif mode == _L("Major") and pitch_std == _L("Low"):
|
62 |
+
emotion = "Q4"
|
63 |
+
|
64 |
+
if add_chord:
|
65 |
+
print("Chord generation comes soon!")
|
66 |
|
|
|
67 |
parser = argparse.ArgumentParser()
|
68 |
args = get_args(parser)
|
69 |
args.template = False
|
|
|
155 |
|
156 |
|
157 |
if __name__ == "__main__":
|
|
|
158 |
with gr.Blocks() as demo:
|
159 |
if EN_US:
|
160 |
gr.Markdown(
|
|
|
280 |
|
281 |
status_bar = gr.Textbox(label=_L("Status"), show_copy_button=True)
|
282 |
with gr.Row():
|
283 |
+
mid_file = gr.File(label=_L("Download MIDI"), min_width=80)
|
284 |
+
pdf_file = gr.File(label=_L("Download PDF score"), min_width=80)
|
285 |
+
xml_file = gr.File(label=_L("Download MusicXML"), min_width=80)
|
286 |
+
mxl_file = gr.File(label=_L("Download MXL"), min_width=80)
|
287 |
|
288 |
with gr.Row():
|
289 |
abc_txt = gr.TextArea(
|
convert.py
CHANGED
@@ -25,16 +25,13 @@ def xml2(xml_path: str, target_fmt: str):
|
|
25 |
def pdf2img(pdf_path: str):
|
26 |
output_path = pdf_path.replace(".pdf", ".jpg")
|
27 |
doc = fitz.open(pdf_path)
|
28 |
-
# 创建一个图像列表
|
29 |
-
images = []
|
30 |
for page_number in range(doc.page_count):
|
31 |
page = doc[page_number]
|
32 |
-
# 将页面渲染为图像
|
33 |
-
image = page.get_pixmap()
|
34 |
-
# 将图像添加到列表
|
35 |
images.append(
|
36 |
Image.frombytes("RGB", [image.width, image.height], image.samples)
|
37 |
-
)
|
38 |
# 竖向合并图像
|
39 |
merged_image = Image.new(
|
40 |
"RGB", (images[0].width, sum(image.height for image in images))
|
@@ -45,8 +42,7 @@ def pdf2img(pdf_path: str):
|
|
45 |
y_offset += image.height
|
46 |
# 保存合并后的图像为JPG
|
47 |
merged_image.save(output_path, "JPEG")
|
48 |
-
# 关闭PDF文档
|
49 |
-
doc.close()
|
50 |
return output_path
|
51 |
|
52 |
|
@@ -66,7 +62,6 @@ def xml2abc(input_xml_file: str):
|
|
66 |
stdout=subprocess.PIPE,
|
67 |
text=True,
|
68 |
)
|
69 |
-
|
70 |
if result.returncode == 0:
|
71 |
return str(result.stdout).strip()
|
72 |
|
@@ -78,13 +73,11 @@ def transpose_octaves_abc(abc_notation: str, out_xml_file: str, offset=-12):
|
|
78 |
if offset < 0:
|
79 |
for part in score.parts:
|
80 |
for measure in part.getElementsByClass(stream.Measure):
|
81 |
-
# 检查当前小节的谱号
|
82 |
-
if measure.clef:
|
83 |
measure.clef = clef.BassClef()
|
84 |
|
85 |
octaves_interval = interval.Interval(offset)
|
86 |
-
# 遍历每个音符,将其上下移八度
|
87 |
-
for note in score.recurse().notes:
|
88 |
note.transpose(octaves_interval, inPlace=True)
|
89 |
|
90 |
score.write("musicxml", fp=out_xml_file)
|
|
|
25 |
def pdf2img(pdf_path: str):
|
26 |
output_path = pdf_path.replace(".pdf", ".jpg")
|
27 |
doc = fitz.open(pdf_path)
|
28 |
+
images = [] # 创建一个图像列表
|
|
|
29 |
for page_number in range(doc.page_count):
|
30 |
page = doc[page_number]
|
31 |
+
image = page.get_pixmap() # 将页面渲染为图像
|
|
|
|
|
32 |
images.append(
|
33 |
Image.frombytes("RGB", [image.width, image.height], image.samples)
|
34 |
+
) # 将图像添加到列表
|
35 |
# 竖向合并图像
|
36 |
merged_image = Image.new(
|
37 |
"RGB", (images[0].width, sum(image.height for image in images))
|
|
|
42 |
y_offset += image.height
|
43 |
# 保存合并后的图像为JPG
|
44 |
merged_image.save(output_path, "JPEG")
|
45 |
+
doc.close() # 关闭PDF文档
|
|
|
46 |
return output_path
|
47 |
|
48 |
|
|
|
62 |
stdout=subprocess.PIPE,
|
63 |
text=True,
|
64 |
)
|
|
|
65 |
if result.returncode == 0:
|
66 |
return str(result.stdout).strip()
|
67 |
|
|
|
73 |
if offset < 0:
|
74 |
for part in score.parts:
|
75 |
for measure in part.getElementsByClass(stream.Measure):
|
76 |
+
if measure.clef: # 检查当前小节的谱号
|
|
|
77 |
measure.clef = clef.BassClef()
|
78 |
|
79 |
octaves_interval = interval.Interval(offset)
|
80 |
+
for note in score.recurse().notes: # 遍历每个音符,将其上下移八度
|
|
|
81 |
note.transpose(octaves_interval, inPlace=True)
|
82 |
|
83 |
score.write("musicxml", fp=out_xml_file)
|
generate.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
import re
|
2 |
import os
|
3 |
-
import shutil
|
4 |
import time
|
5 |
import torch
|
6 |
import random
|
|
|
7 |
import argparse
|
8 |
import soundfile as sf
|
9 |
from transformers import GPT2Config
|
@@ -157,7 +157,6 @@ def generate_music(
|
|
157 |
)
|
158 |
if tune == "":
|
159 |
tokens = None
|
160 |
-
|
161 |
else:
|
162 |
prefix = patchilizer.decode(input_patches[0])
|
163 |
remaining_tokens = prompt[len(prefix) :]
|
@@ -201,11 +200,9 @@ def generate_music(
|
|
201 |
|
202 |
tunes += f"{tune}\n\n"
|
203 |
print("\n")
|
204 |
-
|
205 |
# fix tempo
|
206 |
if fix_tempo != None:
|
207 |
tempo = f"Q:{fix_tempo}\n"
|
208 |
-
|
209 |
else:
|
210 |
tempo = f"Q:{random.randint(88, 132)}\n"
|
211 |
if emo == "Q1":
|
@@ -227,19 +224,16 @@ def generate_music(
|
|
227 |
tunes = tunes.replace("K:none\n", f"K:{K_val}\n")
|
228 |
|
229 |
tunes = tunes.replace(f"A:{emo}\n", tempo)
|
230 |
-
# fix mode:major/minor
|
231 |
-
mode = "major" if emo == "Q1" or emo == "Q4" else "minor"
|
232 |
if (mode == "major") and ("m" in K_val):
|
233 |
tunes = tunes.replace(f"\nK:{K_val}\n", f"\nK:{K_val.split('m')[0]}\n")
|
234 |
-
|
235 |
elif (mode == "minor") and (not "m" in K_val):
|
236 |
tunes = tunes.replace(f"\nK:{K_val}\n", f"\nK:{K_val.replace('dor', '')}min\n")
|
237 |
|
238 |
print("Generation time: {:.2f} seconds".format(time.time() - start_time))
|
239 |
timestamp = time.strftime("%a_%d_%b_%Y_%H_%M_%S", time.localtime())
|
240 |
try:
|
241 |
-
# fix avg_pitch (octave)
|
242 |
-
if fix_pitch != None:
|
243 |
if fix_pitch:
|
244 |
tunes, xml = transpose_octaves_abc(
|
245 |
tunes,
|
|
|
1 |
import re
|
2 |
import os
|
|
|
3 |
import time
|
4 |
import torch
|
5 |
import random
|
6 |
+
import shutil
|
7 |
import argparse
|
8 |
import soundfile as sf
|
9 |
from transformers import GPT2Config
|
|
|
157 |
)
|
158 |
if tune == "":
|
159 |
tokens = None
|
|
|
160 |
else:
|
161 |
prefix = patchilizer.decode(input_patches[0])
|
162 |
remaining_tokens = prompt[len(prefix) :]
|
|
|
200 |
|
201 |
tunes += f"{tune}\n\n"
|
202 |
print("\n")
|
|
|
203 |
# fix tempo
|
204 |
if fix_tempo != None:
|
205 |
tempo = f"Q:{fix_tempo}\n"
|
|
|
206 |
else:
|
207 |
tempo = f"Q:{random.randint(88, 132)}\n"
|
208 |
if emo == "Q1":
|
|
|
224 |
tunes = tunes.replace("K:none\n", f"K:{K_val}\n")
|
225 |
|
226 |
tunes = tunes.replace(f"A:{emo}\n", tempo)
|
227 |
+
mode = "major" if emo == "Q1" or emo == "Q4" else "minor" # fix mode:major/minor
|
|
|
228 |
if (mode == "major") and ("m" in K_val):
|
229 |
tunes = tunes.replace(f"\nK:{K_val}\n", f"\nK:{K_val.split('m')[0]}\n")
|
|
|
230 |
elif (mode == "minor") and (not "m" in K_val):
|
231 |
tunes = tunes.replace(f"\nK:{K_val}\n", f"\nK:{K_val.replace('dor', '')}min\n")
|
232 |
|
233 |
print("Generation time: {:.2f} seconds".format(time.time() - start_time))
|
234 |
timestamp = time.strftime("%a_%d_%b_%Y_%H_%M_%S", time.localtime())
|
235 |
try:
|
236 |
+
if fix_pitch != None: # fix avg_pitch (octave)
|
|
|
237 |
if fix_pitch:
|
238 |
tunes, xml = transpose_octaves_abc(
|
239 |
tunes,
|
utils.py
CHANGED
@@ -2,12 +2,15 @@ import os
|
|
2 |
import sys
|
3 |
import time
|
4 |
import torch
|
|
|
5 |
import requests
|
6 |
import subprocess
|
7 |
import modelscope
|
8 |
import huggingface_hub
|
9 |
from tqdm import tqdm
|
10 |
|
|
|
|
|
11 |
TEMP_DIR = "./__pycache__"
|
12 |
EN_US = os.getenv("LANG") != "zh_CN.UTF-8"
|
13 |
WEIGHTS_DIR = (
|
@@ -23,6 +26,46 @@ CHAR_NUM_LAYERS = 3 # Number of layers in the decoder
|
|
23 |
PATCH_SAMPLING_BATCH_SIZE = 0 # Batch size for training patch, 0 for full context
|
24 |
LOAD_FROM_CHECKPOINT = True # Whether to load weights from a checkpoint
|
25 |
SHARE_WEIGHTS = False # Whether to share weights between the encoder and decoder
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
|
28 |
def download(filename: str, url: str):
|
|
|
2 |
import sys
|
3 |
import time
|
4 |
import torch
|
5 |
+
import warnings
|
6 |
import requests
|
7 |
import subprocess
|
8 |
import modelscope
|
9 |
import huggingface_hub
|
10 |
from tqdm import tqdm
|
11 |
|
12 |
+
warnings.filterwarnings("ignore")
|
13 |
+
|
14 |
TEMP_DIR = "./__pycache__"
|
15 |
EN_US = os.getenv("LANG") != "zh_CN.UTF-8"
|
16 |
WEIGHTS_DIR = (
|
|
|
26 |
PATCH_SAMPLING_BATCH_SIZE = 0 # Batch size for training patch, 0 for full context
|
27 |
LOAD_FROM_CHECKPOINT = True # Whether to load weights from a checkpoint
|
28 |
SHARE_WEIGHTS = False # Whether to share weights between the encoder and decoder
|
29 |
+
EN2ZH = {
|
30 |
+
"Low": "低",
|
31 |
+
"High": "高",
|
32 |
+
"Cite": "引用",
|
33 |
+
"Save": "保存",
|
34 |
+
"Audio": "音频",
|
35 |
+
"Minor": "小调",
|
36 |
+
"Major": "大调",
|
37 |
+
"Mode": "大小调",
|
38 |
+
"Submit": "提交",
|
39 |
+
"Staff": "五线谱",
|
40 |
+
"Status": "状态栏",
|
41 |
+
"Feedback": "反馈",
|
42 |
+
"Generate": "生成",
|
43 |
+
"Dataset": "数据集",
|
44 |
+
"BPM tempo": "BPM 速度",
|
45 |
+
"Pitch SD": "音高标准差",
|
46 |
+
"Video demo": "视频教程",
|
47 |
+
"ABC notation": "ABC 记谱",
|
48 |
+
"Download MXL": "下载 MXL",
|
49 |
+
"Save template": "保存模板",
|
50 |
+
"Download MIDI": "下载 MIDI",
|
51 |
+
"By template": "通过模板生成",
|
52 |
+
"Volume in dB": "dB 音量调节",
|
53 |
+
"±12 octave": "±12 八度上下移",
|
54 |
+
"Download template": "下载模板",
|
55 |
+
"Download MusicXML": "下载 MusicXML",
|
56 |
+
"Download PDF score": "下载 PDF 乐谱",
|
57 |
+
"By feature control": "通过特征控制生成",
|
58 |
+
"Additional info & option": "附加信息及选项",
|
59 |
+
"Generate chords coming soon": "生成和声控制暂不可用",
|
60 |
+
"The emotion to which the current template belongs": "当前模板所属情感",
|
61 |
+
"Valence: reflects negative-positive levels of emotion": "愉悦度 反映情绪的 消极-积极 程度",
|
62 |
+
"Arousal: reflects the calmness-intensity of the emotion": "唤醒度 反映情绪的 平静-激烈 程度",
|
63 |
+
"The emotion you believe the generated result should belong to": "您所认为生成结果应该所属的情感",
|
64 |
+
}
|
65 |
+
|
66 |
+
|
67 |
+
def _L(en_txt: str):
|
68 |
+
return en_txt if EN_US else f"{en_txt} ({EN2ZH[en_txt]})"
|
69 |
|
70 |
|
71 |
def download(filename: str, url: str):
|
xml2abc.py
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
# coding=latin-1
|
3 |
"""
|
4 |
Copyright (C) 2012-2018: W.G. Vree
|
5 |
-
Contributions: M. Tarenskeen, N. Liberg, Paul Villiger, Janus Meuris, Larry Myerscough,
|
6 |
Dick Jackson, Jan Wybren de Jong, Mark Zealey.
|
7 |
|
8 |
This program is free software; you can redistribute it and/or modify it under the terms of the
|
|
|
2 |
# coding=latin-1
|
3 |
"""
|
4 |
Copyright (C) 2012-2018: W.G. Vree
|
5 |
+
Contributions: M. Tarenskeen, N. Liberg, Paul Villiger, Janus Meuris, Larry Myerscough,
|
6 |
Dick Jackson, Jan Wybren de Jong, Mark Zealey.
|
7 |
|
8 |
This program is free software; you can redistribute it and/or modify it under the terms of the
|