Spaces:

monetjoe
/

EMelodyGen

Running

App Files Files

admin committed 24 days ago

Commit

7ce0e34

1 Parent(s): 2176c40

sync ms

Browse files

Files changed (5) hide show

app.py +27 -64
convert.py +6 -13
generate.py +3 -9
utils.py +43 -0
xml2abc.py +1 -1

app.py CHANGED Viewed

@@ -2,67 +2,28 @@ import os
 import json
 import shutil
 import argparse
-import warnings
 import gradio as gr
 from generate import generate_music, get_args
-from utils import WEIGHTS_DIR, TEMP_DIR, EN_US
-EN2ZH = {
-    "Additional info & option": "附加信息及选项",
-    "Cite": "引用",
-    "Submit": "提交",
-    "Feedback": "反馈",
-    "The emotion you believe the generated result should belong to": "您所认为生成结果应该所属的情感",
-    "Staff": "五线谱",
-    "ABC notation": "ABC 记谱",
-    "Download MXL": "下载 MXL",
-    "Download MusicXML": "下载 MusicXML",
-    "Download PDF score": "下载 PDF 乐谱",
-    "Download MIDI": "下载 MIDI",
-    "Audio": "音频",
-    "Download template": "下载模板",
-    "Save template": "保存模板",
-    "Save": "保存",
-    "The emotion to which the current template belongs": "当前模板所属情感",
-    "Generate": "生成",
-    "Generate chords coming soon": "生成和声控制暂不可用",
-    "Volume in dB": "dB 音量调节",
-    "±12 octave": "±12 八度上下移",
-    "BPM tempo": "BPM 速度",
-    "Minor": "小调",
-    "Major": "大调",
-    "Mode": "大小调",
-    "Pitch SD": "音高标准差",
-    "Low": "低",
-    "High": "高",
-    "By feature control": "通过特征控制生成",
-    "By template": "通过模板生成",
-    "Arousal: reflects the calmness-intensity of the emotion": "唤醒度 反映情绪的 平静-激烈 程度",
-    "Valence: reflects negative-positive levels of emotion": "愉悦度 反映情绪的 消极-积极 程度",
-    "Video demo": "视频教程",
-    "Dataset": "数据集",
-    "Status": "状态栏",
-}
-def _L(en_txt: str):
-    return en_txt if EN_US else f"{en_txt} ({EN2ZH[en_txt]})"
 def infer_by_template(dataset: str, v: str, a: str, add_chord: bool):
     status = "Success"
     audio = midi = pdf = xml = mxl = tunes = jpg = None
-    emotion = "Q1"
-    if v == _L("Low") and a == _L("High"):
-        emotion = "Q2"
-    elif v == _L("Low") and a == _L("Low"):
-        emotion = "Q3"
-    elif v == _L("High") and a == _L("Low"):
-        emotion = "Q4"
-    try:
         parser = argparse.ArgumentParser()
         args = get_args(parser)
         args.template = True
@@ -89,17 +50,20 @@ def infer_by_features(
 ):
     status = "Success"
     audio = midi = pdf = xml = mxl = tunes = jpg = None
-    emotion = "Q1"
-    if mode == _L("Minor") and pitch_std == _L("High"):
-        emotion = "Q2"
-    elif mode == _L("Minor") and pitch_std == _L("Low"):
-        emotion = "Q3"
-    elif mode == _L("Major") and pitch_std == _L("Low"):
-        emotion = "Q4"
-    try:
         parser = argparse.ArgumentParser()
         args = get_args(parser)
         args.template = False
@@ -191,7 +155,6 @@ def save_template(label: str, pitch_std: str, mode: str, tempo: int, octave: int
 if __name__ == "__main__":
-    warnings.filterwarnings("ignore")
     with gr.Blocks() as demo:
         if EN_US:
             gr.Markdown(
@@ -317,10 +280,10 @@ if __name__ == "__main__":
                 status_bar = gr.Textbox(label=_L("Status"), show_copy_button=True)
                 with gr.Row():
-                    mid_file = gr.File(label=_L("Download MIDI"), min_width=40)
-                    pdf_file = gr.File(label=_L("Download PDF score"), min_width=40)
-                    xml_file = gr.File(label=_L("Download MusicXML"), min_width=40)
-                    mxl_file = gr.File(label=_L("Download MXL"), min_width=40)
                 with gr.Row():
                     abc_txt = gr.TextArea(

 import json
 import shutil
 import argparse
 import gradio as gr
 from generate import generate_music, get_args
+from utils import _L, WEIGHTS_DIR, TEMP_DIR, EN_US
 def infer_by_template(dataset: str, v: str, a: str, add_chord: bool):
     status = "Success"
     audio = midi = pdf = xml = mxl = tunes = jpg = None
+    try:
+        emotion = "Q1"
+        if v == _L("Low") and a == _L("High"):
+            emotion = "Q2"
+        elif v == _L("Low") and a == _L("Low"):
+            emotion = "Q3"
+        elif v == _L("High") and a == _L("Low"):
+            emotion = "Q4"
+        if add_chord:
+            print("Chord generation comes soon!")
         parser = argparse.ArgumentParser()
         args = get_args(parser)
         args.template = True
 ):
     status = "Success"
     audio = midi = pdf = xml = mxl = tunes = jpg = None
+    try:
+        emotion = "Q1"
+        if mode == _L("Minor") and pitch_std == _L("High"):
+            emotion = "Q2"
+        elif mode == _L("Minor") and pitch_std == _L("Low"):
+            emotion = "Q3"
+        elif mode == _L("Major") and pitch_std == _L("Low"):
+            emotion = "Q4"
+        if add_chord:
+            print("Chord generation comes soon!")
         parser = argparse.ArgumentParser()
         args = get_args(parser)
         args.template = False
 if __name__ == "__main__":
     with gr.Blocks() as demo:
         if EN_US:
             gr.Markdown(
                 status_bar = gr.Textbox(label=_L("Status"), show_copy_button=True)
                 with gr.Row():
+                    mid_file = gr.File(label=_L("Download MIDI"), min_width=80)
+                    pdf_file = gr.File(label=_L("Download PDF score"), min_width=80)
+                    xml_file = gr.File(label=_L("Download MusicXML"), min_width=80)
+                    mxl_file = gr.File(label=_L("Download MXL"), min_width=80)
                 with gr.Row():
                     abc_txt = gr.TextArea(

convert.py CHANGED Viewed

@@ -25,16 +25,13 @@ def xml2(xml_path: str, target_fmt: str):
 def pdf2img(pdf_path: str):
     output_path = pdf_path.replace(".pdf", ".jpg")
     doc = fitz.open(pdf_path)
-    # 创建一个图像列表
-    images = []
     for page_number in range(doc.page_count):
         page = doc[page_number]
-        # 将页面渲染为图像
-        image = page.get_pixmap()
-        # 将图像添加到列表
         images.append(
             Image.frombytes("RGB", [image.width, image.height], image.samples)
-        )
     # 竖向合并图像
     merged_image = Image.new(
         "RGB", (images[0].width, sum(image.height for image in images))
@@ -45,8 +42,7 @@ def pdf2img(pdf_path: str):
         y_offset += image.height
     # 保存合并后的图像为JPG
     merged_image.save(output_path, "JPEG")
-    # 关闭PDF文档
-    doc.close()
     return output_path
@@ -66,7 +62,6 @@ def xml2abc(input_xml_file: str):
         stdout=subprocess.PIPE,
         text=True,
     )
     if result.returncode == 0:
         return str(result.stdout).strip()
@@ -78,13 +73,11 @@ def transpose_octaves_abc(abc_notation: str, out_xml_file: str, offset=-12):
     if offset < 0:
         for part in score.parts:
             for measure in part.getElementsByClass(stream.Measure):
-                # 检查当前小节的谱号
-                if measure.clef:
                     measure.clef = clef.BassClef()
     octaves_interval = interval.Interval(offset)
-    # 遍历每个音符，将其上下移八度
-    for note in score.recurse().notes:
         note.transpose(octaves_interval, inPlace=True)
     score.write("musicxml", fp=out_xml_file)

 def pdf2img(pdf_path: str):
     output_path = pdf_path.replace(".pdf", ".jpg")
     doc = fitz.open(pdf_path)
+    images = []  # 创建一个图像列表
     for page_number in range(doc.page_count):
         page = doc[page_number]
+        image = page.get_pixmap()  # 将页面渲染为图像
         images.append(
             Image.frombytes("RGB", [image.width, image.height], image.samples)
+        )  # 将图像添加到列表
     # 竖向合并图像
     merged_image = Image.new(
         "RGB", (images[0].width, sum(image.height for image in images))
         y_offset += image.height
     # 保存合并后的图像为JPG
     merged_image.save(output_path, "JPEG")
+    doc.close()  # 关闭PDF文档
     return output_path
         stdout=subprocess.PIPE,
         text=True,
     )
     if result.returncode == 0:
         return str(result.stdout).strip()
     if offset < 0:
         for part in score.parts:
             for measure in part.getElementsByClass(stream.Measure):
+                if measure.clef:  # 检查当前小节的谱号
                     measure.clef = clef.BassClef()
     octaves_interval = interval.Interval(offset)
+    for note in score.recurse().notes:  # 遍历每个音符，将其上下移八度
         note.transpose(octaves_interval, inPlace=True)
     score.write("musicxml", fp=out_xml_file)

generate.py CHANGED Viewed

@@ -1,9 +1,9 @@
 import re
 import os
-import shutil
 import time
 import torch
 import random
 import argparse
 import soundfile as sf
 from transformers import GPT2Config
@@ -157,7 +157,6 @@ def generate_music(
         )
         if tune == "":
             tokens = None
         else:
             prefix = patchilizer.decode(input_patches[0])
             remaining_tokens = prompt[len(prefix) :]
@@ -201,11 +200,9 @@ def generate_music(
         tunes += f"{tune}\n\n"
         print("\n")
     # fix tempo
     if fix_tempo != None:
         tempo = f"Q:{fix_tempo}\n"
     else:
         tempo = f"Q:{random.randint(88, 132)}\n"
         if emo == "Q1":
@@ -227,19 +224,16 @@ def generate_music(
         tunes = tunes.replace("K:none\n", f"K:{K_val}\n")
     tunes = tunes.replace(f"A:{emo}\n", tempo)
-    # fix mode:major/minor
-    mode = "major" if emo == "Q1" or emo == "Q4" else "minor"
     if (mode == "major") and ("m" in K_val):
         tunes = tunes.replace(f"\nK:{K_val}\n", f"\nK:{K_val.split('m')[0]}\n")
     elif (mode == "minor") and (not "m" in K_val):
         tunes = tunes.replace(f"\nK:{K_val}\n", f"\nK:{K_val.replace('dor', '')}min\n")
     print("Generation time: {:.2f} seconds".format(time.time() - start_time))
     timestamp = time.strftime("%a_%d_%b_%Y_%H_%M_%S", time.localtime())
     try:
-        # fix avg_pitch (octave)
-        if fix_pitch != None:
             if fix_pitch:
                 tunes, xml = transpose_octaves_abc(
                     tunes,

 import re
 import os
 import time
 import torch
 import random
+import shutil
 import argparse
 import soundfile as sf
 from transformers import GPT2Config
         )
         if tune == "":
             tokens = None
         else:
             prefix = patchilizer.decode(input_patches[0])
             remaining_tokens = prompt[len(prefix) :]
         tunes += f"{tune}\n\n"
         print("\n")
     # fix tempo
     if fix_tempo != None:
         tempo = f"Q:{fix_tempo}\n"
     else:
         tempo = f"Q:{random.randint(88, 132)}\n"
         if emo == "Q1":
         tunes = tunes.replace("K:none\n", f"K:{K_val}\n")
     tunes = tunes.replace(f"A:{emo}\n", tempo)
+    mode = "major" if emo == "Q1" or emo == "Q4" else "minor"  # fix mode:major/minor
     if (mode == "major") and ("m" in K_val):
         tunes = tunes.replace(f"\nK:{K_val}\n", f"\nK:{K_val.split('m')[0]}\n")
     elif (mode == "minor") and (not "m" in K_val):
         tunes = tunes.replace(f"\nK:{K_val}\n", f"\nK:{K_val.replace('dor', '')}min\n")
     print("Generation time: {:.2f} seconds".format(time.time() - start_time))
     timestamp = time.strftime("%a_%d_%b_%Y_%H_%M_%S", time.localtime())
     try:
+        if fix_pitch != None:  # fix avg_pitch (octave)
             if fix_pitch:
                 tunes, xml = transpose_octaves_abc(
                     tunes,

utils.py CHANGED Viewed

@@ -2,12 +2,15 @@ import os
 import sys
 import time
 import torch
 import requests
 import subprocess
 import modelscope
 import huggingface_hub
 from tqdm import tqdm
 TEMP_DIR = "./__pycache__"
 EN_US = os.getenv("LANG") != "zh_CN.UTF-8"
 WEIGHTS_DIR = (
@@ -23,6 +26,46 @@ CHAR_NUM_LAYERS = 3  # Number of layers in the decoder
 PATCH_SAMPLING_BATCH_SIZE = 0  # Batch size for training patch, 0 for full context
 LOAD_FROM_CHECKPOINT = True  # Whether to load weights from a checkpoint
 SHARE_WEIGHTS = False  # Whether to share weights between the encoder and decoder
 def download(filename: str, url: str):

 import sys
 import time
 import torch
+import warnings
 import requests
 import subprocess
 import modelscope
 import huggingface_hub
 from tqdm import tqdm
+warnings.filterwarnings("ignore")
 TEMP_DIR = "./__pycache__"
 EN_US = os.getenv("LANG") != "zh_CN.UTF-8"
 WEIGHTS_DIR = (
 PATCH_SAMPLING_BATCH_SIZE = 0  # Batch size for training patch, 0 for full context
 LOAD_FROM_CHECKPOINT = True  # Whether to load weights from a checkpoint
 SHARE_WEIGHTS = False  # Whether to share weights between the encoder and decoder
+EN2ZH = {
+    "Low": "低",
+    "High": "高",
+    "Cite": "引用",
+    "Save": "保存",
+    "Audio": "音频",
+    "Minor": "小调",
+    "Major": "大调",
+    "Mode": "大小调",
+    "Submit": "提交",
+    "Staff": "五线谱",
+    "Status": "状态栏",
+    "Feedback": "反馈",
+    "Generate": "生成",
+    "Dataset": "数据集",
+    "BPM tempo": "BPM 速度",
+    "Pitch SD": "音高标准差",
+    "Video demo": "视频教程",
+    "ABC notation": "ABC 记谱",
+    "Download MXL": "下载 MXL",
+    "Save template": "保存模板",
+    "Download MIDI": "下载 MIDI",
+    "By template": "通过模板生成",
+    "Volume in dB": "dB 音量调节",
+    "±12 octave": "±12 八度上下移",
+    "Download template": "下载模板",
+    "Download MusicXML": "下载 MusicXML",
+    "Download PDF score": "下载 PDF 乐谱",
+    "By feature control": "通过特征控制生成",
+    "Additional info & option": "附加信息及选项",
+    "Generate chords coming soon": "生成和声控制暂不可用",
+    "The emotion to which the current template belongs": "当前模板所属情感",
+    "Valence: reflects negative-positive levels of emotion": "愉悦度 反映情绪的 消极-积极 程度",
+    "Arousal: reflects the calmness-intensity of the emotion": "唤醒度 反映情绪的 平静-激烈 程度",
+    "The emotion you believe the generated result should belong to": "您所认为生成结果应该所属的情感",
+}
+def _L(en_txt: str):
+    return en_txt if EN_US else f"{en_txt} ({EN2ZH[en_txt]})"
 def download(filename: str, url: str):

xml2abc.py CHANGED Viewed

@@ -2,7 +2,7 @@
 # coding=latin-1
 """
 Copyright (C) 2012-2018: W.G. Vree
-Contributions: M. Tarenskeen, N. Liberg, Paul Villiger, Janus Meuris, Larry Myerscough,
 Dick Jackson, Jan Wybren de Jong, Mark Zealey.
 This program is free software; you can redistribute it and/or modify it under the terms of the

 # coding=latin-1
 """
 Copyright (C) 2012-2018: W.G. Vree
+Contributions: M. Tarenskeen, N. Liberg, Paul Villiger, Janus Meuris, Larry Myerscough,
 Dick Jackson, Jan Wybren de Jong, Mark Zealey.
 This program is free software; you can redistribute it and/or modify it under the terms of the