root
committed on
Commit
·
d6fb232
1
Parent(s):
c6a60af
token extend
Browse files
app.py
CHANGED
@@ -1952,7 +1952,10 @@ The lyrics should:
|
|
1952 |
|
1953 |
IMPORTANT: Your generated lyrics must be followed by a section titled "[RHYTHM_ANALYSIS_SECTION]"
|
1954 |
where you analyze how well the lyrics align with the musical rhythm. This section MUST appear
|
1955 |
-
even if there are no rhythm issues.
|
|
|
|
|
|
|
1956 |
|
1957 |
Your lyrics:
|
1958 |
"""
|
@@ -2002,7 +2005,10 @@ Instead, write lyrics that flow naturally and match the music's rhythm precisely
|
|
2002 |
|
2003 |
IMPORTANT: Your generated lyrics must be followed by a section titled "[RHYTHM_ANALYSIS_SECTION]"
|
2004 |
where you analyze how well the lyrics align with the musical rhythm. This section MUST appear
|
2005 |
-
even if there are no rhythm issues.
|
|
|
|
|
|
|
2006 |
|
2007 |
Your lyrics:
|
2008 |
"""
|
@@ -2012,23 +2018,12 @@ Your lyrics:
|
|
2012 |
{"role": "user", "content": content}
|
2013 |
]
|
2014 |
|
2015 |
-
# Apply chat template
|
2016 |
-
|
2017 |
-
|
2018 |
-
|
2019 |
-
|
2020 |
-
|
2021 |
-
add_generation_prompt=True,
|
2022 |
-
enable_thinking=True # Only works with models that support thinking mode
|
2023 |
-
)
|
2024 |
-
except Exception as e:
|
2025 |
-
# Fallback to standard template if thinking mode not supported
|
2026 |
-
print(f"Thinking mode not supported, using standard template: {str(e)}")
|
2027 |
-
text = llm_tokenizer.apply_chat_template(
|
2028 |
-
messages,
|
2029 |
-
tokenize=False,
|
2030 |
-
add_generation_prompt=True
|
2031 |
-
)
|
2032 |
|
2033 |
# Generate lyrics using the LLM
|
2034 |
model_inputs = llm_tokenizer([text], return_tensors="pt").to(llm_model.device)
|
@@ -2038,9 +2033,9 @@ Your lyrics:
|
|
2038 |
"do_sample": True,
|
2039 |
"temperature": 0.6, # Lower for more consistent rhythm alignment
|
2040 |
"top_p": 0.95,
|
2041 |
-
"top_k": 20
|
2042 |
"repetition_penalty": 1.2,
|
2043 |
-
"max_new_tokens":
|
2044 |
}
|
2045 |
|
2046 |
# Generate output
|
@@ -2052,35 +2047,20 @@ Your lyrics:
|
|
2052 |
# Extract output tokens
|
2053 |
output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
|
2054 |
|
2055 |
-
#
|
2056 |
-
|
2057 |
-
|
2058 |
-
|
2059 |
-
|
2060 |
-
|
2061 |
-
|
2062 |
-
|
2063 |
-
|
2064 |
-
|
2065 |
-
|
2066 |
-
|
2067 |
-
|
2068 |
-
|
2069 |
-
for model_name, token_id in think_end_tokens.items():
|
2070 |
-
if token_id in output_ids:
|
2071 |
-
found_token = token_id
|
2072 |
-
token_position = output_ids.index(token_id) + 1
|
2073 |
-
break
|
2074 |
-
|
2075 |
-
# Use the position of the thinking token if found
|
2076 |
-
if found_token:
|
2077 |
-
lyrics = llm_tokenizer.decode(output_ids[token_position:], skip_special_tokens=True).strip()
|
2078 |
-
else:
|
2079 |
-
lyrics = llm_tokenizer.decode(output_ids, skip_special_tokens=True).strip()
|
2080 |
-
except (ValueError, IndexError, AttributeError) as e:
|
2081 |
-
print(f"Error processing thinking output: {str(e)}")
|
2082 |
-
# Default behavior if thinking mode processing fails
|
2083 |
-
lyrics = llm_tokenizer.decode(output_ids, skip_special_tokens=True).strip()
|
2084 |
|
2085 |
# Verify syllable counts with enhanced verification
|
2086 |
if templates_for_verification:
|
|
|
1952 |
|
1953 |
IMPORTANT: Your generated lyrics must be followed by a section titled "[RHYTHM_ANALYSIS_SECTION]"
|
1954 |
where you analyze how well the lyrics align with the musical rhythm. This section MUST appear
|
1955 |
+
even if there are no rhythm issues. Include the following in your analysis:
|
1956 |
+
1. Syllable counts for each line and how they match the rhythm pattern
|
1957 |
+
2. Where stressed syllables align with strong beats
|
1958 |
+
3. Any potential misalignments or improvements
|
1959 |
|
1960 |
Your lyrics:
|
1961 |
"""
|
|
|
2005 |
|
2006 |
IMPORTANT: Your generated lyrics must be followed by a section titled "[RHYTHM_ANALYSIS_SECTION]"
|
2007 |
where you analyze how well the lyrics align with the musical rhythm. This section MUST appear
|
2008 |
+
even if there are no rhythm issues. Include the following in your analysis:
|
2009 |
+
1. Syllable counts for each line and how they match the rhythm pattern
|
2010 |
+
2. Where stressed syllables align with strong beats
|
2011 |
+
3. Any potential misalignments or improvements
|
2012 |
|
2013 |
Your lyrics:
|
2014 |
"""
|
|
|
2018 |
{"role": "user", "content": content}
|
2019 |
]
|
2020 |
|
2021 |
+
# Apply standard chat template without thinking enabled
|
2022 |
+
text = llm_tokenizer.apply_chat_template(
|
2023 |
+
messages,
|
2024 |
+
tokenize=False,
|
2025 |
+
add_generation_prompt=True
|
2026 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2027 |
|
2028 |
# Generate lyrics using the LLM
|
2029 |
model_inputs = llm_tokenizer([text], return_tensors="pt").to(llm_model.device)
|
|
|
2033 |
"do_sample": True,
|
2034 |
"temperature": 0.6, # Lower for more consistent rhythm alignment
|
2035 |
"top_p": 0.95,
|
2036 |
+
"top_k": 50, # Increased from 20 for more diversity
|
2037 |
"repetition_penalty": 1.2,
|
2038 |
+
"max_new_tokens": 2048 # Doubled from 1024 for more comprehensive lyrics
|
2039 |
}
|
2040 |
|
2041 |
# Generate output
|
|
|
2047 |
# Extract output tokens
|
2048 |
output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
|
2049 |
|
2050 |
+
# Skip the thinking process completely and just get the raw output
|
2051 |
+
lyrics = llm_tokenizer.decode(output_ids, skip_special_tokens=True).strip()
|
2052 |
+
|
2053 |
+
# If we find <thinking> tags, extract only the content after </thinking>
|
2054 |
+
if "<thinking>" in lyrics and "</thinking>" in lyrics:
|
2055 |
+
lyrics = lyrics.split("</thinking>")[1].strip()
|
2056 |
+
|
2057 |
+
# Remove any other thinking indicators that might be present
|
2058 |
+
thinking_markers = ["<think>", "</think>", "[thinking]", "[/thinking]", "I'll think step by step:"]
|
2059 |
+
for marker in thinking_markers:
|
2060 |
+
if marker in lyrics:
|
2061 |
+
parts = lyrics.split(marker)
|
2062 |
+
if len(parts) > 1:
|
2063 |
+
lyrics = parts[-1].strip() # Take the last part after any thinking marker
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2064 |
|
2065 |
# Verify syllable counts with enhanced verification
|
2066 |
if templates_for_verification:
|