root
committed on
Commit
·
173048d
1
Parent(s):
e922466
push
Browse files
app.py
CHANGED
@@ -1677,6 +1677,11 @@ def generate_lyrics(genre, duration, emotion_results, song_structure=None):
|
|
1677 |
syllable_guidance = ""
|
1678 |
templates_for_verification = []
|
1679 |
|
|
|
|
|
|
|
|
|
|
|
1680 |
if song_structure:
|
1681 |
# Try to use flexible structure if available
|
1682 |
if "flexible_structure" in song_structure and song_structure["flexible_structure"]:
|
@@ -1685,15 +1690,22 @@ def generate_lyrics(genre, duration, emotion_results, song_structure=None):
|
|
1685 |
# Get the segments
|
1686 |
segments = flexible["segments"]
|
1687 |
|
|
|
|
|
|
|
|
|
1688 |
# Process each segment to create enhanced rhythmic templates
|
1689 |
enhanced_templates = []
|
1690 |
|
1691 |
for i, segment in enumerate(segments):
|
1692 |
-
if i <
|
1693 |
# Get the beat information for this segment
|
1694 |
segment_start = segment["start"]
|
1695 |
segment_end = segment["end"]
|
1696 |
|
|
|
|
|
|
|
1697 |
# Find beats within this segment
|
1698 |
segment_beats = []
|
1699 |
beat_times = flexible["beats"]["beat_times"]
|
@@ -1727,24 +1739,129 @@ def generate_lyrics(genre, duration, emotion_results, song_structure=None):
|
|
1727 |
)
|
1728 |
enhanced_templates.append(enhanced_template)
|
1729 |
templates_for_verification.append(enhanced_template)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1730 |
|
1731 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
1732 |
syllable_guidance = "CRITICAL RHYTHM INSTRUCTIONS:\n"
|
1733 |
-
syllable_guidance += "
|
1734 |
-
syllable_guidance +=
|
1735 |
-
|
1736 |
-
|
1737 |
-
|
1738 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1739 |
|
1740 |
-
# Note: The enhanced formatter now automatically includes explanations
|
1741 |
-
|
1742 |
# Fallback to traditional sections if needed
|
1743 |
elif "syllables" in song_structure and song_structure["syllables"]:
|
1744 |
syllable_guidance = "RHYTHM PATTERN INSTRUCTIONS:\n"
|
1745 |
-
syllable_guidance += "Follow these syllable patterns for each section:\n\n"
|
|
|
|
|
|
|
1746 |
|
1747 |
for section in song_structure["syllables"]:
|
|
|
|
|
1748 |
if "syllable_template" in section:
|
1749 |
# Process to create enhanced template
|
1750 |
section_beats_info = {
|
@@ -1779,6 +1896,20 @@ def generate_lyrics(genre, duration, emotion_results, song_structure=None):
|
|
1779 |
templates_for_verification.append(section)
|
1780 |
elif "syllable_count" in section:
|
1781 |
syllable_guidance += f"[{section['type'].capitalize()}]: ~{section['syllable_count']} syllables total\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1782 |
|
1783 |
# If we couldn't get specific templates, use general guidance
|
1784 |
if not syllable_guidance:
|
@@ -1789,6 +1920,21 @@ def generate_lyrics(genre, duration, emotion_results, song_structure=None):
|
|
1789 |
syllable_guidance += " - Fast tempo (>120 BPM): 4-6 syllables per line\n"
|
1790 |
syllable_guidance += " - Medium tempo (90-120 BPM): 6-8 syllables per line\n"
|
1791 |
syllable_guidance += " - Slow tempo (<90 BPM): 8-10 syllables per line\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1792 |
|
1793 |
# Add examples of syllable-beat alignment with enhanced format
|
1794 |
syllable_guidance += "\nEXAMPLES OF PERFECT RHYTHM ALIGNMENT:\n"
|
@@ -1833,8 +1979,10 @@ def generate_lyrics(genre, duration, emotion_results, song_structure=None):
|
|
1833 |
# Add genre guidance to the main guidance
|
1834 |
syllable_guidance += genre_guidance
|
1835 |
|
1836 |
-
#
|
1837 |
-
|
|
|
|
|
1838 |
if song_structure and "flexible_structure" in song_structure and song_structure["flexible_structure"]:
|
1839 |
# If we have more than 4 segments, it's likely not a traditional song structure
|
1840 |
if "segments" in song_structure["flexible_structure"]:
|
@@ -1842,73 +1990,6 @@ def generate_lyrics(genre, duration, emotion_results, song_structure=None):
|
|
1842 |
if len(segments) > 4:
|
1843 |
use_sections = False
|
1844 |
|
1845 |
-
# Calculate appropriate lyrics length and section distribution
|
1846 |
-
try:
|
1847 |
-
if song_structure and "beats" in song_structure:
|
1848 |
-
beats_info = song_structure["beats"]
|
1849 |
-
tempo = beats_info.get("tempo", 120)
|
1850 |
-
time_signature = beats_info.get("time_signature", 4)
|
1851 |
-
lines_structure = calculate_lyrics_length(duration, tempo, time_signature)
|
1852 |
-
|
1853 |
-
# Handle both possible return types
|
1854 |
-
if isinstance(lines_structure, dict):
|
1855 |
-
total_lines = lines_structure["lines_count"]
|
1856 |
-
|
1857 |
-
# Extract section line counts if available
|
1858 |
-
verse_lines = 0
|
1859 |
-
chorus_lines = 0
|
1860 |
-
bridge_lines = 0
|
1861 |
-
|
1862 |
-
for section in lines_structure["sections"]:
|
1863 |
-
if section["type"] == "verse":
|
1864 |
-
verse_lines = section["lines"]
|
1865 |
-
elif section["type"] == "chorus":
|
1866 |
-
chorus_lines = section["lines"]
|
1867 |
-
elif section["type"] == "bridge":
|
1868 |
-
bridge_lines = section["lines"]
|
1869 |
-
else:
|
1870 |
-
# The function returned just an integer (old behavior)
|
1871 |
-
total_lines = lines_structure
|
1872 |
-
|
1873 |
-
# Default section distribution based on total lines
|
1874 |
-
if total_lines <= 6:
|
1875 |
-
verse_lines = 2
|
1876 |
-
chorus_lines = 2
|
1877 |
-
bridge_lines = 0
|
1878 |
-
elif total_lines <= 10:
|
1879 |
-
verse_lines = 3
|
1880 |
-
chorus_lines = 2
|
1881 |
-
bridge_lines = 0
|
1882 |
-
else:
|
1883 |
-
verse_lines = 3
|
1884 |
-
chorus_lines = 2
|
1885 |
-
bridge_lines = 2
|
1886 |
-
else:
|
1887 |
-
# Fallback to simple calculation
|
1888 |
-
total_lines = max(4, int(duration / 10))
|
1889 |
-
|
1890 |
-
# Default section distribution
|
1891 |
-
if total_lines <= 6:
|
1892 |
-
verse_lines = 2
|
1893 |
-
chorus_lines = 2
|
1894 |
-
bridge_lines = 0
|
1895 |
-
elif total_lines <= 10:
|
1896 |
-
verse_lines = 3
|
1897 |
-
chorus_lines = 2
|
1898 |
-
bridge_lines = 0
|
1899 |
-
else:
|
1900 |
-
verse_lines = 3
|
1901 |
-
chorus_lines = 2
|
1902 |
-
bridge_lines = 2
|
1903 |
-
except Exception as e:
|
1904 |
-
print(f"Error calculating lyrics length: {str(e)}")
|
1905 |
-
total_lines = max(4, int(duration / 10))
|
1906 |
-
|
1907 |
-
# Default section distribution
|
1908 |
-
verse_lines = 3
|
1909 |
-
chorus_lines = 2
|
1910 |
-
bridge_lines = 0
|
1911 |
-
|
1912 |
# Create enhanced prompt with better rhythm alignment instructions
|
1913 |
if use_sections:
|
1914 |
# Traditional approach with sections
|
@@ -1939,14 +2020,12 @@ Think step by step about how to match words to the rhythm pattern:
|
|
1939 |
3. Count syllables carefully to ensure they match the pattern precisely
|
1940 |
4. Test your line against the pattern by mapping each syllable
|
1941 |
|
|
|
|
|
1942 |
The lyrics should:
|
1943 |
- Perfectly capture the essence and style of {genre} music
|
1944 |
- Express the {primary_emotion} emotion and {primary_theme} theme
|
1945 |
-
-
|
1946 |
-
- Follow this structure:
|
1947 |
-
* Verse: {verse_lines} lines
|
1948 |
-
* Chorus: {chorus_lines} lines
|
1949 |
-
* {f'Bridge: {bridge_lines} lines' if bridge_lines > 0 else ''}
|
1950 |
- Be completely original
|
1951 |
- Match the song duration of {duration:.1f} seconds
|
1952 |
|
@@ -1988,6 +2067,8 @@ Think step by step about how to match words to the rhythm pattern:
|
|
1988 |
3. Count syllables carefully to ensure they match the pattern precisely
|
1989 |
4. Test your line against the pattern by mapping each syllable
|
1990 |
|
|
|
|
|
1991 |
For perfect alignment examples:
|
1992 |
- "FEEL the RHY-thm in your SOUL" – stressed syllables on strong beats
|
1993 |
- "to-DAY we DANCE a-LONG" – natural speech stress matches musical stress
|
@@ -2000,8 +2081,8 @@ The lyrics should:
|
|
2000 |
- Maintain a consistent theme throughout
|
2001 |
- Match the audio segment duration of {duration:.1f} seconds
|
2002 |
|
2003 |
-
|
2004 |
-
|
2005 |
|
2006 |
IMPORTANT: Your generated lyrics must be followed by a section titled "[RHYTHM_ANALYSIS_SECTION]"
|
2007 |
where you analyze how well the lyrics align with the musical rhythm. This section MUST appear
|
@@ -2144,29 +2225,6 @@ Improved lyrics with fixed rhythm:
|
|
2144 |
# No significant issues detected
|
2145 |
lyrics = verified_lyrics
|
2146 |
|
2147 |
-
# Add section labels if they're not present and we're using the traditional approach
|
2148 |
-
if use_sections and "Verse" not in lyrics and "Chorus" not in lyrics:
|
2149 |
-
lines = lyrics.split('\n')
|
2150 |
-
formatted_lyrics = []
|
2151 |
-
|
2152 |
-
line_count = 0
|
2153 |
-
for i, line in enumerate(lines):
|
2154 |
-
if not line.strip():
|
2155 |
-
formatted_lyrics.append(line)
|
2156 |
-
continue
|
2157 |
-
|
2158 |
-
if line_count == 0:
|
2159 |
-
formatted_lyrics.append("[Verse]")
|
2160 |
-
elif line_count == verse_lines:
|
2161 |
-
formatted_lyrics.append("\n[Chorus]")
|
2162 |
-
elif line_count == verse_lines + chorus_lines and bridge_lines > 0:
|
2163 |
-
formatted_lyrics.append("\n[Bridge]")
|
2164 |
-
|
2165 |
-
formatted_lyrics.append(line)
|
2166 |
-
line_count += 1
|
2167 |
-
|
2168 |
-
lyrics = '\n'.join(formatted_lyrics)
|
2169 |
-
|
2170 |
# Check if we have the [RHYTHM_ANALYSIS_SECTION] tag
|
2171 |
if "[RHYTHM_ANALYSIS_SECTION]" in lyrics:
|
2172 |
# Split at our custom marker
|
@@ -2185,9 +2243,6 @@ Improved lyrics with fixed rhythm:
|
|
2185 |
# No analysis found, add a minimal one
|
2186 |
lyrics = lyrics + "\n\n[Note: Rhythm Analysis]\nNo rhythm issues detected. All syllables align well with the beat pattern."
|
2187 |
|
2188 |
-
# Store the syllable guidance for later use
|
2189 |
-
syllable_guidance_text = syllable_guidance
|
2190 |
-
|
2191 |
# Before returning, add syllable analysis and prompt template
|
2192 |
if isinstance(lyrics, str):
|
2193 |
# Extract clean lyrics and analysis
|
@@ -2206,15 +2261,22 @@ Improved lyrics with fixed rhythm:
|
|
2206 |
if templates_for_verification:
|
2207 |
syllable_analysis += "Template Analysis:\n"
|
2208 |
for i, template in enumerate(templates_for_verification):
|
2209 |
-
|
2210 |
-
|
2211 |
-
if
|
2212 |
-
|
2213 |
-
|
2214 |
-
|
2215 |
-
|
2216 |
-
|
2217 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2218 |
|
2219 |
# Create prompt template
|
2220 |
prompt_template = "=== PROMPT TEMPLATE ===\n\n"
|
@@ -2284,7 +2346,139 @@ def process_audio(audio_file):
|
|
2284 |
|
2285 |
# Calculate detailed song structure for better lyrics alignment
|
2286 |
try:
|
2287 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2288 |
except Exception as e:
|
2289 |
print(f"Error analyzing song structure: {str(e)}")
|
2290 |
# Continue with a simpler approach if this fails
|
|
|
1677 |
syllable_guidance = ""
|
1678 |
templates_for_verification = []
|
1679 |
|
1680 |
+
# Create a structure visualization to help with lyrics-music matching
|
1681 |
+
structure_visualization = "=== MUSIC-LYRICS STRUCTURE MATCHING ===\n\n"
|
1682 |
+
structure_visualization += f"Song Duration: {duration:.1f} seconds\n"
|
1683 |
+
structure_visualization += f"Tempo: {tempo:.1f} BPM\n\n"
|
1684 |
+
|
1685 |
if song_structure:
|
1686 |
# Try to use flexible structure if available
|
1687 |
if "flexible_structure" in song_structure and song_structure["flexible_structure"]:
|
|
|
1690 |
# Get the segments
|
1691 |
segments = flexible["segments"]
|
1692 |
|
1693 |
+
# Add structure visualization
|
1694 |
+
structure_visualization += f"Total segments: {len(segments)}\n"
|
1695 |
+
structure_visualization += "Each segment represents one musical phrase for which you should write ONE line of lyrics.\n\n"
|
1696 |
+
|
1697 |
# Process each segment to create enhanced rhythmic templates
|
1698 |
enhanced_templates = []
|
1699 |
|
1700 |
for i, segment in enumerate(segments):
|
1701 |
+
if i < 30: # Extend limit to 30 lines to handle longer songs
|
1702 |
# Get the beat information for this segment
|
1703 |
segment_start = segment["start"]
|
1704 |
segment_end = segment["end"]
|
1705 |
|
1706 |
+
# Add segment info to visualization
|
1707 |
+
structure_visualization += f"Segment {i+1}: {segment_start:.1f}s - {segment_end:.1f}s (duration: {segment_end-segment_start:.1f}s)\n"
|
1708 |
+
|
1709 |
# Find beats within this segment
|
1710 |
segment_beats = []
|
1711 |
beat_times = flexible["beats"]["beat_times"]
|
|
|
1739 |
)
|
1740 |
enhanced_templates.append(enhanced_template)
|
1741 |
templates_for_verification.append(enhanced_template)
|
1742 |
+
|
1743 |
+
# Add template to visualization
|
1744 |
+
structure_visualization += f" Template: {enhanced_template}\n"
|
1745 |
+
|
1746 |
+
# Use these templates to determine verse/chorus structure based on similar patterns
|
1747 |
+
# This is a simple version - could be enhanced with more sophisticated pattern detection
|
1748 |
+
section_types = []
|
1749 |
+
pattern_groups = {}
|
1750 |
+
|
1751 |
+
for i, template in enumerate(enhanced_templates):
|
1752 |
+
# Create simplified version for pattern matching
|
1753 |
+
simple_pattern = template.replace("(", "").replace(")", "").replace(":", "")
|
1754 |
+
|
1755 |
+
# Check if this pattern is similar to any we've seen
|
1756 |
+
found_match = False
|
1757 |
+
for group, patterns in pattern_groups.items():
|
1758 |
+
if any(simple_pattern == p.replace("(", "").replace(")", "").replace(":", "") for p in patterns):
|
1759 |
+
pattern_groups[group].append(template)
|
1760 |
+
section_types.append(group)
|
1761 |
+
found_match = True
|
1762 |
+
break
|
1763 |
+
|
1764 |
+
if not found_match:
|
1765 |
+
# New pattern type
|
1766 |
+
group_name = f"Group_{len(pattern_groups) + 1}"
|
1767 |
+
pattern_groups[group_name] = [template]
|
1768 |
+
section_types.append(group_name)
|
1769 |
+
|
1770 |
+
# Map pattern groups to verse/chorus/bridge based on common structures
|
1771 |
+
section_mapping = {}
|
1772 |
+
if len(pattern_groups) >= 1:
|
1773 |
+
# Assume the most common pattern is the verse
|
1774 |
+
most_common = max(pattern_groups.items(), key=lambda x: len(x[1]))[0]
|
1775 |
+
section_mapping[most_common] = "verse"
|
1776 |
+
|
1777 |
+
if len(pattern_groups) >= 2:
|
1778 |
+
# Second most common might be chorus
|
1779 |
+
sorted_groups = sorted(pattern_groups.items(), key=lambda x: len(x[1]), reverse=True)
|
1780 |
+
if len(sorted_groups) > 1:
|
1781 |
+
section_mapping[sorted_groups[1][0]] = "chorus"
|
1782 |
+
|
1783 |
+
if len(pattern_groups) >= 3:
|
1784 |
+
# Third pattern could be bridge
|
1785 |
+
sorted_groups = sorted(pattern_groups.items(), key=lambda x: len(x[1]), reverse=True)
|
1786 |
+
if len(sorted_groups) > 2:
|
1787 |
+
section_mapping[sorted_groups[2][0]] = "bridge"
|
1788 |
+
|
1789 |
+
# Update section types using the mapping
|
1790 |
+
mapped_section_types = []
|
1791 |
+
for section_type in section_types:
|
1792 |
+
if section_type in section_mapping:
|
1793 |
+
mapped_section_types.append(section_mapping[section_type])
|
1794 |
+
else:
|
1795 |
+
mapped_section_types.append("verse") # Default to verse
|
1796 |
+
|
1797 |
+
# Add structure visualization with section types
|
1798 |
+
structure_visualization += "\nPredicted Song Structure:\n"
|
1799 |
+
for i, section_type in enumerate(mapped_section_types):
|
1800 |
+
if i < len(enhanced_templates):
|
1801 |
+
structure_visualization += f"Line {i+1}: [{section_type.upper()}] {enhanced_templates[i]}\n"
|
1802 |
+
|
1803 |
+
# Calculate total line count
|
1804 |
+
total_lines = len(enhanced_templates)
|
1805 |
+
verse_lines = mapped_section_types.count("verse")
|
1806 |
+
chorus_lines = mapped_section_types.count("chorus")
|
1807 |
+
bridge_lines = mapped_section_types.count("bridge")
|
1808 |
|
1809 |
+
# Add summary
|
1810 |
+
structure_visualization += f"\nTotal Lines Required: {total_lines}\n"
|
1811 |
+
structure_visualization += f"Verse Lines: {verse_lines}\n"
|
1812 |
+
structure_visualization += f"Chorus Lines: {chorus_lines}\n"
|
1813 |
+
structure_visualization += f"Bridge Lines: {bridge_lines}\n"
|
1814 |
+
|
1815 |
+
# Format templates with improved formatting for the prompt
|
1816 |
syllable_guidance = "CRITICAL RHYTHM INSTRUCTIONS:\n"
|
1817 |
+
syllable_guidance += "Each line of lyrics MUST match exactly with one musical phrase/segment.\n"
|
1818 |
+
syllable_guidance += "Follow these rhythm patterns for each line (STRONG beats need stressed syllables):\n\n"
|
1819 |
+
|
1820 |
+
# Add section headers to formatted templates
|
1821 |
+
formatted_templates = []
|
1822 |
+
for i, template in enumerate(enhanced_templates):
|
1823 |
+
if i < len(mapped_section_types):
|
1824 |
+
section_type = mapped_section_types[i].upper()
|
1825 |
+
if i > 0 and mapped_section_types[i] != mapped_section_types[i-1]:
|
1826 |
+
# New section
|
1827 |
+
formatted_templates.append(f"\n[{section_type}]")
|
1828 |
+
elif i == 0:
|
1829 |
+
# First section
|
1830 |
+
formatted_templates.append(f"[{section_type}]")
|
1831 |
+
formatted_templates.append(format_syllable_templates_for_prompt([template], arrow="→", line_wrap=8))
|
1832 |
+
|
1833 |
+
syllable_guidance += "\n".join(formatted_templates)
|
1834 |
+
|
1835 |
+
# Store info for later use in traditional sections approach
|
1836 |
+
use_sections = True
|
1837 |
+
|
1838 |
+
# Use the detected section structure for traditional approach
|
1839 |
+
if verse_lines > 0:
|
1840 |
+
verse_lines = min(verse_lines, total_lines // 2) # Ensure reasonable limits
|
1841 |
+
else:
|
1842 |
+
verse_lines = total_lines // 2
|
1843 |
+
|
1844 |
+
if chorus_lines > 0:
|
1845 |
+
chorus_lines = min(chorus_lines, total_lines // 3)
|
1846 |
+
else:
|
1847 |
+
chorus_lines = total_lines // 3
|
1848 |
+
|
1849 |
+
if bridge_lines > 0:
|
1850 |
+
bridge_lines = min(bridge_lines, total_lines // 6)
|
1851 |
+
else:
|
1852 |
+
bridge_lines = 0
|
1853 |
|
|
|
|
|
1854 |
# Fallback to traditional sections if needed
|
1855 |
elif "syllables" in song_structure and song_structure["syllables"]:
|
1856 |
syllable_guidance = "RHYTHM PATTERN INSTRUCTIONS:\n"
|
1857 |
+
syllable_guidance += "Follow these syllable patterns for each section. Each line should match ONE phrase:\n\n"
|
1858 |
+
|
1859 |
+
# Count sections for visualization
|
1860 |
+
section_counts = {"verse": 0, "chorus": 0, "bridge": 0, "intro": 0, "outro": 0}
|
1861 |
|
1862 |
for section in song_structure["syllables"]:
|
1863 |
+
section_counts[section["type"]] = section_counts.get(section["type"], 0) + 1
|
1864 |
+
|
1865 |
if "syllable_template" in section:
|
1866 |
# Process to create enhanced template
|
1867 |
section_beats_info = {
|
|
|
1896 |
templates_for_verification.append(section)
|
1897 |
elif "syllable_count" in section:
|
1898 |
syllable_guidance += f"[{section['type'].capitalize()}]: ~{section['syllable_count']} syllables total\n"
|
1899 |
+
|
1900 |
+
# Create structure visualization
|
1901 |
+
structure_visualization += "Using traditional section-based structure:\n"
|
1902 |
+
for section_type, count in section_counts.items():
|
1903 |
+
if count > 0:
|
1904 |
+
structure_visualization += f"{section_type.capitalize()}: {count} sections\n"
|
1905 |
+
|
1906 |
+
# Set traditional section counts
|
1907 |
+
verse_lines = max(2, section_counts.get("verse", 0) * 4)
|
1908 |
+
chorus_lines = max(2, section_counts.get("chorus", 0) * 4)
|
1909 |
+
bridge_lines = max(0, section_counts.get("bridge", 0) * 2)
|
1910 |
+
|
1911 |
+
# Use sections approach
|
1912 |
+
use_sections = True
|
1913 |
|
1914 |
# If we couldn't get specific templates, use general guidance
|
1915 |
if not syllable_guidance:
|
|
|
1920 |
syllable_guidance += " - Fast tempo (>120 BPM): 4-6 syllables per line\n"
|
1921 |
syllable_guidance += " - Medium tempo (90-120 BPM): 6-8 syllables per line\n"
|
1922 |
syllable_guidance += " - Slow tempo (<90 BPM): 8-10 syllables per line\n"
|
1923 |
+
|
1924 |
+
# Create basic structure visualization
|
1925 |
+
structure_visualization += "Using estimated structure (no detailed analysis available):\n"
|
1926 |
+
|
1927 |
+
# Calculate rough section counts based on duration
|
1928 |
+
estimated_lines = max(8, int(duration / 10))
|
1929 |
+
structure_visualization += f"Estimated total lines: {estimated_lines}\n"
|
1930 |
+
|
1931 |
+
# Set traditional section counts based on duration
|
1932 |
+
verse_lines = estimated_lines // 2
|
1933 |
+
chorus_lines = estimated_lines // 3
|
1934 |
+
bridge_lines = estimated_lines // 6 if estimated_lines > 12 else 0
|
1935 |
+
|
1936 |
+
# Use sections approach
|
1937 |
+
use_sections = True
|
1938 |
|
1939 |
# Add examples of syllable-beat alignment with enhanced format
|
1940 |
syllable_guidance += "\nEXAMPLES OF PERFECT RHYTHM ALIGNMENT:\n"
|
|
|
1979 |
# Add genre guidance to the main guidance
|
1980 |
syllable_guidance += genre_guidance
|
1981 |
|
1982 |
+
# Store the syllable guidance for later use
|
1983 |
+
syllable_guidance_text = syllable_guidance
|
1984 |
+
|
1985 |
+
# Determine if we should use traditional sections or not based on structure
|
1986 |
if song_structure and "flexible_structure" in song_structure and song_structure["flexible_structure"]:
|
1987 |
# If we have more than 4 segments, it's likely not a traditional song structure
|
1988 |
if "segments" in song_structure["flexible_structure"]:
|
|
|
1990 |
if len(segments) > 4:
|
1991 |
use_sections = False
|
1992 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1993 |
# Create enhanced prompt with better rhythm alignment instructions
|
1994 |
if use_sections:
|
1995 |
# Traditional approach with sections
|
|
|
2020 |
3. Count syllables carefully to ensure they match the pattern precisely
|
2021 |
4. Test your line against the pattern by mapping each syllable
|
2022 |
|
2023 |
+
IMPORTANT: Each line of lyrics must match exactly to ONE musical phrase/segment.
|
2024 |
+
|
2025 |
The lyrics should:
|
2026 |
- Perfectly capture the essence and style of {genre} music
|
2027 |
- Express the {primary_emotion} emotion and {primary_theme} theme
|
2028 |
+
- Follow the structure patterns provided above
|
|
|
|
|
|
|
|
|
2029 |
- Be completely original
|
2030 |
- Match the song duration of {duration:.1f} seconds
|
2031 |
|
|
|
2067 |
3. Count syllables carefully to ensure they match the pattern precisely
|
2068 |
4. Test your line against the pattern by mapping each syllable
|
2069 |
|
2070 |
+
CRITICAL: Each line of lyrics must match exactly to ONE musical phrase/segment.
|
2071 |
+
|
2072 |
For perfect alignment examples:
|
2073 |
- "FEEL the RHY-thm in your SOUL" – stressed syllables on strong beats
|
2074 |
- "to-DAY we DANCE a-LONG" – natural speech stress matches musical stress
|
|
|
2081 |
- Maintain a consistent theme throughout
|
2082 |
- Match the audio segment duration of {duration:.1f} seconds
|
2083 |
|
2084 |
+
Include any section labels like [Verse] or [Chorus] as indicated in the rhythm patterns above.
|
2085 |
+
Each line of lyrics must follow the corresponding segment's rhythm pattern EXACTLY.
|
2086 |
|
2087 |
IMPORTANT: Your generated lyrics must be followed by a section titled "[RHYTHM_ANALYSIS_SECTION]"
|
2088 |
where you analyze how well the lyrics align with the musical rhythm. This section MUST appear
|
|
|
2225 |
# No significant issues detected
|
2226 |
lyrics = verified_lyrics
|
2227 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2228 |
# Check if we have the [RHYTHM_ANALYSIS_SECTION] tag
|
2229 |
if "[RHYTHM_ANALYSIS_SECTION]" in lyrics:
|
2230 |
# Split at our custom marker
|
|
|
2243 |
# No analysis found, add a minimal one
|
2244 |
lyrics = lyrics + "\n\n[Note: Rhythm Analysis]\nNo rhythm issues detected. All syllables align well with the beat pattern."
|
2245 |
|
|
|
|
|
|
|
2246 |
# Before returning, add syllable analysis and prompt template
|
2247 |
if isinstance(lyrics, str):
|
2248 |
# Extract clean lyrics and analysis
|
|
|
2261 |
if templates_for_verification:
|
2262 |
syllable_analysis += "Template Analysis:\n"
|
2263 |
for i, template in enumerate(templates_for_verification):
|
2264 |
+
if i < min(len(templates_for_verification), 30): # Limit to 30 to avoid overwhelming output
|
2265 |
+
syllable_analysis += f"Line {i+1}:\n"
|
2266 |
+
if isinstance(template, dict):
|
2267 |
+
if "syllable_template" in template:
|
2268 |
+
syllable_analysis += f" Template: {template['syllable_template']}\n"
|
2269 |
+
if "syllable_count" in template:
|
2270 |
+
syllable_analysis += f" Expected syllables: {template['syllable_count']}\n"
|
2271 |
+
elif isinstance(template, str):
|
2272 |
+
syllable_analysis += f" Template: {template}\n"
|
2273 |
+
syllable_analysis += "\n"
|
2274 |
+
|
2275 |
+
if len(templates_for_verification) > 30:
|
2276 |
+
syllable_analysis += f"... and {len(templates_for_verification) - 30} more lines\n\n"
|
2277 |
+
|
2278 |
+
# Add structure visualization to syllable analysis
|
2279 |
+
syllable_analysis += "\n" + structure_visualization
|
2280 |
|
2281 |
# Create prompt template
|
2282 |
prompt_template = "=== PROMPT TEMPLATE ===\n\n"
|
|
|
2346 |
|
2347 |
# Calculate detailed song structure for better lyrics alignment
|
2348 |
try:
|
2349 |
+
# Enhanced song structure calculation for precise lyrics matching
|
2350 |
+
y, sr = load_audio(audio_file, SAMPLE_RATE)
|
2351 |
+
|
2352 |
+
# Analyze beats and phrases for music-aligned lyrics
|
2353 |
+
beats_info = detect_beats(y, sr)
|
2354 |
+
sections_info = detect_sections(y, sr)
|
2355 |
+
|
2356 |
+
# Create structured segments for precise line-by-line matching
|
2357 |
+
segments = []
|
2358 |
+
|
2359 |
+
# Try to break audio into meaningful segments based on sections
|
2360 |
+
# Each segment will correspond to one line of lyrics
|
2361 |
+
if sections_info and len(sections_info) > 1:
|
2362 |
+
min_segment_duration = 1.5 # Minimum 1.5 seconds per segment
|
2363 |
+
|
2364 |
+
for section in sections_info:
|
2365 |
+
section_start = section["start"]
|
2366 |
+
section_end = section["end"]
|
2367 |
+
section_duration = section["duration"]
|
2368 |
+
|
2369 |
+
# For very short sections, add as a single segment
|
2370 |
+
if section_duration < min_segment_duration * 1.5:
|
2371 |
+
segments.append({
|
2372 |
+
"start": section_start,
|
2373 |
+
"end": section_end
|
2374 |
+
})
|
2375 |
+
else:
|
2376 |
+
# Calculate ideal number of segments for this section
|
2377 |
+
# based on its duration - aiming for 2-4 second segments
|
2378 |
+
ideal_segment_duration = 3.0 # Target 3 seconds per segment
|
2379 |
+
segment_count = max(1, int(section_duration / ideal_segment_duration))
|
2380 |
+
|
2381 |
+
# Create evenly-spaced segments within this section
|
2382 |
+
segment_duration = section_duration / segment_count
|
2383 |
+
for i in range(segment_count):
|
2384 |
+
segment_start = section_start + i * segment_duration
|
2385 |
+
segment_end = segment_start + segment_duration
|
2386 |
+
segments.append({
|
2387 |
+
"start": segment_start,
|
2388 |
+
"end": segment_end
|
2389 |
+
})
|
2390 |
+
# If no good sections found, create segments based on beats
|
2391 |
+
elif beats_info and len(beats_info["beat_times"]) > 4:
|
2392 |
+
beats = beats_info["beat_times"]
|
2393 |
+
time_signature = beats_info.get("time_signature", 4)
|
2394 |
+
|
2395 |
+
# Target one segment per musical measure (typically 4 beats)
|
2396 |
+
measure_size = time_signature
|
2397 |
+
for i in range(0, len(beats), measure_size):
|
2398 |
+
if i + 1 < len(beats): # Need at least 2 beats for a meaningful segment
|
2399 |
+
measure_start = beats[i]
|
2400 |
+
# If we have enough beats for the full measure
|
2401 |
+
if i + measure_size < len(beats):
|
2402 |
+
measure_end = beats[i + measure_size]
|
2403 |
+
else:
|
2404 |
+
# Use available beats and extrapolate for the last measure
|
2405 |
+
if i > 0:
|
2406 |
+
beat_interval = beats[i] - beats[i-1]
|
2407 |
+
measure_end = beats[-1] + (beat_interval * (measure_size - (len(beats) - i)))
|
2408 |
+
else:
|
2409 |
+
measure_end = audio_data["duration"]
|
2410 |
+
|
2411 |
+
segments.append({
|
2412 |
+
"start": measure_start,
|
2413 |
+
"end": measure_end
|
2414 |
+
})
|
2415 |
+
# Last resort: simple time-based segments
|
2416 |
+
else:
|
2417 |
+
# Create segments of approximately 3 seconds each
|
2418 |
+
segment_duration = 3.0
|
2419 |
+
total_segments = max(4, int(audio_data["duration"] / segment_duration))
|
2420 |
+
segment_duration = audio_data["duration"] / total_segments
|
2421 |
+
|
2422 |
+
for i in range(total_segments):
|
2423 |
+
segment_start = i * segment_duration
|
2424 |
+
segment_end = segment_start + segment_duration
|
2425 |
+
segments.append({
|
2426 |
+
"start": segment_start,
|
2427 |
+
"end": segment_end
|
2428 |
+
})
|
2429 |
+
|
2430 |
+
# Create a flexible structure with the segments
|
2431 |
+
flexible_structure = {
|
2432 |
+
"beats": beats_info,
|
2433 |
+
"segments": segments
|
2434 |
+
}
|
2435 |
+
|
2436 |
+
# Add to song structure
|
2437 |
+
song_structure = {
|
2438 |
+
"beats": beats_info,
|
2439 |
+
"sections": sections_info,
|
2440 |
+
"flexible_structure": flexible_structure
|
2441 |
+
}
|
2442 |
+
|
2443 |
+
# Add syllable counts to each section
|
2444 |
+
song_structure["syllables"] = []
|
2445 |
+
for section in sections_info:
|
2446 |
+
# Create syllable templates for sections
|
2447 |
+
section_beats_info = {
|
2448 |
+
"beat_times": [beat for beat in beats_info["beat_times"]
|
2449 |
+
if section["start"] <= beat < section["end"]],
|
2450 |
+
"tempo": beats_info.get("tempo", 120)
|
2451 |
+
}
|
2452 |
+
if "beat_strengths" in beats_info:
|
2453 |
+
section_beats_info["beat_strengths"] = [
|
2454 |
+
strength for i, strength in enumerate(beats_info["beat_strengths"])
|
2455 |
+
if i < len(beats_info["beat_times"]) and
|
2456 |
+
section["start"] <= beats_info["beat_times"][i] < section["end"]
|
2457 |
+
]
|
2458 |
+
|
2459 |
+
# Get a syllable count based on section duration and tempo
|
2460 |
+
syllable_count = int(section["duration"] * (beats_info.get("tempo", 120) / 60) * 1.5)
|
2461 |
+
|
2462 |
+
section_info = {
|
2463 |
+
"type": section["type"],
|
2464 |
+
"start": section["start"],
|
2465 |
+
"end": section["end"],
|
2466 |
+
"duration": section["duration"],
|
2467 |
+
"syllable_count": syllable_count,
|
2468 |
+
"beat_count": len(section_beats_info["beat_times"])
|
2469 |
+
}
|
2470 |
+
|
2471 |
+
# Try to create a more detailed syllable template
|
2472 |
+
if len(section_beats_info["beat_times"]) >= 2:
|
2473 |
+
section_info["syllable_template"] = create_flexible_syllable_templates(
|
2474 |
+
section_beats_info,
|
2475 |
+
genre=top_genres[0][0]
|
2476 |
+
)
|
2477 |
+
|
2478 |
+
song_structure["syllables"].append(section_info)
|
2479 |
+
|
2480 |
+
print(f"Successfully analyzed song structure with {len(segments)} segments")
|
2481 |
+
|
2482 |
except Exception as e:
|
2483 |
print(f"Error analyzing song structure: {str(e)}")
|
2484 |
# Continue with a simpler approach if this fails
|