root committed on
Commit
173048d
·
1 Parent(s): e922466
Files changed (1) hide show
  1. app.py +317 -123
app.py CHANGED
@@ -1677,6 +1677,11 @@ def generate_lyrics(genre, duration, emotion_results, song_structure=None):
1677
  syllable_guidance = ""
1678
  templates_for_verification = []
1679
 
 
 
 
 
 
1680
  if song_structure:
1681
  # Try to use flexible structure if available
1682
  if "flexible_structure" in song_structure and song_structure["flexible_structure"]:
@@ -1685,15 +1690,22 @@ def generate_lyrics(genre, duration, emotion_results, song_structure=None):
1685
  # Get the segments
1686
  segments = flexible["segments"]
1687
 
 
 
 
 
1688
  # Process each segment to create enhanced rhythmic templates
1689
  enhanced_templates = []
1690
 
1691
  for i, segment in enumerate(segments):
1692
- if i < 15: # Limit to 15 lines to keep prompt manageable
1693
  # Get the beat information for this segment
1694
  segment_start = segment["start"]
1695
  segment_end = segment["end"]
1696
 
 
 
 
1697
  # Find beats within this segment
1698
  segment_beats = []
1699
  beat_times = flexible["beats"]["beat_times"]
@@ -1727,24 +1739,129 @@ def generate_lyrics(genre, duration, emotion_results, song_structure=None):
1727
  )
1728
  enhanced_templates.append(enhanced_template)
1729
  templates_for_verification.append(enhanced_template)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1730
 
1731
- # Format templates with improved formatting
 
 
 
 
 
 
1732
  syllable_guidance = "CRITICAL RHYTHM INSTRUCTIONS:\n"
1733
- syllable_guidance += "Match each line exactly to this rhythm pattern (STRONG beats need stressed syllables):\n\n"
1734
- syllable_guidance += format_syllable_templates_for_prompt(
1735
- enhanced_templates,
1736
- arrow="→",
1737
- line_wrap=8
1738
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1739
 
1740
- # Note: The enhanced formatter now automatically includes explanations
1741
-
1742
  # Fallback to traditional sections if needed
1743
  elif "syllables" in song_structure and song_structure["syllables"]:
1744
  syllable_guidance = "RHYTHM PATTERN INSTRUCTIONS:\n"
1745
- syllable_guidance += "Follow these syllable patterns for each section:\n\n"
 
 
 
1746
 
1747
  for section in song_structure["syllables"]:
 
 
1748
  if "syllable_template" in section:
1749
  # Process to create enhanced template
1750
  section_beats_info = {
@@ -1779,6 +1896,20 @@ def generate_lyrics(genre, duration, emotion_results, song_structure=None):
1779
  templates_for_verification.append(section)
1780
  elif "syllable_count" in section:
1781
  syllable_guidance += f"[{section['type'].capitalize()}]: ~{section['syllable_count']} syllables total\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1782
 
1783
  # If we couldn't get specific templates, use general guidance
1784
  if not syllable_guidance:
@@ -1789,6 +1920,21 @@ def generate_lyrics(genre, duration, emotion_results, song_structure=None):
1789
  syllable_guidance += " - Fast tempo (>120 BPM): 4-6 syllables per line\n"
1790
  syllable_guidance += " - Medium tempo (90-120 BPM): 6-8 syllables per line\n"
1791
  syllable_guidance += " - Slow tempo (<90 BPM): 8-10 syllables per line\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1792
 
1793
  # Add examples of syllable-beat alignment with enhanced format
1794
  syllable_guidance += "\nEXAMPLES OF PERFECT RHYTHM ALIGNMENT:\n"
@@ -1833,8 +1979,10 @@ def generate_lyrics(genre, duration, emotion_results, song_structure=None):
1833
  # Add genre guidance to the main guidance
1834
  syllable_guidance += genre_guidance
1835
 
1836
- # Determine if we should use traditional sections or not
1837
- use_sections = True
 
 
1838
  if song_structure and "flexible_structure" in song_structure and song_structure["flexible_structure"]:
1839
  # If we have more than 4 segments, it's likely not a traditional song structure
1840
  if "segments" in song_structure["flexible_structure"]:
@@ -1842,73 +1990,6 @@ def generate_lyrics(genre, duration, emotion_results, song_structure=None):
1842
  if len(segments) > 4:
1843
  use_sections = False
1844
 
1845
- # Calculate appropriate lyrics length and section distribution
1846
- try:
1847
- if song_structure and "beats" in song_structure:
1848
- beats_info = song_structure["beats"]
1849
- tempo = beats_info.get("tempo", 120)
1850
- time_signature = beats_info.get("time_signature", 4)
1851
- lines_structure = calculate_lyrics_length(duration, tempo, time_signature)
1852
-
1853
- # Handle both possible return types
1854
- if isinstance(lines_structure, dict):
1855
- total_lines = lines_structure["lines_count"]
1856
-
1857
- # Extract section line counts if available
1858
- verse_lines = 0
1859
- chorus_lines = 0
1860
- bridge_lines = 0
1861
-
1862
- for section in lines_structure["sections"]:
1863
- if section["type"] == "verse":
1864
- verse_lines = section["lines"]
1865
- elif section["type"] == "chorus":
1866
- chorus_lines = section["lines"]
1867
- elif section["type"] == "bridge":
1868
- bridge_lines = section["lines"]
1869
- else:
1870
- # The function returned just an integer (old behavior)
1871
- total_lines = lines_structure
1872
-
1873
- # Default section distribution based on total lines
1874
- if total_lines <= 6:
1875
- verse_lines = 2
1876
- chorus_lines = 2
1877
- bridge_lines = 0
1878
- elif total_lines <= 10:
1879
- verse_lines = 3
1880
- chorus_lines = 2
1881
- bridge_lines = 0
1882
- else:
1883
- verse_lines = 3
1884
- chorus_lines = 2
1885
- bridge_lines = 2
1886
- else:
1887
- # Fallback to simple calculation
1888
- total_lines = max(4, int(duration / 10))
1889
-
1890
- # Default section distribution
1891
- if total_lines <= 6:
1892
- verse_lines = 2
1893
- chorus_lines = 2
1894
- bridge_lines = 0
1895
- elif total_lines <= 10:
1896
- verse_lines = 3
1897
- chorus_lines = 2
1898
- bridge_lines = 0
1899
- else:
1900
- verse_lines = 3
1901
- chorus_lines = 2
1902
- bridge_lines = 2
1903
- except Exception as e:
1904
- print(f"Error calculating lyrics length: {str(e)}")
1905
- total_lines = max(4, int(duration / 10))
1906
-
1907
- # Default section distribution
1908
- verse_lines = 3
1909
- chorus_lines = 2
1910
- bridge_lines = 0
1911
-
1912
  # Create enhanced prompt with better rhythm alignment instructions
1913
  if use_sections:
1914
  # Traditional approach with sections
@@ -1939,14 +2020,12 @@ Think step by step about how to match words to the rhythm pattern:
1939
  3. Count syllables carefully to ensure they match the pattern precisely
1940
  4. Test your line against the pattern by mapping each syllable
1941
 
 
 
1942
  The lyrics should:
1943
  - Perfectly capture the essence and style of {genre} music
1944
  - Express the {primary_emotion} emotion and {primary_theme} theme
1945
- - Be approximately {total_lines} lines long
1946
- - Follow this structure:
1947
- * Verse: {verse_lines} lines
1948
- * Chorus: {chorus_lines} lines
1949
- * {f'Bridge: {bridge_lines} lines' if bridge_lines > 0 else ''}
1950
  - Be completely original
1951
  - Match the song duration of {duration:.1f} seconds
1952
 
@@ -1988,6 +2067,8 @@ Think step by step about how to match words to the rhythm pattern:
1988
  3. Count syllables carefully to ensure they match the pattern precisely
1989
  4. Test your line against the pattern by mapping each syllable
1990
 
 
 
1991
  For perfect alignment examples:
1992
  - "FEEL the RHY-thm in your SOUL" – stressed syllables on strong beats
1993
  - "to-DAY we DANCE a-LONG" – natural speech stress matches musical stress
@@ -2000,8 +2081,8 @@ The lyrics should:
2000
  - Maintain a consistent theme throughout
2001
  - Match the audio segment duration of {duration:.1f} seconds
2002
 
2003
- DON'T include any section labels like [Verse] or [Chorus] unless specifically instructed.
2004
- Instead, write lyrics that flow naturally and match the music's rhythm precisely.
2005
 
2006
  IMPORTANT: Your generated lyrics must be followed by a section titled "[RHYTHM_ANALYSIS_SECTION]"
2007
  where you analyze how well the lyrics align with the musical rhythm. This section MUST appear
@@ -2144,29 +2225,6 @@ Improved lyrics with fixed rhythm:
2144
  # No significant issues detected
2145
  lyrics = verified_lyrics
2146
 
2147
- # Add section labels if they're not present and we're using the traditional approach
2148
- if use_sections and "Verse" not in lyrics and "Chorus" not in lyrics:
2149
- lines = lyrics.split('\n')
2150
- formatted_lyrics = []
2151
-
2152
- line_count = 0
2153
- for i, line in enumerate(lines):
2154
- if not line.strip():
2155
- formatted_lyrics.append(line)
2156
- continue
2157
-
2158
- if line_count == 0:
2159
- formatted_lyrics.append("[Verse]")
2160
- elif line_count == verse_lines:
2161
- formatted_lyrics.append("\n[Chorus]")
2162
- elif line_count == verse_lines + chorus_lines and bridge_lines > 0:
2163
- formatted_lyrics.append("\n[Bridge]")
2164
-
2165
- formatted_lyrics.append(line)
2166
- line_count += 1
2167
-
2168
- lyrics = '\n'.join(formatted_lyrics)
2169
-
2170
  # Check if we have the [RHYTHM_ANALYSIS_SECTION] tag
2171
  if "[RHYTHM_ANALYSIS_SECTION]" in lyrics:
2172
  # Split at our custom marker
@@ -2185,9 +2243,6 @@ Improved lyrics with fixed rhythm:
2185
  # No analysis found, add a minimal one
2186
  lyrics = lyrics + "\n\n[Note: Rhythm Analysis]\nNo rhythm issues detected. All syllables align well with the beat pattern."
2187
 
2188
- # Store the syllable guidance for later use
2189
- syllable_guidance_text = syllable_guidance
2190
-
2191
  # Before returning, add syllable analysis and prompt template
2192
  if isinstance(lyrics, str):
2193
  # Extract clean lyrics and analysis
@@ -2206,15 +2261,22 @@ Improved lyrics with fixed rhythm:
2206
  if templates_for_verification:
2207
  syllable_analysis += "Template Analysis:\n"
2208
  for i, template in enumerate(templates_for_verification):
2209
- syllable_analysis += f"Line {i+1}:\n"
2210
- if isinstance(template, dict):
2211
- if "syllable_template" in template:
2212
- syllable_analysis += f" Template: {template['syllable_template']}\n"
2213
- if "syllable_count" in template:
2214
- syllable_analysis += f" Expected syllables: {template['syllable_count']}\n"
2215
- elif isinstance(template, str):
2216
- syllable_analysis += f" Template: {template}\n"
2217
- syllable_analysis += "\n"
 
 
 
 
 
 
 
2218
 
2219
  # Create prompt template
2220
  prompt_template = "=== PROMPT TEMPLATE ===\n\n"
@@ -2284,7 +2346,139 @@ def process_audio(audio_file):
2284
 
2285
  # Calculate detailed song structure for better lyrics alignment
2286
  try:
2287
- song_structure = calculate_detailed_song_structure(audio_data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2288
  except Exception as e:
2289
  print(f"Error analyzing song structure: {str(e)}")
2290
  # Continue with a simpler approach if this fails
 
1677
  syllable_guidance = ""
1678
  templates_for_verification = []
1679
 
1680
+ # Create a structure visualization to help with lyrics-music matching
1681
+ structure_visualization = "=== MUSIC-LYRICS STRUCTURE MATCHING ===\n\n"
1682
+ structure_visualization += f"Song Duration: {duration:.1f} seconds\n"
1683
+ structure_visualization += f"Tempo: {tempo:.1f} BPM\n\n"
1684
+
1685
  if song_structure:
1686
  # Try to use flexible structure if available
1687
  if "flexible_structure" in song_structure and song_structure["flexible_structure"]:
 
1690
  # Get the segments
1691
  segments = flexible["segments"]
1692
 
1693
+ # Add structure visualization
1694
+ structure_visualization += f"Total segments: {len(segments)}\n"
1695
+ structure_visualization += "Each segment represents one musical phrase for which you should write ONE line of lyrics.\n\n"
1696
+
1697
  # Process each segment to create enhanced rhythmic templates
1698
  enhanced_templates = []
1699
 
1700
  for i, segment in enumerate(segments):
1701
+ if i < 30: # Extend limit to 30 lines to handle longer songs
1702
  # Get the beat information for this segment
1703
  segment_start = segment["start"]
1704
  segment_end = segment["end"]
1705
 
1706
+ # Add segment info to visualization
1707
+ structure_visualization += f"Segment {i+1}: {segment_start:.1f}s - {segment_end:.1f}s (duration: {segment_end-segment_start:.1f}s)\n"
1708
+
1709
  # Find beats within this segment
1710
  segment_beats = []
1711
  beat_times = flexible["beats"]["beat_times"]
 
1739
  )
1740
  enhanced_templates.append(enhanced_template)
1741
  templates_for_verification.append(enhanced_template)
1742
+
1743
+ # Add template to visualization
1744
+ structure_visualization += f" Template: {enhanced_template}\n"
1745
+
1746
+ # Use these templates to determine verse/chorus structure based on similar patterns
1747
+ # This is a simple version - could be enhanced with more sophisticated pattern detection
1748
+ section_types = []
1749
+ pattern_groups = {}
1750
+
1751
+ for i, template in enumerate(enhanced_templates):
1752
+ # Create simplified version for pattern matching
1753
+ simple_pattern = template.replace("(", "").replace(")", "").replace(":", "")
1754
+
1755
+ # Check if this simplified pattern exactly matches one we've already seen
1756
+ found_match = False
1757
+ for group, patterns in pattern_groups.items():
1758
+ if any(simple_pattern == p.replace("(", "").replace(")", "").replace(":", "") for p in patterns):
1759
+ pattern_groups[group].append(template)
1760
+ section_types.append(group)
1761
+ found_match = True
1762
+ break
1763
+
1764
+ if not found_match:
1765
+ # New pattern type
1766
+ group_name = f"Group_{len(pattern_groups) + 1}"
1767
+ pattern_groups[group_name] = [template]
1768
+ section_types.append(group_name)
1769
+
1770
+ # Map pattern groups to verse/chorus/bridge based on common structures
1771
+ section_mapping = {}
1772
+ if len(pattern_groups) >= 1:
1773
+ # Assume the most common pattern is the verse
1774
+ most_common = max(pattern_groups.items(), key=lambda x: len(x[1]))[0]
1775
+ section_mapping[most_common] = "verse"
1776
+
1777
+ if len(pattern_groups) >= 2:
1778
+ # Second most common might be chorus
1779
+ sorted_groups = sorted(pattern_groups.items(), key=lambda x: len(x[1]), reverse=True)
1780
+ if len(sorted_groups) > 1:
1781
+ section_mapping[sorted_groups[1][0]] = "chorus"
1782
+
1783
+ if len(pattern_groups) >= 3:
1784
+ # Third pattern could be bridge
1785
+ sorted_groups = sorted(pattern_groups.items(), key=lambda x: len(x[1]), reverse=True)
1786
+ if len(sorted_groups) > 2:
1787
+ section_mapping[sorted_groups[2][0]] = "bridge"
1788
+
1789
+ # Update section types using the mapping
1790
+ mapped_section_types = []
1791
+ for section_type in section_types:
1792
+ if section_type in section_mapping:
1793
+ mapped_section_types.append(section_mapping[section_type])
1794
+ else:
1795
+ mapped_section_types.append("verse") # Default to verse
1796
+
1797
+ # Add structure visualization with section types
1798
+ structure_visualization += "\nPredicted Song Structure:\n"
1799
+ for i, section_type in enumerate(mapped_section_types):
1800
+ if i < len(enhanced_templates):
1801
+ structure_visualization += f"Line {i+1}: [{section_type.upper()}] {enhanced_templates[i]}\n"
1802
+
1803
+ # Calculate total line count
1804
+ total_lines = len(enhanced_templates)
1805
+ verse_lines = mapped_section_types.count("verse")
1806
+ chorus_lines = mapped_section_types.count("chorus")
1807
+ bridge_lines = mapped_section_types.count("bridge")
1808
 
1809
+ # Add summary
1810
+ structure_visualization += f"\nTotal Lines Required: {total_lines}\n"
1811
+ structure_visualization += f"Verse Lines: {verse_lines}\n"
1812
+ structure_visualization += f"Chorus Lines: {chorus_lines}\n"
1813
+ structure_visualization += f"Bridge Lines: {bridge_lines}\n"
1814
+
1815
+ # Format templates with improved formatting for the prompt
1816
  syllable_guidance = "CRITICAL RHYTHM INSTRUCTIONS:\n"
1817
+ syllable_guidance += "Each line of lyrics MUST match exactly with one musical phrase/segment.\n"
1818
+ syllable_guidance += "Follow these rhythm patterns for each line (STRONG beats need stressed syllables):\n\n"
1819
+
1820
+ # Add section headers to formatted templates
1821
+ formatted_templates = []
1822
+ for i, template in enumerate(enhanced_templates):
1823
+ if i < len(mapped_section_types):
1824
+ section_type = mapped_section_types[i].upper()
1825
+ if i > 0 and mapped_section_types[i] != mapped_section_types[i-1]:
1826
+ # New section
1827
+ formatted_templates.append(f"\n[{section_type}]")
1828
+ elif i == 0:
1829
+ # First section
1830
+ formatted_templates.append(f"[{section_type}]")
1831
+ formatted_templates.append(format_syllable_templates_for_prompt([template], arrow="→", line_wrap=8))
1832
+
1833
+ syllable_guidance += "\n".join(formatted_templates)
1834
+
1835
+ # Store info for later use in traditional sections approach
1836
+ use_sections = True
1837
+
1838
+ # Use the detected section structure for traditional approach
1839
+ if verse_lines > 0:
1840
+ verse_lines = min(verse_lines, total_lines // 2) # Ensure reasonable limits
1841
+ else:
1842
+ verse_lines = total_lines // 2
1843
+
1844
+ if chorus_lines > 0:
1845
+ chorus_lines = min(chorus_lines, total_lines // 3)
1846
+ else:
1847
+ chorus_lines = total_lines // 3
1848
+
1849
+ if bridge_lines > 0:
1850
+ bridge_lines = min(bridge_lines, total_lines // 6)
1851
+ else:
1852
+ bridge_lines = 0
1853
 
 
 
1854
  # Fallback to traditional sections if needed
1855
  elif "syllables" in song_structure and song_structure["syllables"]:
1856
  syllable_guidance = "RHYTHM PATTERN INSTRUCTIONS:\n"
1857
+ syllable_guidance += "Follow these syllable patterns for each section. Each line should match ONE phrase:\n\n"
1858
+
1859
+ # Count sections for visualization
1860
+ section_counts = {"verse": 0, "chorus": 0, "bridge": 0, "intro": 0, "outro": 0}
1861
 
1862
  for section in song_structure["syllables"]:
1863
+ section_counts[section["type"]] = section_counts.get(section["type"], 0) + 1
1864
+
1865
  if "syllable_template" in section:
1866
  # Process to create enhanced template
1867
  section_beats_info = {
 
1896
  templates_for_verification.append(section)
1897
  elif "syllable_count" in section:
1898
  syllable_guidance += f"[{section['type'].capitalize()}]: ~{section['syllable_count']} syllables total\n"
1899
+
1900
+ # Create structure visualization
1901
+ structure_visualization += "Using traditional section-based structure:\n"
1902
+ for section_type, count in section_counts.items():
1903
+ if count > 0:
1904
+ structure_visualization += f"{section_type.capitalize()}: {count} sections\n"
1905
+
1906
+ # Set traditional section counts
1907
+ verse_lines = max(2, section_counts.get("verse", 0) * 4)
1908
+ chorus_lines = max(2, section_counts.get("chorus", 0) * 4)
1909
+ bridge_lines = max(0, section_counts.get("bridge", 0) * 2)
1910
+
1911
+ # Use sections approach
1912
+ use_sections = True
1913
 
1914
  # If we couldn't get specific templates, use general guidance
1915
  if not syllable_guidance:
 
1920
  syllable_guidance += " - Fast tempo (>120 BPM): 4-6 syllables per line\n"
1921
  syllable_guidance += " - Medium tempo (90-120 BPM): 6-8 syllables per line\n"
1922
  syllable_guidance += " - Slow tempo (<90 BPM): 8-10 syllables per line\n"
1923
+
1924
+ # Create basic structure visualization
1925
+ structure_visualization += "Using estimated structure (no detailed analysis available):\n"
1926
+
1927
+ # Calculate rough section counts based on duration
1928
+ estimated_lines = max(8, int(duration / 10))
1929
+ structure_visualization += f"Estimated total lines: {estimated_lines}\n"
1930
+
1931
+ # Set traditional section counts based on duration
1932
+ verse_lines = estimated_lines // 2
1933
+ chorus_lines = estimated_lines // 3
1934
+ bridge_lines = estimated_lines // 6 if estimated_lines > 12 else 0
1935
+
1936
+ # Use sections approach
1937
+ use_sections = True
1938
 
1939
  # Add examples of syllable-beat alignment with enhanced format
1940
  syllable_guidance += "\nEXAMPLES OF PERFECT RHYTHM ALIGNMENT:\n"
 
1979
  # Add genre guidance to the main guidance
1980
  syllable_guidance += genre_guidance
1981
 
1982
+ # Store the syllable guidance for later use
1983
+ syllable_guidance_text = syllable_guidance
1984
+
1985
+ # Determine if we should use traditional sections or not based on structure
1986
  if song_structure and "flexible_structure" in song_structure and song_structure["flexible_structure"]:
1987
  # If we have more than 4 segments, it's likely not a traditional song structure
1988
  if "segments" in song_structure["flexible_structure"]:
 
1990
  if len(segments) > 4:
1991
  use_sections = False
1992
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1993
  # Create enhanced prompt with better rhythm alignment instructions
1994
  if use_sections:
1995
  # Traditional approach with sections
 
2020
  3. Count syllables carefully to ensure they match the pattern precisely
2021
  4. Test your line against the pattern by mapping each syllable
2022
 
2023
+ IMPORTANT: Each line of lyrics must match exactly to ONE musical phrase/segment.
2024
+
2025
  The lyrics should:
2026
  - Perfectly capture the essence and style of {genre} music
2027
  - Express the {primary_emotion} emotion and {primary_theme} theme
2028
+ - Follow the structure patterns provided above
 
 
 
 
2029
  - Be completely original
2030
  - Match the song duration of {duration:.1f} seconds
2031
 
 
2067
  3. Count syllables carefully to ensure they match the pattern precisely
2068
  4. Test your line against the pattern by mapping each syllable
2069
 
2070
+ CRITICAL: Each line of lyrics must match exactly to ONE musical phrase/segment.
2071
+
2072
  For perfect alignment examples:
2073
  - "FEEL the RHY-thm in your SOUL" – stressed syllables on strong beats
2074
  - "to-DAY we DANCE a-LONG" – natural speech stress matches musical stress
 
2081
  - Maintain a consistent theme throughout
2082
  - Match the audio segment duration of {duration:.1f} seconds
2083
 
2084
+ Include any section labels like [Verse] or [Chorus] as indicated in the rhythm patterns above.
2085
+ Each line of lyrics must follow the corresponding segment's rhythm pattern EXACTLY.
2086
 
2087
  IMPORTANT: Your generated lyrics must be followed by a section titled "[RHYTHM_ANALYSIS_SECTION]"
2088
  where you analyze how well the lyrics align with the musical rhythm. This section MUST appear
 
2225
  # No significant issues detected
2226
  lyrics = verified_lyrics
2227
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2228
  # Check if we have the [RHYTHM_ANALYSIS_SECTION] tag
2229
  if "[RHYTHM_ANALYSIS_SECTION]" in lyrics:
2230
  # Split at our custom marker
 
2243
  # No analysis found, add a minimal one
2244
  lyrics = lyrics + "\n\n[Note: Rhythm Analysis]\nNo rhythm issues detected. All syllables align well with the beat pattern."
2245
 
 
 
 
2246
  # Before returning, add syllable analysis and prompt template
2247
  if isinstance(lyrics, str):
2248
  # Extract clean lyrics and analysis
 
2261
  if templates_for_verification:
2262
  syllable_analysis += "Template Analysis:\n"
2263
  for i, template in enumerate(templates_for_verification):
2264
+ if i < min(len(templates_for_verification), 30): # Limit to 30 to avoid overwhelming output
2265
+ syllable_analysis += f"Line {i+1}:\n"
2266
+ if isinstance(template, dict):
2267
+ if "syllable_template" in template:
2268
+ syllable_analysis += f" Template: {template['syllable_template']}\n"
2269
+ if "syllable_count" in template:
2270
+ syllable_analysis += f" Expected syllables: {template['syllable_count']}\n"
2271
+ elif isinstance(template, str):
2272
+ syllable_analysis += f" Template: {template}\n"
2273
+ syllable_analysis += "\n"
2274
+
2275
+ if len(templates_for_verification) > 30:
2276
+ syllable_analysis += f"... and {len(templates_for_verification) - 30} more lines\n\n"
2277
+
2278
+ # Add structure visualization to syllable analysis
2279
+ syllable_analysis += "\n" + structure_visualization
2280
 
2281
  # Create prompt template
2282
  prompt_template = "=== PROMPT TEMPLATE ===\n\n"
 
2346
 
2347
  # Calculate detailed song structure for better lyrics alignment
2348
  try:
2349
+ # Enhanced song structure calculation for precise lyrics matching
2350
+ y, sr = load_audio(audio_file, SAMPLE_RATE)
2351
+
2352
+ # Analyze beats and phrases for music-aligned lyrics
2353
+ beats_info = detect_beats(y, sr)
2354
+ sections_info = detect_sections(y, sr)
2355
+
2356
+ # Create structured segments for precise line-by-line matching
2357
+ segments = []
2358
+
2359
+ # Try to break audio into meaningful segments based on sections
2360
+ # Each segment will correspond to one line of lyrics
2361
+ if sections_info and len(sections_info) > 1:
2362
+ min_segment_duration = 1.5 # Minimum 1.5 seconds per segment
2363
+
2364
+ for section in sections_info:
2365
+ section_start = section["start"]
2366
+ section_end = section["end"]
2367
+ section_duration = section["duration"]
2368
+
2369
+ # For very short sections, add as a single segment
2370
+ if section_duration < min_segment_duration * 1.5:
2371
+ segments.append({
2372
+ "start": section_start,
2373
+ "end": section_end
2374
+ })
2375
+ else:
2376
+ # Calculate ideal number of segments for this section
2377
+ # based on its duration - aiming for 2-4 second segments
2378
+ ideal_segment_duration = 3.0 # Target 3 seconds per segment
2379
+ segment_count = max(1, int(section_duration / ideal_segment_duration))
2380
+
2381
+ # Create evenly-spaced segments within this section
2382
+ segment_duration = section_duration / segment_count
2383
+ for i in range(segment_count):
2384
+ segment_start = section_start + i * segment_duration
2385
+ segment_end = segment_start + segment_duration
2386
+ segments.append({
2387
+ "start": segment_start,
2388
+ "end": segment_end
2389
+ })
2390
+ # If no good sections found, create segments based on beats
2391
+ elif beats_info and len(beats_info["beat_times"]) > 4:
2392
+ beats = beats_info["beat_times"]
2393
+ time_signature = beats_info.get("time_signature", 4)
2394
+
2395
+ # Target one segment per musical measure (typically 4 beats)
2396
+ measure_size = time_signature
2397
+ for i in range(0, len(beats), measure_size):
2398
+ if i + 1 < len(beats): # Need at least 2 beats for a meaningful segment
2399
+ measure_start = beats[i]
2400
+ # If we have enough beats for the full measure
2401
+ if i + measure_size < len(beats):
2402
+ measure_end = beats[i + measure_size]
2403
+ else:
2404
+ # Use available beats and extrapolate for the last measure
2405
+ if i > 0:
2406
+ beat_interval = beats[i] - beats[i-1]
2407
+ measure_end = beats[-1] + (beat_interval * (measure_size - (len(beats) - i)))
2408
+ else:
2409
+ measure_end = audio_data["duration"]
2410
+
2411
+ segments.append({
2412
+ "start": measure_start,
2413
+ "end": measure_end
2414
+ })
2415
+ # Last resort: simple time-based segments
2416
+ else:
2417
+ # Create segments of approximately 3 seconds each
2418
+ segment_duration = 3.0
2419
+ total_segments = max(4, int(audio_data["duration"] / segment_duration))
2420
+ segment_duration = audio_data["duration"] / total_segments
2421
+
2422
+ for i in range(total_segments):
2423
+ segment_start = i * segment_duration
2424
+ segment_end = segment_start + segment_duration
2425
+ segments.append({
2426
+ "start": segment_start,
2427
+ "end": segment_end
2428
+ })
2429
+
2430
+ # Create a flexible structure with the segments
2431
+ flexible_structure = {
2432
+ "beats": beats_info,
2433
+ "segments": segments
2434
+ }
2435
+
2436
+ # Add to song structure
2437
+ song_structure = {
2438
+ "beats": beats_info,
2439
+ "sections": sections_info,
2440
+ "flexible_structure": flexible_structure
2441
+ }
2442
+
2443
+ # Add syllable counts to each section
2444
+ song_structure["syllables"] = []
2445
+ for section in sections_info:
2446
+ # Create syllable templates for sections
2447
+ section_beats_info = {
2448
+ "beat_times": [beat for beat in beats_info["beat_times"]
2449
+ if section["start"] <= beat < section["end"]],
2450
+ "tempo": beats_info.get("tempo", 120)
2451
+ }
2452
+ if "beat_strengths" in beats_info:
2453
+ section_beats_info["beat_strengths"] = [
2454
+ strength for i, strength in enumerate(beats_info["beat_strengths"])
2455
+ if i < len(beats_info["beat_times"]) and
2456
+ section["start"] <= beats_info["beat_times"][i] < section["end"]
2457
+ ]
2458
+
2459
+ # Get a syllable count based on section duration and tempo
2460
+ syllable_count = int(section["duration"] * (beats_info.get("tempo", 120) / 60) * 1.5)
2461
+
2462
+ section_info = {
2463
+ "type": section["type"],
2464
+ "start": section["start"],
2465
+ "end": section["end"],
2466
+ "duration": section["duration"],
2467
+ "syllable_count": syllable_count,
2468
+ "beat_count": len(section_beats_info["beat_times"])
2469
+ }
2470
+
2471
+ # Try to create a more detailed syllable template
2472
+ if len(section_beats_info["beat_times"]) >= 2:
2473
+ section_info["syllable_template"] = create_flexible_syllable_templates(
2474
+ section_beats_info,
2475
+ genre=top_genres[0][0]
2476
+ )
2477
+
2478
+ song_structure["syllables"].append(section_info)
2479
+
2480
+ print(f"Successfully analyzed song structure with {len(segments)} segments")
2481
+
2482
  except Exception as e:
2483
  print(f"Error analyzing song structure: {str(e)}")
2484
  # Continue with a simpler approach if this fails