root committed on
Commit
054fb90
·
1 Parent(s): 4ddd8f4

syllables trying second

Browse files
Files changed (1) hide show
  1. app.py +346 -256
app.py CHANGED
@@ -403,11 +403,12 @@ def detect_sections(y, sr):
403
 
404
  # New function: Create flexible syllable templates
405
  def create_flexible_syllable_templates(beats_info):
406
- """Create syllable templates based purely on beat patterns without assuming song structure."""
407
  # Get the beat times and strengths
408
  beat_times = beats_info["beat_times"]
409
  beat_strengths = beats_info.get("beat_strengths", [1.0] * len(beat_times))
410
  phrases = beats_info.get("phrases", [])
 
411
 
412
  # If no phrases were detected, create a simple division
413
  if not phrases:
@@ -418,190 +419,124 @@ def create_flexible_syllable_templates(beats_info):
418
  if end_idx - i >= 2: # Ensure at least 2 beats per phrase
419
  phrases.append(list(range(i, end_idx)))
420
 
421
- # Create syllable templates for each phrase
422
  syllable_templates = []
423
 
424
  for phrase in phrases:
425
- # Calculate appropriate syllable count for this phrase
426
- beat_count = len(phrase)
427
  phrase_strengths = [beat_strengths[i] for i in phrase if i < len(beat_strengths)]
428
- avg_strength = sum(phrase_strengths) / len(phrase_strengths) if phrase_strengths else 1.0
 
429
 
430
- # Base calculation: 1-2 syllables per beat depending on tempo
431
- tempo = beats_info.get("tempo", 120)
432
- if tempo > 120: # Fast tempo
433
- syllables_per_beat = 1.0
434
- elif tempo > 90: # Medium tempo
435
- syllables_per_beat = 1.5
436
- else: # Slow tempo
437
- syllables_per_beat = 2.0
438
-
439
- # Adjust for beat strength
440
- syllables_per_beat *= (0.8 + (avg_strength * 0.4))
441
-
442
- # Calculate total syllables for the phrase
443
- phrase_syllables = int(beat_count * syllables_per_beat)
444
- if phrase_syllables < 2:
445
- phrase_syllables = 2
446
-
447
- syllable_templates.append(str(phrase_syllables))
448
-
449
- return "-".join(syllable_templates)
450
-
451
- # New function: Analyze flexible structure
452
- def analyze_flexible_structure(audio_data):
453
- """Analyze music structure without assuming traditional song sections."""
454
- y = audio_data["waveform"]
455
- sr = audio_data["sample_rate"]
456
-
457
- # Enhanced beat detection
458
- beats_info = detect_beats(y, sr)
459
-
460
- # Identify segments with similar audio features (using MFCC)
461
- mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
462
-
463
- # Use agglomerative clustering to find segment boundaries
464
- segment_boundaries = librosa.segment.agglomerative(mfcc, 3)
465
- segment_times = librosa.frames_to_time(segment_boundaries, sr=sr)
466
-
467
- # Create segments
468
- segments = []
469
- for i in range(len(segment_times)-1):
470
- start = segment_times[i]
471
- end = segment_times[i+1]
472
-
473
- # Find beats within this segment
474
- segment_beats = []
475
- for j, time in enumerate(beats_info["beat_times"]):
476
- if start <= time < end:
477
- segment_beats.append(j)
478
-
479
- # Create syllable template for this segment
480
- if segment_beats:
481
- segment_beats_info = {
482
- "beat_times": [beats_info["beat_times"][j] for j in segment_beats],
483
- "tempo": beats_info.get("tempo", 120)
484
- }
485
 
486
- if "beat_strengths" in beats_info:
487
- segment_beats_info["beat_strengths"] = [
488
- beats_info["beat_strengths"][j] for j in segment_beats
489
- if j < len(beats_info["beat_strengths"])
490
- ]
491
-
492
- if "intervals" in beats_info:
493
- segment_beats_info["intervals"] = beats_info["intervals"]
 
494
 
495
- if "phrases" in beats_info:
496
- # Filter phrases to include only beats in this segment
497
- segment_phrases = []
498
- for phrase in beats_info["phrases"]:
499
- segment_phrase = [beat_idx for beat_idx in phrase if beat_idx in segment_beats]
500
- if len(segment_phrase) >= 2:
501
- segment_phrases.append(segment_phrase)
502
-
503
- segment_beats_info["phrases"] = segment_phrases
504
-
505
- syllable_template = create_flexible_syllable_templates(segment_beats_info)
506
  else:
507
- syllable_template = "4" # Default fallback
 
508
 
509
- segments.append({
510
- "start": start,
511
- "end": end,
512
- "duration": end - start,
513
- "syllable_template": syllable_template
514
- })
 
 
 
 
 
 
 
 
 
 
515
 
516
- return {
517
- "beats": beats_info,
518
- "segments": segments
519
- }
520
 
521
- # Enhanced estimate_syllables_per_section function
522
- def estimate_syllables_per_section(beats_info, sections):
523
- """Estimate the number of syllables needed for each section based on beats."""
524
- syllables_per_section = []
 
525
 
526
- for section in sections:
527
- # Find beats that fall within this section
528
- section_beats = [
529
- beat for beat in beats_info["beat_times"]
530
- if section["start"] <= beat < section["end"]
531
- ]
532
-
533
- # Calculate syllables based on section type and beat count
534
- beat_count = len(section_beats)
535
-
536
- # Extract beat strengths for this section if available
537
- section_beat_strengths = []
538
- if "beat_strengths" in beats_info:
539
- for i, beat_time in enumerate(beats_info["beat_times"]):
540
- if section["start"] <= beat_time < section["end"] and i < len(beats_info["beat_strengths"]):
541
- section_beat_strengths.append(beats_info["beat_strengths"][i])
542
 
543
- # Create a segment-specific beat info structure for template creation
544
- segment_beats_info = {
545
- "beat_times": section_beats,
546
- "tempo": beats_info.get("tempo", 120)
547
- }
548
-
549
- if section_beat_strengths:
550
- segment_beats_info["beat_strengths"] = section_beat_strengths
551
 
552
- if "intervals" in beats_info:
553
- segment_beats_info["intervals"] = beats_info["intervals"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
554
 
555
- # Create a detailed syllable template for this section
556
- syllable_template = create_flexible_syllable_templates(segment_beats_info)
557
 
558
- # Calculate estimated syllable count
559
- expected_counts = [int(count) for count in syllable_template.split("-")]
560
- total_syllables = sum(expected_counts)
 
561
 
562
- syllables_per_section.append({
563
- "type": section["type"],
564
- "start": section["start"],
565
- "end": section["end"],
566
- "duration": section["duration"],
567
- "beat_count": beat_count,
568
- "syllable_count": total_syllables,
569
- "syllable_template": syllable_template
570
- })
571
-
572
- return syllables_per_section
573
-
574
- def calculate_detailed_song_structure(audio_data):
575
- """Calculate detailed song structure for better lyrics generation."""
576
- y = audio_data["waveform"]
577
- sr = audio_data["sample_rate"]
578
-
579
- # Enhanced beat detection
580
- beats_info = detect_beats(y, sr)
581
-
582
- # Detect sections
583
- sections = detect_sections(y, sr)
584
-
585
- # Create enhanced syllable info per section
586
- syllables_info = estimate_syllables_per_section(beats_info, sections)
587
-
588
- # Get flexible structure analysis as an alternative approach
589
- try:
590
- flexible_structure = analyze_flexible_structure(audio_data)
591
- except Exception as e:
592
- print(f"Warning: Flexible structure analysis failed: {str(e)}")
593
- flexible_structure = None
594
-
595
- return {
596
- "beats": beats_info,
597
- "sections": sections,
598
- "syllables": syllables_info,
599
- "flexible_structure": flexible_structure
600
- }
601
 
602
- # New function: Verify syllable counts
603
  def verify_flexible_syllable_counts(lyrics, templates):
604
- """Verify that the generated lyrics match the required syllable counts."""
605
  # Split lyrics into lines
606
  lines = [line.strip() for line in lyrics.split("\n") if line.strip()]
607
 
@@ -616,31 +551,101 @@ def verify_flexible_syllable_counts(lyrics, templates):
616
 
617
  # Handle different template formats
618
  if isinstance(template, dict) and "syllable_template" in template:
619
- expected_counts = [int(count) for count in template["syllable_template"].split("-")]
620
  elif isinstance(template, str):
621
- expected_counts = [int(count) for count in template.split("-")]
622
  else:
623
  continue
624
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
625
  # Count actual syllables
626
  actual_count = count_syllables(line)
627
 
628
  # Calculate difference
629
- total_expected = sum(expected_counts)
630
- if abs(actual_count - total_expected) > 2: # Allow small differences
631
  verification_notes.append(f"Line {i+1}: Expected {total_expected} syllables, got {actual_count}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
632
 
633
  # If we found issues, add them as notes at the end of the lyrics
634
  if verification_notes:
635
  lyrics += "\n\n[Note: Potential rhythm mismatches in these lines:]\n"
636
  lyrics += "\n".join(verification_notes)
637
- lyrics += "\n[You may want to adjust these lines to match the music's rhythm better]"
 
 
 
638
 
639
  return lyrics
640
 
641
  # Modified generate_lyrics function
642
  def generate_lyrics(genre, duration, emotion_results, song_structure=None):
643
- """Generate lyrics based on the genre, emotion, and structure analysis."""
644
  # Extract emotion and theme data from analysis results
645
  primary_emotion = emotion_results["emotion_analysis"]["primary_emotion"]
646
  primary_theme = emotion_results["theme_analysis"]["primary_theme"]
@@ -663,39 +668,119 @@ def generate_lyrics(genre, duration, emotion_results, song_structure=None):
663
  if "flexible_structure" in song_structure and song_structure["flexible_structure"]:
664
  flexible = song_structure["flexible_structure"]
665
  if "segments" in flexible and flexible["segments"]:
666
- syllable_guidance = "Follow these exact syllable patterns for each line:\n"
 
667
 
668
- for i, segment in enumerate(flexible["segments"]):
 
 
 
669
  if i < 15: # Limit to 15 lines to keep prompt manageable
670
- syllable_guidance += f"Line {i+1}: {segment['syllable_template']} syllables\n"
671
- templates_for_verification.append(segment["syllable_template"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
672
 
673
  # Fallback to traditional sections if needed
674
  elif "syllables" in song_structure and song_structure["syllables"]:
675
- syllable_guidance = "Follow these syllable patterns for each section:\n"
 
676
 
677
  for section in song_structure["syllables"]:
678
  if "syllable_template" in section:
679
- syllable_guidance += f"[{section['type'].capitalize()}]: {section['syllable_template']} syllables per line\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
680
  elif "syllable_count" in section:
681
  syllable_guidance += f"[{section['type'].capitalize()}]: ~{section['syllable_count']} syllables total\n"
682
-
683
- if "syllable_template" in section:
684
- templates_for_verification.append(section)
685
 
686
  # If we couldn't get specific templates, use general guidance
687
  if not syllable_guidance:
688
- syllable_guidance = "Make sure each line has an appropriate syllable count for singing:\n"
689
- syllable_guidance += "- For faster sections (tempo > 120 BPM): 4-6 syllables per line\n"
690
- syllable_guidance += "- For medium tempo sections: 6-8 syllables per line\n"
691
- syllable_guidance += "- For slower sections (tempo < 90 BPM): 8-10 syllables per line\n"
 
 
 
 
 
 
 
 
 
 
692
 
693
- # Add examples of syllable counting
694
- syllable_guidance += "\nExamples of syllable counting:\n"
695
- syllable_guidance += "- 'I can see the light' = 4 syllables\n"
696
- syllable_guidance += "- 'When it fades a-way' = 4 syllables\n"
697
- syllable_guidance += "- 'The sun is shin-ing bright to-day' = 8 syllables\n"
698
- syllable_guidance += "- 'I'll be wait-ing for you' = 6 syllables\n"
 
 
 
699
 
700
  # Determine if we should use traditional sections or not
701
  use_sections = True
@@ -706,55 +791,35 @@ def generate_lyrics(genre, duration, emotion_results, song_structure=None):
706
  if len(segments) > 4:
707
  use_sections = False
708
 
709
- # Create enhanced prompt for the LLM
710
- if use_sections:
711
- # Traditional approach with sections
712
- # Calculate appropriate lyrics length and section distribution
713
- try:
714
- if song_structure and "beats" in song_structure:
715
- beats_info = song_structure["beats"]
716
- tempo = beats_info.get("tempo", 120)
717
- time_signature = beats_info.get("time_signature", 4)
718
- lines_structure = calculate_lyrics_length(duration, tempo, time_signature)
 
719
 
720
- # Handle both possible return types
721
- if isinstance(lines_structure, dict):
722
- total_lines = lines_structure["lines_count"]
723
-
724
- # Extract section line counts if available
725
- verse_lines = 0
726
- chorus_lines = 0
727
- bridge_lines = 0
728
-
729
- for section in lines_structure["sections"]:
730
- if section["type"] == "verse":
731
- verse_lines = section["lines"]
732
- elif section["type"] == "chorus":
733
- chorus_lines = section["lines"]
734
- elif section["type"] == "bridge":
735
- bridge_lines = section["lines"]
736
- else:
737
- # The function returned just an integer (old behavior)
738
- total_lines = lines_structure
739
-
740
- # Default section distribution based on total lines
741
- if total_lines <= 6:
742
- verse_lines = 2
743
- chorus_lines = 2
744
- bridge_lines = 0
745
- elif total_lines <= 10:
746
- verse_lines = 3
747
- chorus_lines = 2
748
- bridge_lines = 0
749
- else:
750
- verse_lines = 3
751
- chorus_lines = 2
752
- bridge_lines = 2
753
  else:
754
- # Fallback to simple calculation
755
- total_lines = max(4, int(duration / 10))
756
 
757
- # Default section distribution
758
  if total_lines <= 6:
759
  verse_lines = 2
760
  chorus_lines = 2
@@ -767,15 +832,35 @@ def generate_lyrics(genre, duration, emotion_results, song_structure=None):
767
  verse_lines = 3
768
  chorus_lines = 2
769
  bridge_lines = 2
770
- except Exception as e:
771
- print(f"Error calculating lyrics length: {str(e)}")
772
  total_lines = max(4, int(duration / 10))
773
 
774
  # Default section distribution
775
- verse_lines = 3
776
- chorus_lines = 2
777
- bridge_lines = 0
 
 
 
 
 
 
 
 
 
 
 
 
778
 
 
 
 
 
 
 
 
 
779
  prompt = f"""
780
  You are a talented songwriter who specializes in {genre} music.
781
  Write original {genre} song lyrics for a song that is {duration:.1f} seconds long.
@@ -786,14 +871,14 @@ Music analysis has detected the following qualities in the music:
786
  - Primary emotion: {primary_emotion}
787
  - Primary theme: {primary_theme}
788
 
789
- IMPORTANT: The lyrics must match the rhythm of the music exactly!
790
  {syllable_guidance}
791
 
792
- When writing the lyrics:
793
- 1. Count syllables carefully for each line to match the specified pattern
794
- 2. Ensure words fall naturally on the beat
795
- 3. Place stressed syllables on strong beats
796
- 4. Create a coherent theme throughout the lyrics
 
797
 
798
  The lyrics should:
799
  - Perfectly capture the essence and style of {genre} music
@@ -820,14 +905,19 @@ Music analysis has detected the following qualities:
820
  - Primary emotion: {primary_emotion}
821
  - Primary theme: {primary_theme}
822
 
823
- IMPORTANT: The lyrics must match the rhythm of the music exactly!
824
  {syllable_guidance}
825
 
826
- When writing the lyrics:
827
- 1. Count syllables carefully for each line to match the specified pattern
828
- 2. Ensure words fall naturally on the beat
829
- 3. Place stressed syllables on strong beats
830
- 4. Create coherent lyrics that would work for this music segment
 
 
 
 
 
 
831
 
832
  The lyrics should:
833
  - Perfectly capture the essence and style of {genre} music
@@ -837,7 +927,7 @@ The lyrics should:
837
  - Match the audio segment duration of {duration:.1f} seconds
838
 
839
  DON'T include any section labels like [Verse] or [Chorus] unless specifically instructed.
840
- Instead, write lyrics that flow naturally and match the music's rhythm.
841
 
842
  Your lyrics:
843
  """
@@ -898,7 +988,7 @@ def process_audio(audio_file):
898
  is_music, ast_results = detect_music(audio_data)
899
  except Exception as e:
900
  print(f"Error in music detection: {str(e)}")
901
- return f"Error in music detection: {str(e)}", None, []
902
 
903
  if not is_music:
904
  return "The uploaded audio does not appear to be music. Please upload a music file.", None, ast_results
 
403
 
404
  # New function: Create flexible syllable templates
405
  def create_flexible_syllable_templates(beats_info):
406
+ """Create detailed syllable templates based on beat patterns, capturing stress patterns."""
407
  # Get the beat times and strengths
408
  beat_times = beats_info["beat_times"]
409
  beat_strengths = beats_info.get("beat_strengths", [1.0] * len(beat_times))
410
  phrases = beats_info.get("phrases", [])
411
+ tempo = beats_info.get("tempo", 120)
412
 
413
  # If no phrases were detected, create a simple division
414
  if not phrases:
 
419
  if end_idx - i >= 2: # Ensure at least 2 beats per phrase
420
  phrases.append(list(range(i, end_idx)))
421
 
422
+ # Create enhanced syllable templates for each phrase
423
  syllable_templates = []
424
 
425
  for phrase in phrases:
426
+ # Extract beat strengths for this phrase
 
427
  phrase_strengths = [beat_strengths[i] for i in phrase if i < len(beat_strengths)]
428
+ if not phrase_strengths:
429
+ phrase_strengths = [1.0] * len(phrase)
430
 
431
+ # Normalize strengths for easier pattern recognition
432
+ if phrase_strengths:
433
+ max_strength = max(phrase_strengths)
434
+ if max_strength > 0:
435
+ norm_strengths = [s/max_strength for s in phrase_strengths]
436
+ else:
437
+ norm_strengths = [1.0] * len(phrase_strengths)
438
+ else:
439
+ norm_strengths = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
440
 
441
+ # Identify strong and weak beats (S = strong, w = weak)
442
+ stress_pattern = []
443
+ for strength in norm_strengths:
444
+ if strength > 0.7:
445
+ stress_pattern.append("S") # Strong beat
446
+ elif strength > 0.4:
447
+ stress_pattern.append("m") # Medium beat
448
+ else:
449
+ stress_pattern.append("w") # Weak beat
450
 
451
+ # Calculate appropriate syllable count based on tempo and beat pattern
452
+ if tempo > 160:
453
+ # Very fast tempo - typically one syllable per beat
454
+ syllables_per_beat = [1] * len(phrase)
455
+ elif tempo > 120:
456
+ # Fast tempo
457
+ syllables_per_beat = [1 if s == "S" or s == "m" else 1 for s in stress_pattern]
458
+ elif tempo > 90:
459
+ # Medium tempo
460
+ syllables_per_beat = [2 if s == "S" else 1 if s == "m" else 1 for s in stress_pattern]
 
461
  else:
462
+ # Slow tempo
463
+ syllables_per_beat = [2 if s == "S" else 2 if s == "m" else 1 for s in stress_pattern]
464
 
465
+ # Create a detailed template with stress information
466
+ detailed_template = []
467
+ for i, (stress, syllable_count) in enumerate(zip(stress_pattern, syllables_per_beat)):
468
+ if stress == "S":
469
+ # Mark strong beat with capital letter followed by syllable count
470
+ detailed_template.append(f"S{syllable_count}")
471
+ elif stress == "m":
472
+ # Mark medium beat with lowercase letter
473
+ detailed_template.append(f"m{syllable_count}")
474
+ else:
475
+ # Mark weak beat with lowercase letter
476
+ detailed_template.append(f"w{syllable_count}")
477
+
478
+ # Join all beat templates for this phrase
479
+ phrase_template = "-".join(detailed_template)
480
+ syllable_templates.append(phrase_template)
481
 
482
+ # Join all phrase templates
483
+ return "|".join(syllable_templates)
 
 
484
 
485
+ # Helper function to convert technical templates to human-readable instructions
486
+ def format_syllable_templates_for_prompt(syllable_templates):
487
+ """Convert technical syllable templates into clear, human-readable instructions."""
488
+ if not syllable_templates:
489
+ return ""
490
 
491
+ # Check if we're dealing with the enhanced format or the old format
492
+ if isinstance(syllable_templates, str) and "|" in syllable_templates:
493
+ # Enhanced format with stress patterns
494
+ phrases = syllable_templates.split("|")
 
 
 
 
 
 
 
 
 
 
 
 
495
 
496
+ instructions = []
497
+ for i, phrase in enumerate(phrases):
498
+ beats = phrase.split("-")
499
+ beat_instructions = []
 
 
 
 
500
 
501
+ for beat in beats:
502
+ if beat.startswith("S"):
503
+ # Strong beat
504
+ count = beat[1:]
505
+ beat_instructions.append(f"STRONG({count})")
506
+ elif beat.startswith("m"):
507
+ # Medium beat
508
+ count = beat[1:]
509
+ beat_instructions.append(f"medium({count})")
510
+ elif beat.startswith("w"):
511
+ # Weak beat
512
+ count = beat[1:]
513
+ beat_instructions.append(f"weak({count})")
514
+ else:
515
+ # Fallback for old format
516
+ beat_instructions.append(beat)
517
 
518
+ line_desc = " β†’ ".join(beat_instructions)
519
+ instructions.append(f"Line {i+1}: {line_desc}")
520
 
521
+ return "\n".join(instructions)
522
+ else:
523
+ # Handle the original format or segment dictionaries
524
+ formatted_lines = []
525
 
526
+ if isinstance(syllable_templates, list):
527
+ for i, template in enumerate(syllable_templates):
528
+ if isinstance(template, dict) and "syllable_template" in template:
529
+ formatted_lines.append(f"Line {i+1}: {template['syllable_template']} syllables")
530
+ elif isinstance(template, str):
531
+ formatted_lines.append(f"Line {i+1}: {template} syllables")
532
+
533
+ return "\n".join(formatted_lines)
534
+
535
+ return str(syllable_templates)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
536
 
537
+ # Enhanced verification function to check syllable counts and stress patterns
538
  def verify_flexible_syllable_counts(lyrics, templates):
539
+ """Verify that the generated lyrics match the required syllable counts and stress patterns."""
540
  # Split lyrics into lines
541
  lines = [line.strip() for line in lyrics.split("\n") if line.strip()]
542
 
 
551
 
552
  # Handle different template formats
553
  if isinstance(template, dict) and "syllable_template" in template:
554
+ template_str = template["syllable_template"]
555
  elif isinstance(template, str):
556
+ template_str = template
557
  else:
558
  continue
559
 
560
+ # Parse the enhanced template format if present
561
+ if "|" in template_str:
562
+ # This is a phrase, take just the first part for now
563
+ template_str = template_str.split("|")[0]
564
+
565
+ # Count expected syllables
566
+ total_expected = 0
567
+
568
+ # Handle the enhanced format with stress patterns
569
+ if "-" in template_str and any(x in template_str for x in ["S", "m", "w"]):
570
+ beats = template_str.split("-")
571
+ expected_counts = []
572
+
573
+ for beat in beats:
574
+ if beat.startswith(("S", "m", "w")):
575
+ try:
576
+ count = int(beat[1:])
577
+ expected_counts.append(count)
578
+ total_expected += count
579
+ except ValueError:
580
+ expected_counts.append(1)
581
+ total_expected += 1
582
+ else:
583
+ try:
584
+ count = int(beat)
585
+ expected_counts.append(count)
586
+ total_expected += count
587
+ except ValueError:
588
+ expected_counts.append(1)
589
+ total_expected += 1
590
+ else:
591
+ # Old format - simple numbers separated by hyphens
592
+ try:
593
+ expected_counts = [int(count) for count in template_str.split("-")]
594
+ total_expected = sum(expected_counts)
595
+ except ValueError:
596
+ # Fallback if we can't parse the template
597
+ expected_counts = []
598
+ total_expected = 0
599
+
600
  # Count actual syllables
601
  actual_count = count_syllables(line)
602
 
603
  # Calculate difference
604
+ if total_expected > 0 and abs(actual_count - total_expected) > 2: # Allow small differences
 
605
  verification_notes.append(f"Line {i+1}: Expected {total_expected} syllables, got {actual_count}")
606
+
607
+ # Additionally check if stressed syllables align with strong beats
608
+ words = re.findall(r'\b[a-zA-Z]+\b', line.lower())
609
+ if words and expected_counts and "S" in template_str:
610
+ # Try to find strong beats in the template
611
+ strong_beat_positions = []
612
+ current_pos = 0
613
+
614
+ for j, beat in enumerate(template_str.split("-")):
615
+ if beat.startswith("S"):
616
+ beat_count = int(beat[1:]) if len(beat) > 1 else 1
617
+ strong_beat_positions.append(current_pos)
618
+ current_pos += beat_count
619
+ else:
620
+ beat_count = int(beat[1:]) if len(beat) > 1 else 1
621
+ current_pos += beat_count
622
+
623
+ # Try to get pronunciations for words to check stress alignment
624
+ word_stresses = []
625
+ for word in words:
626
+ pronunciations = pronouncing.phones_for_word(word)
627
+ if pronunciations:
628
+ stress_pattern = pronouncing.stresses(pronunciations[0])
629
+ word_stresses.append(stress_pattern)
630
+
631
+ # Add note about stress alignment if we have enough information
632
+ if word_stresses and strong_beat_positions and len(word_stresses) >= len(strong_beat_positions):
633
+ verification_notes.append(f" β†’ Check stress alignment on words with strong beats")
634
 
635
  # If we found issues, add them as notes at the end of the lyrics
636
  if verification_notes:
637
  lyrics += "\n\n[Note: Potential rhythm mismatches in these lines:]\n"
638
  lyrics += "\n".join(verification_notes)
639
+ lyrics += "\n\n[To fix mismatches:]\n"
640
+ lyrics += "1. Make sure stressed syllables fall on STRONG beats\n"
641
+ lyrics += "2. Adjust syllable counts to match the template\n"
642
+ lyrics += "3. Try using words with naturally aligned stress patterns"
643
 
644
  return lyrics
645
 
646
  # Modified generate_lyrics function
647
  def generate_lyrics(genre, duration, emotion_results, song_structure=None):
648
+ """Generate lyrics based on the genre, emotion, and structure analysis with enhanced rhythmic alignment."""
649
  # Extract emotion and theme data from analysis results
650
  primary_emotion = emotion_results["emotion_analysis"]["primary_emotion"]
651
  primary_theme = emotion_results["theme_analysis"]["primary_theme"]
 
668
  if "flexible_structure" in song_structure and song_structure["flexible_structure"]:
669
  flexible = song_structure["flexible_structure"]
670
  if "segments" in flexible and flexible["segments"]:
671
+ # Get the segments
672
+ segments = flexible["segments"]
673
 
674
+ # Process each segment to create enhanced rhythmic templates
675
+ enhanced_templates = []
676
+
677
+ for i, segment in enumerate(segments):
678
  if i < 15: # Limit to 15 lines to keep prompt manageable
679
+ # Get the beat information for this segment
680
+ segment_start = segment["start"]
681
+ segment_end = segment["end"]
682
+
683
+ # Find beats within this segment
684
+ segment_beats = []
685
+ beat_times = flexible["beats"]["beat_times"]
686
+ beat_strengths = flexible["beats"].get("beat_strengths", [])
687
+
688
+ for j, beat_time in enumerate(beat_times):
689
+ if segment_start <= beat_time < segment_end:
690
+ # Add this beat to the segment
691
+ segment_beats.append(j)
692
+
693
+ # Create segment-specific beat info
694
+ segment_beats_info = {
695
+ "beat_times": [beat_times[j] for j in segment_beats],
696
+ "tempo": flexible["beats"].get("tempo", 120)
697
+ }
698
+
699
+ if beat_strengths:
700
+ segment_beats_info["beat_strengths"] = [
701
+ beat_strengths[j] for j in segment_beats
702
+ if j < len(beat_strengths)
703
+ ]
704
+
705
+ # Create a phrase structure for this segment
706
+ segment_beats_info["phrases"] = [segment_beats]
707
+
708
+ # Generate enhanced template
709
+ enhanced_template = create_flexible_syllable_templates(segment_beats_info)
710
+ enhanced_templates.append(enhanced_template)
711
+ templates_for_verification.append(enhanced_template)
712
+
713
+ # Format templates for the prompt
714
+ syllable_guidance = "CRITICAL RHYTHM INSTRUCTIONS:\n"
715
+ syllable_guidance += "Match each line exactly to this rhythm pattern (STRONG beats need stressed syllables):\n\n"
716
+ syllable_guidance += format_syllable_templates_for_prompt(enhanced_templates)
717
+
718
+ # Add explanation of notation
719
+ syllable_guidance += "\n\nWhere:\n"
720
+ syllable_guidance += "- STRONG(n): Place a STRESSED syllable here, followed by (n-1) unstressed syllables\n"
721
+ syllable_guidance += "- medium(n): Place a medium-stressed or unstressed syllable here, followed by (n-1) unstressed syllables\n"
722
+ syllable_guidance += "- weak(n): Place unstressed syllables here\n"
723
+ syllable_guidance += "- β†’: Indicates flow from one beat to the next within a line\n"
724
 
725
  # Fallback to traditional sections if needed
726
  elif "syllables" in song_structure and song_structure["syllables"]:
727
+ syllable_guidance = "RHYTHM PATTERN INSTRUCTIONS:\n"
728
+ syllable_guidance += "Follow these syllable patterns for each section:\n\n"
729
 
730
  for section in song_structure["syllables"]:
731
  if "syllable_template" in section:
732
+ # Process to create enhanced template
733
+ section_beats_info = {
734
+ "beat_times": [beat for beat in song_structure["beats"]["beat_times"]
735
+ if section["start"] <= beat < section["end"]],
736
+ "tempo": song_structure["beats"].get("tempo", 120)
737
+ }
738
+
739
+ if "beat_strengths" in song_structure["beats"]:
740
+ section_beats_info["beat_strengths"] = [
741
+ strength for i, strength in enumerate(song_structure["beats"]["beat_strengths"])
742
+ if i < len(song_structure["beats"]["beat_times"]) and
743
+ section["start"] <= song_structure["beats"]["beat_times"][i] < section["end"]
744
+ ]
745
+
746
+ # Create a phrase structure for this section
747
+ section_beats_info["phrases"] = [list(range(len(section_beats_info["beat_times"])))]
748
+
749
+ # Generate enhanced template
750
+ enhanced_template = create_flexible_syllable_templates(section_beats_info)
751
+
752
+ syllable_guidance += f"[{section['type'].capitalize()}]:\n"
753
+ syllable_guidance += format_syllable_templates_for_prompt(enhanced_template) + "\n\n"
754
+ templates_for_verification.append(section)
755
  elif "syllable_count" in section:
756
  syllable_guidance += f"[{section['type'].capitalize()}]: ~{section['syllable_count']} syllables total\n"
 
 
 
757
 
758
  # If we couldn't get specific templates, use general guidance
759
  if not syllable_guidance:
760
+ syllable_guidance = "RHYTHM ALIGNMENT INSTRUCTIONS:\n\n"
761
+ syllable_guidance += "1. Align stressed syllables with strong beats (usually beats 1 and 3 in 4/4 time)\n"
762
+ syllable_guidance += "2. Use unstressed syllables on weak beats (usually beats 2 and 4 in 4/4 time)\n"
763
+ syllable_guidance += "3. Use appropriate syllable counts based on tempo:\n"
764
+ syllable_guidance += " - Fast tempo (>120 BPM): 4-6 syllables per line\n"
765
+ syllable_guidance += " - Medium tempo (90-120 BPM): 6-8 syllables per line\n"
766
+ syllable_guidance += " - Slow tempo (<90 BPM): 8-10 syllables per line\n"
767
+
768
+ # Add examples of syllable-beat alignment with stress patterns
769
+ syllable_guidance += "\nEXAMPLES OF PERFECT RHYTHM ALIGNMENT:\n"
770
+ syllable_guidance += "Pattern: STRONG(1) β†’ weak(1) β†’ medium(1) β†’ weak(1)\n"
771
+ syllable_guidance += "Lyric: 'HEAR the MU-sic PLAY'\n"
772
+ syllable_guidance += " ↑ ↑ ↑ ↑\n"
773
+ syllable_guidance += " S w m w <- BEAT TYPE\n\n"
774
 
775
+ syllable_guidance += "Pattern: STRONG(2) β†’ weak(1) β†’ STRONG(1) β†’ weak(2)\n"
776
+ syllable_guidance += "Lyric: 'DANC-ing TO the RHYTHM of LOVE'\n"
777
+ syllable_guidance += " ↑ ↑ ↑ ↑ ↑ ↑\n"
778
+ syllable_guidance += " S S w S w w <- BEAT TYPE\n\n"
779
+
780
+ syllable_guidance += "Pattern: STRONG(1) β†’ medium(2) β†’ STRONG(1) β†’ weak(1)\n"
781
+ syllable_guidance += "Lyric: 'TIME keeps FLOW-ing ON and ON'\n"
782
+ syllable_guidance += " ↑ ↑ ↑ ↑ ↑ ↑\n"
783
+ syllable_guidance += " S m m S w w <- BEAT TYPE\n\n"
784
 
785
  # Determine if we should use traditional sections or not
786
  use_sections = True
 
791
  if len(segments) > 4:
792
  use_sections = False
793
 
794
+ # Calculate appropriate lyrics length and section distribution
795
+ try:
796
+ if song_structure and "beats" in song_structure:
797
+ beats_info = song_structure["beats"]
798
+ tempo = beats_info.get("tempo", 120)
799
+ time_signature = beats_info.get("time_signature", 4)
800
+ lines_structure = calculate_lyrics_length(duration, tempo, time_signature)
801
+
802
+ # Handle both possible return types
803
+ if isinstance(lines_structure, dict):
804
+ total_lines = lines_structure["lines_count"]
805
 
806
+ # Extract section line counts if available
807
+ verse_lines = 0
808
+ chorus_lines = 0
809
+ bridge_lines = 0
810
+
811
+ for section in lines_structure["sections"]:
812
+ if section["type"] == "verse":
813
+ verse_lines = section["lines"]
814
+ elif section["type"] == "chorus":
815
+ chorus_lines = section["lines"]
816
+ elif section["type"] == "bridge":
817
+ bridge_lines = section["lines"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
818
  else:
819
+ # The function returned just an integer (old behavior)
820
+ total_lines = lines_structure
821
 
822
+ # Default section distribution based on total lines
823
  if total_lines <= 6:
824
  verse_lines = 2
825
  chorus_lines = 2
 
832
  verse_lines = 3
833
  chorus_lines = 2
834
  bridge_lines = 2
835
+ else:
836
+ # Fallback to simple calculation
837
  total_lines = max(4, int(duration / 10))
838
 
839
  # Default section distribution
840
+ if total_lines <= 6:
841
+ verse_lines = 2
842
+ chorus_lines = 2
843
+ bridge_lines = 0
844
+ elif total_lines <= 10:
845
+ verse_lines = 3
846
+ chorus_lines = 2
847
+ bridge_lines = 0
848
+ else:
849
+ verse_lines = 3
850
+ chorus_lines = 2
851
+ bridge_lines = 2
852
+ except Exception as e:
853
+ print(f"Error calculating lyrics length: {str(e)}")
854
+ total_lines = max(4, int(duration / 10))
855
 
856
+ # Default section distribution
857
+ verse_lines = 3
858
+ chorus_lines = 2
859
+ bridge_lines = 0
860
+
861
+ # Create enhanced prompt with better rhythm alignment instructions
862
+ if use_sections:
863
+ # Traditional approach with sections
864
  prompt = f"""
865
  You are a talented songwriter who specializes in {genre} music.
866
  Write original {genre} song lyrics for a song that is {duration:.1f} seconds long.
 
871
  - Primary emotion: {primary_emotion}
872
  - Primary theme: {primary_theme}
873
 
 
874
  {syllable_guidance}
875
 
876
+ CRITICAL PRINCIPLES FOR RHYTHMIC ALIGNMENT:
877
+ 1. STRESSED syllables MUST fall on STRONG beats (marked with STRONG in the pattern)
878
+ 2. Natural word stress patterns must match the beat strength (strong words on strong beats)
879
+ 3. Line breaks should occur at phrase endings for natural breathing
880
+ 4. Consonant clusters should be avoided on fast notes and strong beats
881
+ 5. Open vowels (a, e, o) work better for sustained notes and syllables
882
 
883
  The lyrics should:
884
  - Perfectly capture the essence and style of {genre} music
 
905
  - Primary emotion: {primary_emotion}
906
  - Primary theme: {primary_theme}
907
 
 
908
  {syllable_guidance}
909
 
910
+ CRITICAL PRINCIPLES FOR RHYTHMIC ALIGNMENT:
911
+ 1. STRESSED syllables MUST fall on STRONG beats (marked with STRONG in the pattern)
912
+ 2. Natural word stress patterns must match the beat strength (strong words on strong beats)
913
+ 3. Line breaks should occur at phrase endings for natural breathing
914
+ 4. Consonant clusters should be avoided on fast notes and strong beats
915
+ 5. Open vowels (a, e, o) work better for sustained notes and syllables
916
+
917
+ For perfect alignment examples:
918
+ - "FEEL the RHY-thm in your SOUL" – stressed syllables on strong beats
919
+ - "to-DAY we DANCE a-LONG" – natural speech stress matches musical stress
920
+ - "WAIT-ing FOR the SUN to RISE" – syllable emphasis aligns with beat emphasis
921
 
922
  The lyrics should:
923
  - Perfectly capture the essence and style of {genre} music
 
927
  - Match the audio segment duration of {duration:.1f} seconds
928
 
929
  DON'T include any section labels like [Verse] or [Chorus] unless specifically instructed.
930
+ Instead, write lyrics that flow naturally and match the music's rhythm precisely.
931
 
932
  Your lyrics:
933
  """
 
988
  is_music, ast_results = detect_music(audio_data)
989
  except Exception as e:
990
  print(f"Error in music detection: {str(e)}")
991
+ return f"Error in music detection: {str(e)}", None, ast_results
992
 
993
  if not is_music:
994
  return "The uploaded audio does not appear to be music. Please upload a music file.", None, ast_results