root
committed on
Commit
·
054fb90
1
Parent(s):
4ddd8f4
syllables trying second
Browse files
app.py
CHANGED
@@ -403,11 +403,12 @@ def detect_sections(y, sr):
|
|
403 |
|
404 |
# New function: Create flexible syllable templates
|
405 |
def create_flexible_syllable_templates(beats_info):
|
406 |
-
"""Create syllable templates based
|
407 |
# Get the beat times and strengths
|
408 |
beat_times = beats_info["beat_times"]
|
409 |
beat_strengths = beats_info.get("beat_strengths", [1.0] * len(beat_times))
|
410 |
phrases = beats_info.get("phrases", [])
|
|
|
411 |
|
412 |
# If no phrases were detected, create a simple division
|
413 |
if not phrases:
|
@@ -418,190 +419,124 @@ def create_flexible_syllable_templates(beats_info):
|
|
418 |
if end_idx - i >= 2: # Ensure at least 2 beats per phrase
|
419 |
phrases.append(list(range(i, end_idx)))
|
420 |
|
421 |
-
# Create syllable templates for each phrase
|
422 |
syllable_templates = []
|
423 |
|
424 |
for phrase in phrases:
|
425 |
-
#
|
426 |
-
beat_count = len(phrase)
|
427 |
phrase_strengths = [beat_strengths[i] for i in phrase if i < len(beat_strengths)]
|
428 |
-
|
|
|
429 |
|
430 |
-
#
|
431 |
-
|
432 |
-
|
433 |
-
|
434 |
-
|
435 |
-
|
436 |
-
|
437 |
-
|
438 |
-
|
439 |
-
# Adjust for beat strength
|
440 |
-
syllables_per_beat *= (0.8 + (avg_strength * 0.4))
|
441 |
-
|
442 |
-
# Calculate total syllables for the phrase
|
443 |
-
phrase_syllables = int(beat_count * syllables_per_beat)
|
444 |
-
if phrase_syllables < 2:
|
445 |
-
phrase_syllables = 2
|
446 |
-
|
447 |
-
syllable_templates.append(str(phrase_syllables))
|
448 |
-
|
449 |
-
return "-".join(syllable_templates)
|
450 |
-
|
451 |
-
# New function: Analyze flexible structure
|
452 |
-
def analyze_flexible_structure(audio_data):
|
453 |
-
"""Analyze music structure without assuming traditional song sections."""
|
454 |
-
y = audio_data["waveform"]
|
455 |
-
sr = audio_data["sample_rate"]
|
456 |
-
|
457 |
-
# Enhanced beat detection
|
458 |
-
beats_info = detect_beats(y, sr)
|
459 |
-
|
460 |
-
# Identify segments with similar audio features (using MFCC)
|
461 |
-
mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
|
462 |
-
|
463 |
-
# Use agglomerative clustering to find segment boundaries
|
464 |
-
segment_boundaries = librosa.segment.agglomerative(mfcc, 3)
|
465 |
-
segment_times = librosa.frames_to_time(segment_boundaries, sr=sr)
|
466 |
-
|
467 |
-
# Create segments
|
468 |
-
segments = []
|
469 |
-
for i in range(len(segment_times)-1):
|
470 |
-
start = segment_times[i]
|
471 |
-
end = segment_times[i+1]
|
472 |
-
|
473 |
-
# Find beats within this segment
|
474 |
-
segment_beats = []
|
475 |
-
for j, time in enumerate(beats_info["beat_times"]):
|
476 |
-
if start <= time < end:
|
477 |
-
segment_beats.append(j)
|
478 |
-
|
479 |
-
# Create syllable template for this segment
|
480 |
-
if segment_beats:
|
481 |
-
segment_beats_info = {
|
482 |
-
"beat_times": [beats_info["beat_times"][j] for j in segment_beats],
|
483 |
-
"tempo": beats_info.get("tempo", 120)
|
484 |
-
}
|
485 |
|
486 |
-
|
487 |
-
|
488 |
-
|
489 |
-
|
490 |
-
|
491 |
-
|
492 |
-
|
493 |
-
|
|
|
494 |
|
495 |
-
|
496 |
-
|
497 |
-
|
498 |
-
|
499 |
-
|
500 |
-
|
501 |
-
|
502 |
-
|
503 |
-
|
504 |
-
|
505 |
-
syllable_template = create_flexible_syllable_templates(segment_beats_info)
|
506 |
else:
|
507 |
-
|
|
|
508 |
|
509 |
-
|
510 |
-
|
511 |
-
|
512 |
-
"
|
513 |
-
|
514 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
515 |
|
516 |
-
|
517 |
-
|
518 |
-
"segments": segments
|
519 |
-
}
|
520 |
|
521 |
-
#
|
522 |
-
def
|
523 |
-
"""
|
524 |
-
|
|
|
525 |
|
526 |
-
|
527 |
-
|
528 |
-
|
529 |
-
|
530 |
-
if section["start"] <= beat < section["end"]
|
531 |
-
]
|
532 |
-
|
533 |
-
# Calculate syllables based on section type and beat count
|
534 |
-
beat_count = len(section_beats)
|
535 |
-
|
536 |
-
# Extract beat strengths for this section if available
|
537 |
-
section_beat_strengths = []
|
538 |
-
if "beat_strengths" in beats_info:
|
539 |
-
for i, beat_time in enumerate(beats_info["beat_times"]):
|
540 |
-
if section["start"] <= beat_time < section["end"] and i < len(beats_info["beat_strengths"]):
|
541 |
-
section_beat_strengths.append(beats_info["beat_strengths"][i])
|
542 |
|
543 |
-
|
544 |
-
|
545 |
-
"
|
546 |
-
|
547 |
-
}
|
548 |
-
|
549 |
-
if section_beat_strengths:
|
550 |
-
segment_beats_info["beat_strengths"] = section_beat_strengths
|
551 |
|
552 |
-
|
553 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
554 |
|
555 |
-
|
556 |
-
|
557 |
|
558 |
-
|
559 |
-
|
560 |
-
|
|
|
561 |
|
562 |
-
|
563 |
-
|
564 |
-
|
565 |
-
|
566 |
-
|
567 |
-
|
568 |
-
|
569 |
-
"
|
570 |
-
|
571 |
-
|
572 |
-
return syllables_per_section
|
573 |
-
|
574 |
-
def calculate_detailed_song_structure(audio_data):
|
575 |
-
"""Calculate detailed song structure for better lyrics generation."""
|
576 |
-
y = audio_data["waveform"]
|
577 |
-
sr = audio_data["sample_rate"]
|
578 |
-
|
579 |
-
# Enhanced beat detection
|
580 |
-
beats_info = detect_beats(y, sr)
|
581 |
-
|
582 |
-
# Detect sections
|
583 |
-
sections = detect_sections(y, sr)
|
584 |
-
|
585 |
-
# Create enhanced syllable info per section
|
586 |
-
syllables_info = estimate_syllables_per_section(beats_info, sections)
|
587 |
-
|
588 |
-
# Get flexible structure analysis as an alternative approach
|
589 |
-
try:
|
590 |
-
flexible_structure = analyze_flexible_structure(audio_data)
|
591 |
-
except Exception as e:
|
592 |
-
print(f"Warning: Flexible structure analysis failed: {str(e)}")
|
593 |
-
flexible_structure = None
|
594 |
-
|
595 |
-
return {
|
596 |
-
"beats": beats_info,
|
597 |
-
"sections": sections,
|
598 |
-
"syllables": syllables_info,
|
599 |
-
"flexible_structure": flexible_structure
|
600 |
-
}
|
601 |
|
602 |
-
#
|
603 |
def verify_flexible_syllable_counts(lyrics, templates):
|
604 |
-
"""Verify that the generated lyrics match the required syllable counts."""
|
605 |
# Split lyrics into lines
|
606 |
lines = [line.strip() for line in lyrics.split("\n") if line.strip()]
|
607 |
|
@@ -616,31 +551,101 @@ def verify_flexible_syllable_counts(lyrics, templates):
|
|
616 |
|
617 |
# Handle different template formats
|
618 |
if isinstance(template, dict) and "syllable_template" in template:
|
619 |
-
|
620 |
elif isinstance(template, str):
|
621 |
-
|
622 |
else:
|
623 |
continue
|
624 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
625 |
# Count actual syllables
|
626 |
actual_count = count_syllables(line)
|
627 |
|
628 |
# Calculate difference
|
629 |
-
total_expected
|
630 |
-
if abs(actual_count - total_expected) > 2: # Allow small differences
|
631 |
verification_notes.append(f"Line {i+1}: Expected {total_expected} syllables, got {actual_count}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
632 |
|
633 |
# If we found issues, add them as notes at the end of the lyrics
|
634 |
if verification_notes:
|
635 |
lyrics += "\n\n[Note: Potential rhythm mismatches in these lines:]\n"
|
636 |
lyrics += "\n".join(verification_notes)
|
637 |
-
lyrics += "\n[
|
|
|
|
|
|
|
638 |
|
639 |
return lyrics
|
640 |
|
641 |
# Modified generate_lyrics function
|
642 |
def generate_lyrics(genre, duration, emotion_results, song_structure=None):
|
643 |
-
"""Generate lyrics based on the genre, emotion, and structure analysis."""
|
644 |
# Extract emotion and theme data from analysis results
|
645 |
primary_emotion = emotion_results["emotion_analysis"]["primary_emotion"]
|
646 |
primary_theme = emotion_results["theme_analysis"]["primary_theme"]
|
@@ -663,39 +668,119 @@ def generate_lyrics(genre, duration, emotion_results, song_structure=None):
|
|
663 |
if "flexible_structure" in song_structure and song_structure["flexible_structure"]:
|
664 |
flexible = song_structure["flexible_structure"]
|
665 |
if "segments" in flexible and flexible["segments"]:
|
666 |
-
|
|
|
667 |
|
668 |
-
|
|
|
|
|
|
|
669 |
if i < 15: # Limit to 15 lines to keep prompt manageable
|
670 |
-
|
671 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
672 |
|
673 |
# Fallback to traditional sections if needed
|
674 |
elif "syllables" in song_structure and song_structure["syllables"]:
|
675 |
-
syllable_guidance = "
|
|
|
676 |
|
677 |
for section in song_structure["syllables"]:
|
678 |
if "syllable_template" in section:
|
679 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
680 |
elif "syllable_count" in section:
|
681 |
syllable_guidance += f"[{section['type'].capitalize()}]: ~{section['syllable_count']} syllables total\n"
|
682 |
-
|
683 |
-
if "syllable_template" in section:
|
684 |
-
templates_for_verification.append(section)
|
685 |
|
686 |
# If we couldn't get specific templates, use general guidance
|
687 |
if not syllable_guidance:
|
688 |
-
syllable_guidance = "
|
689 |
-
syllable_guidance += "
|
690 |
-
syllable_guidance += "
|
691 |
-
syllable_guidance += "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
692 |
|
693 |
-
|
694 |
-
syllable_guidance += "
|
695 |
-
syllable_guidance += "
|
696 |
-
syllable_guidance += "
|
697 |
-
|
698 |
-
syllable_guidance += "
|
|
|
|
|
|
|
699 |
|
700 |
# Determine if we should use traditional sections or not
|
701 |
use_sections = True
|
@@ -706,55 +791,35 @@ def generate_lyrics(genre, duration, emotion_results, song_structure=None):
|
|
706 |
if len(segments) > 4:
|
707 |
use_sections = False
|
708 |
|
709 |
-
#
|
710 |
-
|
711 |
-
|
712 |
-
|
713 |
-
|
714 |
-
|
715 |
-
|
716 |
-
|
717 |
-
|
718 |
-
|
|
|
719 |
|
720 |
-
#
|
721 |
-
|
722 |
-
|
723 |
-
|
724 |
-
|
725 |
-
|
726 |
-
|
727 |
-
|
728 |
-
|
729 |
-
|
730 |
-
|
731 |
-
|
732 |
-
elif section["type"] == "chorus":
|
733 |
-
chorus_lines = section["lines"]
|
734 |
-
elif section["type"] == "bridge":
|
735 |
-
bridge_lines = section["lines"]
|
736 |
-
else:
|
737 |
-
# The function returned just an integer (old behavior)
|
738 |
-
total_lines = lines_structure
|
739 |
-
|
740 |
-
# Default section distribution based on total lines
|
741 |
-
if total_lines <= 6:
|
742 |
-
verse_lines = 2
|
743 |
-
chorus_lines = 2
|
744 |
-
bridge_lines = 0
|
745 |
-
elif total_lines <= 10:
|
746 |
-
verse_lines = 3
|
747 |
-
chorus_lines = 2
|
748 |
-
bridge_lines = 0
|
749 |
-
else:
|
750 |
-
verse_lines = 3
|
751 |
-
chorus_lines = 2
|
752 |
-
bridge_lines = 2
|
753 |
else:
|
754 |
-
#
|
755 |
-
total_lines =
|
756 |
|
757 |
-
# Default section distribution
|
758 |
if total_lines <= 6:
|
759 |
verse_lines = 2
|
760 |
chorus_lines = 2
|
@@ -767,15 +832,35 @@ def generate_lyrics(genre, duration, emotion_results, song_structure=None):
|
|
767 |
verse_lines = 3
|
768 |
chorus_lines = 2
|
769 |
bridge_lines = 2
|
770 |
-
|
771 |
-
|
772 |
total_lines = max(4, int(duration / 10))
|
773 |
|
774 |
# Default section distribution
|
775 |
-
|
776 |
-
|
777 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
778 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
779 |
prompt = f"""
|
780 |
You are a talented songwriter who specializes in {genre} music.
|
781 |
Write original {genre} song lyrics for a song that is {duration:.1f} seconds long.
|
@@ -786,14 +871,14 @@ Music analysis has detected the following qualities in the music:
|
|
786 |
- Primary emotion: {primary_emotion}
|
787 |
- Primary theme: {primary_theme}
|
788 |
|
789 |
-
IMPORTANT: The lyrics must match the rhythm of the music exactly!
|
790 |
{syllable_guidance}
|
791 |
|
792 |
-
|
793 |
-
1.
|
794 |
-
2.
|
795 |
-
3.
|
796 |
-
4.
|
|
|
797 |
|
798 |
The lyrics should:
|
799 |
- Perfectly capture the essence and style of {genre} music
|
@@ -820,14 +905,19 @@ Music analysis has detected the following qualities:
|
|
820 |
- Primary emotion: {primary_emotion}
|
821 |
- Primary theme: {primary_theme}
|
822 |
|
823 |
-
IMPORTANT: The lyrics must match the rhythm of the music exactly!
|
824 |
{syllable_guidance}
|
825 |
|
826 |
-
|
827 |
-
1.
|
828 |
-
2.
|
829 |
-
3.
|
830 |
-
4.
|
|
|
|
|
|
|
|
|
|
|
|
|
831 |
|
832 |
The lyrics should:
|
833 |
- Perfectly capture the essence and style of {genre} music
|
@@ -837,7 +927,7 @@ The lyrics should:
|
|
837 |
- Match the audio segment duration of {duration:.1f} seconds
|
838 |
|
839 |
DON'T include any section labels like [Verse] or [Chorus] unless specifically instructed.
|
840 |
-
Instead, write lyrics that flow naturally and match the music's rhythm.
|
841 |
|
842 |
Your lyrics:
|
843 |
"""
|
@@ -898,7 +988,7 @@ def process_audio(audio_file):
|
|
898 |
is_music, ast_results = detect_music(audio_data)
|
899 |
except Exception as e:
|
900 |
print(f"Error in music detection: {str(e)}")
|
901 |
-
return f"Error in music detection: {str(e)}", None,
|
902 |
|
903 |
if not is_music:
|
904 |
return "The uploaded audio does not appear to be music. Please upload a music file.", None, ast_results
|
|
|
403 |
|
404 |
# New function: Create flexible syllable templates
|
405 |
def create_flexible_syllable_templates(beats_info):
|
406 |
+
"""Create detailed syllable templates based on beat patterns, capturing stress patterns."""
|
407 |
# Get the beat times and strengths
|
408 |
beat_times = beats_info["beat_times"]
|
409 |
beat_strengths = beats_info.get("beat_strengths", [1.0] * len(beat_times))
|
410 |
phrases = beats_info.get("phrases", [])
|
411 |
+
tempo = beats_info.get("tempo", 120)
|
412 |
|
413 |
# If no phrases were detected, create a simple division
|
414 |
if not phrases:
|
|
|
419 |
if end_idx - i >= 2: # Ensure at least 2 beats per phrase
|
420 |
phrases.append(list(range(i, end_idx)))
|
421 |
|
422 |
+
# Create enhanced syllable templates for each phrase
|
423 |
syllable_templates = []
|
424 |
|
425 |
for phrase in phrases:
|
426 |
+
# Extract beat strengths for this phrase
|
|
|
427 |
phrase_strengths = [beat_strengths[i] for i in phrase if i < len(beat_strengths)]
|
428 |
+
if not phrase_strengths:
|
429 |
+
phrase_strengths = [1.0] * len(phrase)
|
430 |
|
431 |
+
# Normalize strengths for easier pattern recognition
|
432 |
+
if phrase_strengths:
|
433 |
+
max_strength = max(phrase_strengths)
|
434 |
+
if max_strength > 0:
|
435 |
+
norm_strengths = [s/max_strength for s in phrase_strengths]
|
436 |
+
else:
|
437 |
+
norm_strengths = [1.0] * len(phrase_strengths)
|
438 |
+
else:
|
439 |
+
norm_strengths = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
440 |
|
441 |
+
# Identify strong and weak beats (S = strong, w = weak)
|
442 |
+
stress_pattern = []
|
443 |
+
for strength in norm_strengths:
|
444 |
+
if strength > 0.7:
|
445 |
+
stress_pattern.append("S") # Strong beat
|
446 |
+
elif strength > 0.4:
|
447 |
+
stress_pattern.append("m") # Medium beat
|
448 |
+
else:
|
449 |
+
stress_pattern.append("w") # Weak beat
|
450 |
|
451 |
+
# Calculate appropriate syllable count based on tempo and beat pattern
|
452 |
+
if tempo > 160:
|
453 |
+
# Very fast tempo - typically one syllable per beat
|
454 |
+
syllables_per_beat = [1] * len(phrase)
|
455 |
+
elif tempo > 120:
|
456 |
+
# Fast tempo
|
457 |
+
syllables_per_beat = [1 if s == "S" or s == "m" else 1 for s in stress_pattern]
|
458 |
+
elif tempo > 90:
|
459 |
+
# Medium tempo
|
460 |
+
syllables_per_beat = [2 if s == "S" else 1 if s == "m" else 1 for s in stress_pattern]
|
|
|
461 |
else:
|
462 |
+
# Slow tempo
|
463 |
+
syllables_per_beat = [2 if s == "S" else 2 if s == "m" else 1 for s in stress_pattern]
|
464 |
|
465 |
+
# Create a detailed template with stress information
|
466 |
+
detailed_template = []
|
467 |
+
for i, (stress, syllable_count) in enumerate(zip(stress_pattern, syllables_per_beat)):
|
468 |
+
if stress == "S":
|
469 |
+
# Mark strong beat with capital letter followed by syllable count
|
470 |
+
detailed_template.append(f"S{syllable_count}")
|
471 |
+
elif stress == "m":
|
472 |
+
# Mark medium beat with lowercase letter
|
473 |
+
detailed_template.append(f"m{syllable_count}")
|
474 |
+
else:
|
475 |
+
# Mark weak beat with lowercase letter
|
476 |
+
detailed_template.append(f"w{syllable_count}")
|
477 |
+
|
478 |
+
# Join all beat templates for this phrase
|
479 |
+
phrase_template = "-".join(detailed_template)
|
480 |
+
syllable_templates.append(phrase_template)
|
481 |
|
482 |
+
# Join all phrase templates
|
483 |
+
return "|".join(syllable_templates)
|
|
|
|
|
484 |
|
485 |
+
# Helper function to convert technical templates to human-readable instructions
|
486 |
+
def format_syllable_templates_for_prompt(syllable_templates):
    """Convert technical syllable templates into human-readable instructions.

    Two template notations are understood:

    * Enhanced notation, as built by ``create_flexible_syllable_templates``:
      beats joined by ``-`` and phrases joined by ``|``, where each beat is a
      stress marker (``S`` strong, ``m`` medium, ``w`` weak) followed by a
      syllable count, e.g. ``"S2-w1|m1-S1"``.
    * The older plain notation: syllable counts joined by ``-``, e.g.
      ``"8-6-8"``.

    Args:
        syllable_templates: A template string, a list whose items are
            template strings or dicts carrying a ``"syllable_template"``
            key, or any other value.

    Returns:
        ``""`` for a falsy input. For an enhanced string, one
        ``"Line N: ..."`` row per phrase. For a list, one ``"Line N: ..."``
        row per usable item (items that are neither strings nor dicts with a
        ``"syllable_template"`` key are skipped, but still consume a line
        number). Otherwise ``str(syllable_templates)``.
    """
    if not syllable_templates:
        return ""

    if isinstance(syllable_templates, str):
        # A plain (old-format) string is passed through untouched.
        if not _is_enhanced_template(syllable_templates):
            return syllable_templates
        return "\n".join(
            f"Line {number}: {_describe_phrase(phrase)}"
            for number, phrase in enumerate(syllable_templates.split("|"), start=1)
        )

    if isinstance(syllable_templates, list):
        formatted_lines = []
        for number, template in enumerate(syllable_templates, start=1):
            if isinstance(template, dict) and "syllable_template" in template:
                template = template["syllable_template"]
            elif not isinstance(template, str):
                continue

            if isinstance(template, str) and _is_enhanced_template(template):
                # Single-phrase enhanced templates contain no "|", so they
                # must be recognised by their stress markers; otherwise they
                # would be printed raw as "S1-w1 syllables".
                # Keep one output row per list item so that row numbers stay
                # aligned with the item's position in the list.
                description = " | ".join(
                    _describe_phrase(phrase) for phrase in template.split("|")
                )
                formatted_lines.append(f"Line {number}: {description}")
            else:
                formatted_lines.append(f"Line {number}: {template} syllables")
        return "\n".join(formatted_lines)

    return str(syllable_templates)


# Stress marker -> label used in the prompt text.
_STRESS_LABELS = {"S": "STRONG", "m": "medium", "w": "weak"}

# Separator between beats of one line: a right arrow (U+2192).
_BEAT_SEPARATOR = " \u2192 "


def _is_stress_token(beat):
    """Return True if *beat* is a stress marker followed by a digit count."""
    return beat[:1] in ("S", "m", "w") and beat[1:].isdigit()


def _is_enhanced_template(template):
    """Return True if *template* uses the stress-marker notation."""
    return "|" in template or any(
        _is_stress_token(beat) for beat in template.split("-")
    )


def _describe_beat(beat):
    """Render one beat token, e.g. ``"S2"`` -> ``"STRONG(2)"``."""
    label = _STRESS_LABELS.get(beat[:1])
    if label is None:
        # Not a stress token (e.g. a bare number): keep it as written.
        return beat
    return f"{label}({beat[1:]})"


def _describe_phrase(phrase):
    """Render one ``-``-joined phrase as arrow-separated beat descriptions."""
    return _BEAT_SEPARATOR.join(_describe_beat(beat) for beat in phrase.split("-"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
536 |
|
537 |
+
# Enhanced verification function to check syllable counts and stress patterns
|
538 |
def verify_flexible_syllable_counts(lyrics, templates):
|
539 |
+
"""Verify that the generated lyrics match the required syllable counts and stress patterns."""
|
540 |
# Split lyrics into lines
|
541 |
lines = [line.strip() for line in lyrics.split("\n") if line.strip()]
|
542 |
|
|
|
551 |
|
552 |
# Handle different template formats
|
553 |
if isinstance(template, dict) and "syllable_template" in template:
|
554 |
+
template_str = template["syllable_template"]
|
555 |
elif isinstance(template, str):
|
556 |
+
template_str = template
|
557 |
else:
|
558 |
continue
|
559 |
|
560 |
+
# Parse the enhanced template format if present
|
561 |
+
if "|" in template_str:
|
562 |
+
# This is a phrase, take just the first part for now
|
563 |
+
template_str = template_str.split("|")[0]
|
564 |
+
|
565 |
+
# Count expected syllables
|
566 |
+
total_expected = 0
|
567 |
+
|
568 |
+
# Handle the enhanced format with stress patterns
|
569 |
+
if "-" in template_str and any(x in template_str for x in ["S", "m", "w"]):
|
570 |
+
beats = template_str.split("-")
|
571 |
+
expected_counts = []
|
572 |
+
|
573 |
+
for beat in beats:
|
574 |
+
if beat.startswith(("S", "m", "w")):
|
575 |
+
try:
|
576 |
+
count = int(beat[1:])
|
577 |
+
expected_counts.append(count)
|
578 |
+
total_expected += count
|
579 |
+
except ValueError:
|
580 |
+
expected_counts.append(1)
|
581 |
+
total_expected += 1
|
582 |
+
else:
|
583 |
+
try:
|
584 |
+
count = int(beat)
|
585 |
+
expected_counts.append(count)
|
586 |
+
total_expected += count
|
587 |
+
except ValueError:
|
588 |
+
expected_counts.append(1)
|
589 |
+
total_expected += 1
|
590 |
+
else:
|
591 |
+
# Old format - simple numbers separated by hyphens
|
592 |
+
try:
|
593 |
+
expected_counts = [int(count) for count in template_str.split("-")]
|
594 |
+
total_expected = sum(expected_counts)
|
595 |
+
except ValueError:
|
596 |
+
# Fallback if we can't parse the template
|
597 |
+
expected_counts = []
|
598 |
+
total_expected = 0
|
599 |
+
|
600 |
# Count actual syllables
|
601 |
actual_count = count_syllables(line)
|
602 |
|
603 |
# Calculate difference
|
604 |
+
if total_expected > 0 and abs(actual_count - total_expected) > 2: # Allow small differences
|
|
|
605 |
verification_notes.append(f"Line {i+1}: Expected {total_expected} syllables, got {actual_count}")
|
606 |
+
|
607 |
+
# Additionally check if stressed syllables align with strong beats
|
608 |
+
words = re.findall(r'\b[a-zA-Z]+\b', line.lower())
|
609 |
+
if words and expected_counts and "S" in template_str:
|
610 |
+
# Try to find strong beats in the template
|
611 |
+
strong_beat_positions = []
|
612 |
+
current_pos = 0
|
613 |
+
|
614 |
+
for j, beat in enumerate(template_str.split("-")):
|
615 |
+
if beat.startswith("S"):
|
616 |
+
beat_count = int(beat[1:]) if len(beat) > 1 else 1
|
617 |
+
strong_beat_positions.append(current_pos)
|
618 |
+
current_pos += beat_count
|
619 |
+
else:
|
620 |
+
beat_count = int(beat[1:]) if len(beat) > 1 else 1
|
621 |
+
current_pos += beat_count
|
622 |
+
|
623 |
+
# Try to get pronunciations for words to check stress alignment
|
624 |
+
word_stresses = []
|
625 |
+
for word in words:
|
626 |
+
pronunciations = pronouncing.phones_for_word(word)
|
627 |
+
if pronunciations:
|
628 |
+
stress_pattern = pronouncing.stresses(pronunciations[0])
|
629 |
+
word_stresses.append(stress_pattern)
|
630 |
+
|
631 |
+
# Add note about stress alignment if we have enough information
|
632 |
+
if word_stresses and strong_beat_positions and len(word_stresses) >= len(strong_beat_positions):
|
633 |
+
verification_notes.append(f" β Check stress alignment on words with strong beats")
|
634 |
|
635 |
# If we found issues, add them as notes at the end of the lyrics
|
636 |
if verification_notes:
|
637 |
lyrics += "\n\n[Note: Potential rhythm mismatches in these lines:]\n"
|
638 |
lyrics += "\n".join(verification_notes)
|
639 |
+
lyrics += "\n\n[To fix mismatches:]\n"
|
640 |
+
lyrics += "1. Make sure stressed syllables fall on STRONG beats\n"
|
641 |
+
lyrics += "2. Adjust syllable counts to match the template\n"
|
642 |
+
lyrics += "3. Try using words with naturally aligned stress patterns"
|
643 |
|
644 |
return lyrics
|
645 |
|
646 |
# Modified generate_lyrics function
|
647 |
def generate_lyrics(genre, duration, emotion_results, song_structure=None):
|
648 |
+
"""Generate lyrics based on the genre, emotion, and structure analysis with enhanced rhythmic alignment."""
|
649 |
# Extract emotion and theme data from analysis results
|
650 |
primary_emotion = emotion_results["emotion_analysis"]["primary_emotion"]
|
651 |
primary_theme = emotion_results["theme_analysis"]["primary_theme"]
|
|
|
668 |
if "flexible_structure" in song_structure and song_structure["flexible_structure"]:
|
669 |
flexible = song_structure["flexible_structure"]
|
670 |
if "segments" in flexible and flexible["segments"]:
|
671 |
+
# Get the segments
|
672 |
+
segments = flexible["segments"]
|
673 |
|
674 |
+
# Process each segment to create enhanced rhythmic templates
|
675 |
+
enhanced_templates = []
|
676 |
+
|
677 |
+
for i, segment in enumerate(segments):
|
678 |
if i < 15: # Limit to 15 lines to keep prompt manageable
|
679 |
+
# Get the beat information for this segment
|
680 |
+
segment_start = segment["start"]
|
681 |
+
segment_end = segment["end"]
|
682 |
+
|
683 |
+
# Find beats within this segment
|
684 |
+
segment_beats = []
|
685 |
+
beat_times = flexible["beats"]["beat_times"]
|
686 |
+
beat_strengths = flexible["beats"].get("beat_strengths", [])
|
687 |
+
|
688 |
+
for j, beat_time in enumerate(beat_times):
|
689 |
+
if segment_start <= beat_time < segment_end:
|
690 |
+
# Add this beat to the segment
|
691 |
+
segment_beats.append(j)
|
692 |
+
|
693 |
+
# Create segment-specific beat info
|
694 |
+
segment_beats_info = {
|
695 |
+
"beat_times": [beat_times[j] for j in segment_beats],
|
696 |
+
"tempo": flexible["beats"].get("tempo", 120)
|
697 |
+
}
|
698 |
+
|
699 |
+
if beat_strengths:
|
700 |
+
segment_beats_info["beat_strengths"] = [
|
701 |
+
beat_strengths[j] for j in segment_beats
|
702 |
+
if j < len(beat_strengths)
|
703 |
+
]
|
704 |
+
|
705 |
+
# Create a phrase structure for this segment
|
706 |
+
segment_beats_info["phrases"] = [segment_beats]
|
707 |
+
|
708 |
+
# Generate enhanced template
|
709 |
+
enhanced_template = create_flexible_syllable_templates(segment_beats_info)
|
710 |
+
enhanced_templates.append(enhanced_template)
|
711 |
+
templates_for_verification.append(enhanced_template)
|
712 |
+
|
713 |
+
# Format templates for the prompt
|
714 |
+
syllable_guidance = "CRITICAL RHYTHM INSTRUCTIONS:\n"
|
715 |
+
syllable_guidance += "Match each line exactly to this rhythm pattern (STRONG beats need stressed syllables):\n\n"
|
716 |
+
syllable_guidance += format_syllable_templates_for_prompt(enhanced_templates)
|
717 |
+
|
718 |
+
# Add explanation of notation
|
719 |
+
syllable_guidance += "\n\nWhere:\n"
|
720 |
+
syllable_guidance += "- STRONG(n): Place a STRESSED syllable here, followed by (n-1) unstressed syllables\n"
|
721 |
+
syllable_guidance += "- medium(n): Place a medium-stressed or unstressed syllable here, followed by (n-1) unstressed syllables\n"
|
722 |
+
syllable_guidance += "- weak(n): Place unstressed syllables here\n"
|
723 |
+
syllable_guidance += "- β: Indicates flow from one beat to the next within a line\n"
|
724 |
|
725 |
# Fallback to traditional sections if needed
|
726 |
elif "syllables" in song_structure and song_structure["syllables"]:
|
727 |
+
syllable_guidance = "RHYTHM PATTERN INSTRUCTIONS:\n"
|
728 |
+
syllable_guidance += "Follow these syllable patterns for each section:\n\n"
|
729 |
|
730 |
for section in song_structure["syllables"]:
|
731 |
if "syllable_template" in section:
|
732 |
+
# Process to create enhanced template
|
733 |
+
section_beats_info = {
|
734 |
+
"beat_times": [beat for beat in song_structure["beats"]["beat_times"]
|
735 |
+
if section["start"] <= beat < section["end"]],
|
736 |
+
"tempo": song_structure["beats"].get("tempo", 120)
|
737 |
+
}
|
738 |
+
|
739 |
+
if "beat_strengths" in song_structure["beats"]:
|
740 |
+
section_beats_info["beat_strengths"] = [
|
741 |
+
strength for i, strength in enumerate(song_structure["beats"]["beat_strengths"])
|
742 |
+
if i < len(song_structure["beats"]["beat_times"]) and
|
743 |
+
section["start"] <= song_structure["beats"]["beat_times"][i] < section["end"]
|
744 |
+
]
|
745 |
+
|
746 |
+
# Create a phrase structure for this section
|
747 |
+
section_beats_info["phrases"] = [list(range(len(section_beats_info["beat_times"])))]
|
748 |
+
|
749 |
+
# Generate enhanced template
|
750 |
+
enhanced_template = create_flexible_syllable_templates(section_beats_info)
|
751 |
+
|
752 |
+
syllable_guidance += f"[{section['type'].capitalize()}]:\n"
|
753 |
+
syllable_guidance += format_syllable_templates_for_prompt(enhanced_template) + "\n\n"
|
754 |
+
templates_for_verification.append(section)
|
755 |
elif "syllable_count" in section:
|
756 |
syllable_guidance += f"[{section['type'].capitalize()}]: ~{section['syllable_count']} syllables total\n"
|
|
|
|
|
|
|
757 |
|
758 |
# If we couldn't get specific templates, use general guidance
|
759 |
if not syllable_guidance:
|
760 |
+
syllable_guidance = "RHYTHM ALIGNMENT INSTRUCTIONS:\n\n"
|
761 |
+
syllable_guidance += "1. Align stressed syllables with strong beats (usually beats 1 and 3 in 4/4 time)\n"
|
762 |
+
syllable_guidance += "2. Use unstressed syllables on weak beats (usually beats 2 and 4 in 4/4 time)\n"
|
763 |
+
syllable_guidance += "3. Use appropriate syllable counts based on tempo:\n"
|
764 |
+
syllable_guidance += " - Fast tempo (>120 BPM): 4-6 syllables per line\n"
|
765 |
+
syllable_guidance += " - Medium tempo (90-120 BPM): 6-8 syllables per line\n"
|
766 |
+
syllable_guidance += " - Slow tempo (<90 BPM): 8-10 syllables per line\n"
|
767 |
+
|
768 |
+
# Add examples of syllable-beat alignment with stress patterns
|
769 |
+
syllable_guidance += "\nEXAMPLES OF PERFECT RHYTHM ALIGNMENT:\n"
|
770 |
+
syllable_guidance += "Pattern: STRONG(1) β weak(1) β medium(1) β weak(1)\n"
|
771 |
+
syllable_guidance += "Lyric: 'HEAR the MU-sic PLAY'\n"
|
772 |
+
syllable_guidance += " β β β β\n"
|
773 |
+
syllable_guidance += " S w m w <- BEAT TYPE\n\n"
|
774 |
|
775 |
+
syllable_guidance += "Pattern: STRONG(2) β weak(1) β STRONG(1) β weak(2)\n"
|
776 |
+
syllable_guidance += "Lyric: 'DANC-ing TO the RHYTHM of LOVE'\n"
|
777 |
+
syllable_guidance += " β β β β β β\n"
|
778 |
+
syllable_guidance += " S S w S w w <- BEAT TYPE\n\n"
|
779 |
+
|
780 |
+
syllable_guidance += "Pattern: STRONG(1) β medium(2) β STRONG(1) β weak(1)\n"
|
781 |
+
syllable_guidance += "Lyric: 'TIME keeps FLOW-ing ON and ON'\n"
|
782 |
+
syllable_guidance += " β β β β β β\n"
|
783 |
+
syllable_guidance += " S m m S w w <- BEAT TYPE\n\n"
|
784 |
|
785 |
# Determine if we should use traditional sections or not
|
786 |
use_sections = True
|
|
|
791 |
if len(segments) > 4:
|
792 |
use_sections = False
|
793 |
|
794 |
+
# Calculate appropriate lyrics length and section distribution
|
795 |
+
try:
|
796 |
+
if song_structure and "beats" in song_structure:
|
797 |
+
beats_info = song_structure["beats"]
|
798 |
+
tempo = beats_info.get("tempo", 120)
|
799 |
+
time_signature = beats_info.get("time_signature", 4)
|
800 |
+
lines_structure = calculate_lyrics_length(duration, tempo, time_signature)
|
801 |
+
|
802 |
+
# Handle both possible return types
|
803 |
+
if isinstance(lines_structure, dict):
|
804 |
+
total_lines = lines_structure["lines_count"]
|
805 |
|
806 |
+
# Extract section line counts if available
|
807 |
+
verse_lines = 0
|
808 |
+
chorus_lines = 0
|
809 |
+
bridge_lines = 0
|
810 |
+
|
811 |
+
for section in lines_structure["sections"]:
|
812 |
+
if section["type"] == "verse":
|
813 |
+
verse_lines = section["lines"]
|
814 |
+
elif section["type"] == "chorus":
|
815 |
+
chorus_lines = section["lines"]
|
816 |
+
elif section["type"] == "bridge":
|
817 |
+
bridge_lines = section["lines"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
818 |
else:
|
819 |
+
# The function returned just an integer (old behavior)
|
820 |
+
total_lines = lines_structure
|
821 |
|
822 |
+
# Default section distribution based on total lines
|
823 |
if total_lines <= 6:
|
824 |
verse_lines = 2
|
825 |
chorus_lines = 2
|
|
|
832 |
verse_lines = 3
|
833 |
chorus_lines = 2
|
834 |
bridge_lines = 2
|
835 |
+
else:
|
836 |
+
# Fallback to simple calculation
|
837 |
total_lines = max(4, int(duration / 10))
|
838 |
|
839 |
# Default section distribution
|
840 |
+
if total_lines <= 6:
|
841 |
+
verse_lines = 2
|
842 |
+
chorus_lines = 2
|
843 |
+
bridge_lines = 0
|
844 |
+
elif total_lines <= 10:
|
845 |
+
verse_lines = 3
|
846 |
+
chorus_lines = 2
|
847 |
+
bridge_lines = 0
|
848 |
+
else:
|
849 |
+
verse_lines = 3
|
850 |
+
chorus_lines = 2
|
851 |
+
bridge_lines = 2
|
852 |
+
except Exception as e:
|
853 |
+
print(f"Error calculating lyrics length: {str(e)}")
|
854 |
+
total_lines = max(4, int(duration / 10))
|
855 |
|
856 |
+
# Default section distribution
|
857 |
+
verse_lines = 3
|
858 |
+
chorus_lines = 2
|
859 |
+
bridge_lines = 0
|
860 |
+
|
861 |
+
# Create enhanced prompt with better rhythm alignment instructions
|
862 |
+
if use_sections:
|
863 |
+
# Traditional approach with sections
|
864 |
prompt = f"""
|
865 |
You are a talented songwriter who specializes in {genre} music.
|
866 |
Write original {genre} song lyrics for a song that is {duration:.1f} seconds long.
|
|
|
871 |
- Primary emotion: {primary_emotion}
|
872 |
- Primary theme: {primary_theme}
|
873 |
|
|
|
874 |
{syllable_guidance}
|
875 |
|
876 |
+
CRITICAL PRINCIPLES FOR RHYTHMIC ALIGNMENT:
|
877 |
+
1. STRESSED syllables MUST fall on STRONG beats (marked with STRONG in the pattern)
|
878 |
+
2. Natural word stress patterns must match the beat strength (strong words on strong beats)
|
879 |
+
3. Line breaks should occur at phrase endings for natural breathing
|
880 |
+
4. Consonant clusters should be avoided on fast notes and strong beats
|
881 |
+
5. Open vowels (a, e, o) work better for sustained notes and syllables
|
882 |
|
883 |
The lyrics should:
|
884 |
- Perfectly capture the essence and style of {genre} music
|
|
|
905 |
- Primary emotion: {primary_emotion}
|
906 |
- Primary theme: {primary_theme}
|
907 |
|
|
|
908 |
{syllable_guidance}
|
909 |
|
910 |
+
CRITICAL PRINCIPLES FOR RHYTHMIC ALIGNMENT:
|
911 |
+
1. STRESSED syllables MUST fall on STRONG beats (marked with STRONG in the pattern)
|
912 |
+
2. Natural word stress patterns must match the beat strength (strong words on strong beats)
|
913 |
+
3. Line breaks should occur at phrase endings for natural breathing
|
914 |
+
4. Consonant clusters should be avoided on fast notes and strong beats
|
915 |
+
5. Open vowels (a, e, o) work better for sustained notes and syllables
|
916 |
+
|
917 |
+
For perfect alignment examples:
|
918 |
+
- "FEEL the RHY-thm in your SOUL" β stressed syllables on strong beats
|
919 |
+
- "to-DAY we DANCE a-LONG" β natural speech stress matches musical stress
|
920 |
+
- "WAIT-ing FOR the SUN to RISE" β syllable emphasis aligns with beat emphasis
|
921 |
|
922 |
The lyrics should:
|
923 |
- Perfectly capture the essence and style of {genre} music
|
|
|
927 |
- Match the audio segment duration of {duration:.1f} seconds
|
928 |
|
929 |
DON'T include any section labels like [Verse] or [Chorus] unless specifically instructed.
|
930 |
+
Instead, write lyrics that flow naturally and match the music's rhythm precisely.
|
931 |
|
932 |
Your lyrics:
|
933 |
"""
|
|
|
988 |
is_music, ast_results = detect_music(audio_data)
|
989 |
except Exception as e:
|
990 |
print(f"Error in music detection: {str(e)}")
|
991 |
+
return f"Error in music detection: {str(e)}", None, ast_results
|
992 |
|
993 |
if not is_music:
|
994 |
return "The uploaded audio does not appear to be music. Please upload a music file.", None, ast_results
|