Spaces:

jacob-c
/

syllables_matching_experiment

Sleeping

App Files Community

root committed 2 days ago

Commit

d6fb232

1 Parent(s): c6a60af

token extend

Browse files

Files changed (1) hide show

app.py +30 -50

app.py CHANGED Viewed

@@ -1952,7 +1952,10 @@ The lyrics should:
 IMPORTANT: Your generated lyrics must be followed by a section titled "[RHYTHM_ANALYSIS_SECTION]"
 where you analyze how well the lyrics align with the musical rhythm. This section MUST appear
-even if there are no rhythm issues.
 Your lyrics:
 """
@@ -2002,7 +2005,10 @@ Instead, write lyrics that flow naturally and match the music's rhythm precisely
 IMPORTANT: Your generated lyrics must be followed by a section titled "[RHYTHM_ANALYSIS_SECTION]"
 where you analyze how well the lyrics align with the musical rhythm. This section MUST appear
-even if there are no rhythm issues.
 Your lyrics:
 """
@@ -2012,23 +2018,12 @@ Your lyrics:
         {"role": "user", "content": content}
     ]
-    # Apply chat template with thinking enabled
-    try:
-        # Try using the model-specific template with thinking enabled
-        text = llm_tokenizer.apply_chat_template(
-            messages,
-            tokenize=False,
-            add_generation_prompt=True,
-            enable_thinking=True  # Only works with models that support thinking mode
-        )
-    except Exception as e:
-        # Fallback to standard template if thinking mode not supported
-        print(f"Thinking mode not supported, using standard template: {str(e)}")
-        text = llm_tokenizer.apply_chat_template(
-            messages,
-            tokenize=False,
-            add_generation_prompt=True
-        )
     # Generate lyrics using the LLM
     model_inputs = llm_tokenizer([text], return_tensors="pt").to(llm_model.device)
@@ -2038,9 +2033,9 @@ Your lyrics:
         "do_sample": True,
         "temperature": 0.6,  # Lower for more consistent rhythm alignment
         "top_p": 0.95,
-        "top_k": 20,
         "repetition_penalty": 1.2,
-        "max_new_tokens": 1024  # Allow more tokens for comprehensive lyrics
     }
     # Generate output
@@ -2052,35 +2047,20 @@ Your lyrics:
     # Extract output tokens
     output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
-    # Try to find </think> token to separate thinking from final answer if the model supports it
-    try:
-        # Look for thinking mode tokens - check model-specific token IDs
-        # For Qwen3, the </think> token ID is 151668
-        think_end_tokens = {
-            "qwen": 151668,  # Qwen </think> token
-            "claude": 42,    # Example for Claude (placeholder)
-            "llama": 128001  # Example for Llama (placeholder)
-        }
-        # Try to find a known token
-        found_token = None
-        token_position = 0
-        for model_name, token_id in think_end_tokens.items():
-            if token_id in output_ids:
-                found_token = token_id
-                token_position = output_ids.index(token_id) + 1
-                break
-        # Use the position of the thinking token if found
-        if found_token:
-            lyrics = llm_tokenizer.decode(output_ids[token_position:], skip_special_tokens=True).strip()
-        else:
-            lyrics = llm_tokenizer.decode(output_ids, skip_special_tokens=True).strip()
-    except (ValueError, IndexError, AttributeError) as e:
-        print(f"Error processing thinking output: {str(e)}")
-        # Default behavior if thinking mode processing fails
-        lyrics = llm_tokenizer.decode(output_ids, skip_special_tokens=True).strip()
     # Verify syllable counts with enhanced verification
     if templates_for_verification:

 IMPORTANT: Your generated lyrics must be followed by a section titled "[RHYTHM_ANALYSIS_SECTION]"
 where you analyze how well the lyrics align with the musical rhythm. This section MUST appear
+even if there are no rhythm issues. Include the following in your analysis:
+1. Syllable counts for each line and how they match the rhythm pattern
+2. Where stressed syllables align with strong beats
+3. Any potential misalignments or improvements
 Your lyrics:
 """
 IMPORTANT: Your generated lyrics must be followed by a section titled "[RHYTHM_ANALYSIS_SECTION]"
 where you analyze how well the lyrics align with the musical rhythm. This section MUST appear
+even if there are no rhythm issues. Include the following in your analysis:
+1. Syllable counts for each line and how they match the rhythm pattern
+2. Where stressed syllables align with strong beats
+3. Any potential misalignments or improvements
 Your lyrics:
 """
         {"role": "user", "content": content}
     ]
+    # Apply standard chat template without thinking enabled
+    text = llm_tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
     # Generate lyrics using the LLM
     model_inputs = llm_tokenizer([text], return_tensors="pt").to(llm_model.device)
         "do_sample": True,
         "temperature": 0.6,  # Lower for more consistent rhythm alignment
         "top_p": 0.95,
+        "top_k": 50,  # Increased from 20 for more diversity
         "repetition_penalty": 1.2,
+        "max_new_tokens": 2048  # Doubled from 1024 for more comprehensive lyrics
     }
     # Generate output
     # Extract output tokens
     output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
+    # Skip the thinking process completely and just get the raw output
+    lyrics = llm_tokenizer.decode(output_ids, skip_special_tokens=True).strip()
+    # If we find <thinking> tags, extract only the content after </thinking>
+    if "<thinking>" in lyrics and "</thinking>" in lyrics:
+        lyrics = lyrics.split("</thinking>")[1].strip()
+    # Remove any other thinking indicators that might be present
+    thinking_markers = ["<think>", "</think>", "[thinking]", "[/thinking]", "I'll think step by step:"]
+    for marker in thinking_markers:
+        if marker in lyrics:
+            parts = lyrics.split(marker)
+            if len(parts) > 1:
+                lyrics = parts[-1].strip()  # Take the last part after any thinking marker
     # Verify syllable counts with enhanced verification
     if templates_for_verification: