# syllables_matching_experiment/emotionanalysis.py
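"""Heuristic music emotion and theme analysis built on librosa features.

Extracts rhythm, tonality, and energy descriptors from an audio file and maps
them onto coarse emotion and theme profiles. The mappings are hand-tuned
heuristics, not trained models.
"""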
import librosa
import numpy as np
try:
import matplotlib.pyplot as plt
except ImportError:
plt = None
import warnings
warnings.filterwarnings('ignore')  # Suppress warnings globally (mainly to quiet librosa's audioread/deprecation warnings)
class MusicAnalyzer:
def __init__(self):
# Emotion feature mappings - these define characteristics of different emotions
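        # Each (min, max) tuple is an inclusive range: tempo in BPM, energy and
        # brightness roughly on a 0-1 scale; major_mode of None means no mode preference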
self.emotion_profiles = {
'happy': {'tempo': (100, 180), 'energy': (0.6, 1.0), 'major_mode': True, 'brightness': (0.6, 1.0)},
'sad': {'tempo': (40, 90), 'energy': (0, 0.5), 'major_mode': False, 'brightness': (0, 0.5)},
'calm': {'tempo': (50, 90), 'energy': (0, 0.4), 'major_mode': True, 'brightness': (0.3, 0.6)},
'energetic': {'tempo': (110, 200), 'energy': (0.7, 1.0), 'major_mode': True, 'brightness': (0.5, 0.9)},
'tense': {'tempo': (70, 140), 'energy': (0.5, 0.9), 'major_mode': False, 'brightness': (0.3, 0.7)},
'nostalgic': {'tempo': (60, 100), 'energy': (0.3, 0.7), 'major_mode': None, 'brightness': (0.4, 0.7)}
}
# Theme mappings based on musical features
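        # 'emotion' lists compatible emotions in rough order of relevance;
        # 'harmony_complexity' is a (min, max) range matched against analyze_tonality output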
self.theme_profiles = {
'love': {'emotion': ['happy', 'nostalgic', 'sad'], 'harmony_complexity': (0.3, 0.7)},
'triumph': {'emotion': ['energetic', 'happy'], 'harmony_complexity': (0.4, 0.8)},
'loss': {'emotion': ['sad', 'nostalgic'], 'harmony_complexity': (0.3, 0.7)},
'adventure': {'emotion': ['energetic', 'tense'], 'harmony_complexity': (0.5, 0.9)},
'reflection': {'emotion': ['calm', 'nostalgic'], 'harmony_complexity': (0.4, 0.8)},
'conflict': {'emotion': ['tense', 'energetic'], 'harmony_complexity': (0.6, 1.0)}
}
# Musical key mapping
self.key_names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
def load_audio(self, file_path, sr=22050, duration=None):
"""Load audio file and return time series and sample rate"""
try:
y, sr = librosa.load(file_path, sr=sr, duration=duration)
return y, sr
except Exception as e:
print(f"Error loading audio file: {e}")
return None, None
def analyze_rhythm(self, y, sr):
"""Analyze rhythm-related features: tempo, beats, time signature"""
# Tempo and beat detection
onset_env = librosa.onset.onset_strength(y=y, sr=sr)
        tempo, beat_frames = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
        tempo = float(np.atleast_1d(tempo)[0])  # newer librosa may return tempo as a 1-element array
beat_times = librosa.frames_to_time(beat_frames, sr=sr)
# Beat intervals and regularity
beat_intervals = np.diff(beat_times) if len(beat_times) > 1 else np.array([0])
beat_regularity = 1.0 / np.std(beat_intervals) if len(beat_intervals) > 0 and np.std(beat_intervals) > 0 else 0
# Time signature estimation - a challenging task with many limitations
estimated_signature = self._estimate_time_signature(y, sr, beat_times, onset_env)
# Compute onset strength to get a measure of rhythm intensity
rhythm_intensity = np.mean(onset_env) / np.max(onset_env) if np.max(onset_env) > 0 else 0
# Rhythm complexity based on variation in onset strength
rhythm_complexity = np.std(onset_env) / np.mean(onset_env) if np.mean(onset_env) > 0 else 0
return {
"tempo": float(tempo),
"beat_times": beat_times.tolist(),
"beat_intervals": beat_intervals.tolist(),
"beat_regularity": float(beat_regularity),
"rhythm_intensity": float(rhythm_intensity),
"rhythm_complexity": float(rhythm_complexity),
"estimated_time_signature": estimated_signature
}
def _estimate_time_signature(self, y, sr, beat_times, onset_env):
"""Estimate the time signature based on beat patterns"""
# This is a simplified approach - accurate time signature detection is complex
if len(beat_times) < 4:
return "Unknown"
# Analyze beat emphasis patterns to detect meter
beat_intervals = np.diff(beat_times)
# Look for periodicity in the onset envelope
ac = librosa.autocorrelate(onset_env, max_size=sr)
# Find peaks in autocorrelation after the first one (which is at lag 0)
peaks = librosa.util.peak_pick(ac, pre_max=20, post_max=20, pre_avg=20, post_avg=20, delta=0.1, wait=1)
peaks = peaks[peaks > 0] # Remove the first peak which is at lag 0
if len(peaks) == 0:
return "4/4" # Default to most common
        # Convert the first significant peak (a lag measured in onset-envelope frames) to seconds
        first_peak_time = librosa.frames_to_time(peaks[0], sr=sr)
        median_beat = np.median(beat_intervals)
        if median_beat <= 0:
            return "4/4"  # Degenerate beat track; fall back to the most common signature
        beats_per_bar = int(round(first_peak_time / median_beat))
        # Map to common time signatures
        if beats_per_bar in (4, 8):
            return "4/4"
        elif beats_per_bar in (3, 6):
            return "3/4"
        elif beats_per_bar == 2:
            return "2/4"
        elif beats_per_bar > 0:
            return f"{beats_per_bar}/4"  # Assume a quarter-note beat unit
        else:
            return "4/4"
def analyze_tonality(self, y, sr):
"""Analyze tonal features: key, mode, harmonic features"""
# Compute chromagram
chroma = librosa.feature.chroma_cqt(y=y, sr=sr)
# Krumhansl-Schmuckler key-finding algorithm (simplified)
# Major and minor profiles from music theory research
major_profile = np.array([6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88])
minor_profile = np.array([6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17])
        # Correlate the averaged chroma with each key profile; rotating the chroma by -i
        # aligns candidate tonic i (C=0, C#=1, ...) with the profile's tonic position
        chroma_avg = np.mean(chroma, axis=1)
        major_corr = np.zeros(12)
        minor_corr = np.zeros(12)
        for i in range(12):
            major_corr[i] = np.corrcoef(np.roll(chroma_avg, -i), major_profile)[0, 1]
            minor_corr[i] = np.corrcoef(np.roll(chroma_avg, -i), minor_profile)[0, 1]
# Find the key with the highest correlation
max_major_idx = np.argmax(major_corr)
max_minor_idx = np.argmax(minor_corr)
# Determine if the piece is in a major or minor key
if major_corr[max_major_idx] > minor_corr[max_minor_idx]:
mode = "major"
key = self.key_names[max_major_idx]
else:
mode = "minor"
key = self.key_names[max_minor_idx]
# Calculate harmony complexity (variability in harmonic content)
harmony_complexity = np.std(chroma) / np.mean(chroma) if np.mean(chroma) > 0 else 0
# Calculate tonal stability (consistency of tonal center)
tonal_stability = 1.0 / (np.std(chroma_avg) + 0.001) # Add small value to avoid division by zero
# Calculate spectral brightness (center of mass of the spectrum)
spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
brightness = np.mean(spectral_centroid) / (sr/2) # Normalize by Nyquist frequency
# Calculate dissonance using spectral contrast
spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
dissonance = np.mean(spectral_contrast[0]) # Higher values may indicate more dissonance
return {
"key": key,
"mode": mode,
"is_major": mode == "major",
"harmony_complexity": float(harmony_complexity),
"tonal_stability": float(tonal_stability),
"brightness": float(brightness),
"dissonance": float(dissonance)
}
def analyze_energy(self, y, sr):
"""Analyze energy characteristics of the audio"""
# RMS Energy (overall loudness)
rms = librosa.feature.rms(y=y)[0]
# Energy metrics
mean_energy = np.mean(rms)
energy_std = np.std(rms)
energy_dynamic_range = np.max(rms) - np.min(rms) if len(rms) > 0 else 0
# Energy distribution across frequency ranges
spec = np.abs(librosa.stft(y))
# Divide the spectrum into low, mid, and high ranges
freq_bins = spec.shape[0]
low_freq_energy = np.mean(spec[:int(freq_bins*0.2), :])
mid_freq_energy = np.mean(spec[int(freq_bins*0.2):int(freq_bins*0.8), :])
high_freq_energy = np.mean(spec[int(freq_bins*0.8):, :])
# Normalize to create a distribution
total_energy = low_freq_energy + mid_freq_energy + high_freq_energy
if total_energy > 0:
low_freq_ratio = low_freq_energy / total_energy
mid_freq_ratio = mid_freq_energy / total_energy
high_freq_ratio = high_freq_energy / total_energy
else:
low_freq_ratio = mid_freq_ratio = high_freq_ratio = 1/3
return {
"mean_energy": float(mean_energy),
"energy_std": float(energy_std),
"energy_dynamic_range": float(energy_dynamic_range),
"frequency_distribution": {
"low_freq": float(low_freq_ratio),
"mid_freq": float(mid_freq_ratio),
"high_freq": float(high_freq_ratio)
}
}
def analyze_emotion(self, rhythm_data, tonal_data, energy_data):
"""Classify the emotion based on musical features"""
# Extract key features for emotion detection
tempo = rhythm_data["tempo"]
is_major = tonal_data["is_major"]
energy = energy_data["mean_energy"]
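        # Note: mean RMS energy is usually well below 1.0 for real recordings, so the
        # 0-1 energy ranges in emotion_profiles act only as rough heuristics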
brightness = tonal_data["brightness"]
# Calculate scores for each emotion
emotion_scores = {}
for emotion, profile in self.emotion_profiles.items():
score = 0.0
# Tempo contribution (0-1 score)
tempo_range = profile["tempo"]
if tempo_range[0] <= tempo <= tempo_range[1]:
score += 1.0
else:
# Partial score based on distance
distance = min(abs(tempo - tempo_range[0]), abs(tempo - tempo_range[1]))
max_distance = 40 # Maximum distance to consider
score += max(0, 1 - (distance / max_distance))
# Energy contribution (0-1 score)
energy_range = profile["energy"]
if energy_range[0] <= energy <= energy_range[1]:
score += 1.0
else:
# Partial score based on distance
distance = min(abs(energy - energy_range[0]), abs(energy - energy_range[1]))
max_distance = 0.5 # Maximum distance to consider
score += max(0, 1 - (distance / max_distance))
# Mode contribution (0-1 score)
if profile["major_mode"] is not None: # Some emotions don't have strong mode preference
score += 1.0 if profile["major_mode"] == is_major else 0.0
else:
score += 0.5 # Neutral contribution
# Brightness contribution (0-1 score)
brightness_range = profile["brightness"]
if brightness_range[0] <= brightness <= brightness_range[1]:
score += 1.0
else:
# Partial score based on distance
distance = min(abs(brightness - brightness_range[0]), abs(brightness - brightness_range[1]))
max_distance = 0.5 # Maximum distance to consider
score += max(0, 1 - (distance / max_distance))
# Normalize score (0-1 range)
emotion_scores[emotion] = score / 4.0
# Find primary emotion
primary_emotion = max(emotion_scores.items(), key=lambda x: x[1])
# Calculate valence and arousal (dimensional emotion model)
# Mapping different emotions to valence-arousal space
valence_map = {
'happy': 0.8, 'sad': 0.2, 'calm': 0.6,
'energetic': 0.7, 'tense': 0.3, 'nostalgic': 0.5
}
arousal_map = {
'happy': 0.7, 'sad': 0.3, 'calm': 0.2,
'energetic': 0.9, 'tense': 0.8, 'nostalgic': 0.4
}
# Calculate weighted valence and arousal
total_weight = sum(emotion_scores.values())
if total_weight > 0:
valence = sum(score * valence_map[emotion] for emotion, score in emotion_scores.items()) / total_weight
arousal = sum(score * arousal_map[emotion] for emotion, score in emotion_scores.items()) / total_weight
else:
valence = 0.5
arousal = 0.5
return {
"primary_emotion": primary_emotion[0],
"confidence": primary_emotion[1],
"emotion_scores": emotion_scores,
"valence": float(valence), # Pleasure dimension (0-1)
"arousal": float(arousal) # Activity dimension (0-1)
}
def analyze_theme(self, rhythm_data, tonal_data, emotion_data):
"""Infer potential themes based on musical features and emotion"""
# Extract relevant features
primary_emotion = emotion_data["primary_emotion"]
harmony_complexity = tonal_data["harmony_complexity"]
# Calculate theme scores
theme_scores = {}
for theme, profile in self.theme_profiles.items():
score = 0.0
# Emotion contribution
if primary_emotion in profile["emotion"]:
# Emotions listed earlier have stronger connection to the theme
position_weight = 1.0 / (profile["emotion"].index(primary_emotion) + 1)
score += position_weight
# Secondary emotions contribution
secondary_emotions = [e for e, s in emotion_data["emotion_scores"].items()
if s > 0.5 and e != primary_emotion]
for emotion in secondary_emotions:
if emotion in profile["emotion"]:
score += 0.3 # Less weight than primary emotion
# Harmony complexity contribution
complexity_range = profile["harmony_complexity"]
if complexity_range[0] <= harmony_complexity <= complexity_range[1]:
score += 1.0
else:
# Partial score based on distance
distance = min(abs(harmony_complexity - complexity_range[0]),
abs(harmony_complexity - complexity_range[1]))
max_distance = 0.5 # Maximum distance to consider
score += max(0, 1 - (distance / max_distance))
# Normalize score
theme_scores[theme] = min(1.0, score / 2.5)
# Find primary theme
primary_theme = max(theme_scores.items(), key=lambda x: x[1])
# Find secondary themes (scores > 0.5)
secondary_themes = [(theme, score) for theme, score in theme_scores.items()
if score > 0.5 and theme != primary_theme[0]]
secondary_themes.sort(key=lambda x: x[1], reverse=True)
return {
"primary_theme": primary_theme[0],
"confidence": primary_theme[1],
"secondary_themes": [t[0] for t in secondary_themes[:2]], # Top 2 secondary themes
"theme_scores": theme_scores
}
def analyze_music(self, file_path):
"""Main function to perform comprehensive music analysis"""
# Load the audio file
y, sr = self.load_audio(file_path)
if y is None:
return {"error": "Failed to load audio file"}
# Run all analyses
rhythm_data = self.analyze_rhythm(y, sr)
tonal_data = self.analyze_tonality(y, sr)
energy_data = self.analyze_energy(y, sr)
# Higher-level analyses that depend on the basic features
emotion_data = self.analyze_emotion(rhythm_data, tonal_data, energy_data)
theme_data = self.analyze_theme(rhythm_data, tonal_data, emotion_data)
# Combine all results
return {
"file": file_path,
"rhythm_analysis": rhythm_data,
"tonal_analysis": tonal_data,
"energy_analysis": energy_data,
"emotion_analysis": emotion_data,
"theme_analysis": theme_data,
"summary": {
"tempo": rhythm_data["tempo"],
"time_signature": rhythm_data["estimated_time_signature"],
"key": tonal_data["key"],
"mode": tonal_data["mode"],
"primary_emotion": emotion_data["primary_emotion"],
"primary_theme": theme_data["primary_theme"]
}
}
# def visualize_analysis(self, file_path):
# """Create visualizations for the music analysis results"""
# # Check if matplotlib is available
# if plt is None:
# print("Error: matplotlib is not installed. Visualization is not available.")
# return
#
# # Load audio and run analysis
# y, sr = self.load_audio(file_path)
# if y is None:
# print("Error: Failed to load audio file")
# return
#
# results = self.analyze_music(file_path)
#
# # Create visualization
# plt.figure(figsize=(15, 12))
# # Waveform
# plt.subplot(3, 2, 1)
# librosa.display.waveshow(y, sr=sr, alpha=0.6)
# plt.title(f'Waveform (Tempo: {results["rhythm_analysis"]["tempo"]:.1f} BPM)')
# # Spectrogram
# plt.subplot(3, 2, 2)
# D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
# librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='log')
# plt.colorbar(format='%+2.0f dB')
# plt.title(f'Spectrogram (Key: {results["tonal_analysis"]["key"]} {results["tonal_analysis"]["mode"]})')
# # Chromagram
# plt.subplot(3, 2, 3)
# chroma = librosa.feature.chroma_cqt(y=y, sr=sr)
# librosa.display.specshow(chroma, y_axis='chroma', x_axis='time')
# plt.colorbar()
# plt.title('Chromagram')
# # Onset strength and beats
# plt.subplot(3, 2, 4)
# onset_env = librosa.onset.onset_strength(y=y, sr=sr)
# times = librosa.times_like(onset_env, sr=sr)
# plt.plot(times, librosa.util.normalize(onset_env), label='Onset strength')
# plt.vlines(results["rhythm_analysis"]["beat_times"], 0, 1, alpha=0.5, color='r',
# linestyle='--', label='Beats')
# plt.legend()
# plt.title('Rhythm Analysis')
# # Emotion scores
# plt.subplot(3, 2, 5)
# emotions = list(results["emotion_analysis"]["emotion_scores"].keys())
# scores = list(results["emotion_analysis"]["emotion_scores"].values())
# plt.bar(emotions, scores, color='skyblue')
# plt.ylim(0, 1)
# plt.title(f'Emotion Analysis (Primary: {results["emotion_analysis"]["primary_emotion"]})')
# plt.xticks(rotation=45)
# # Theme scores
# plt.subplot(3, 2, 6)
# themes = list(results["theme_analysis"]["theme_scores"].keys())
# scores = list(results["theme_analysis"]["theme_scores"].values())
# plt.bar(themes, scores, color='lightgreen')
# plt.ylim(0, 1)
# plt.title(f'Theme Analysis (Primary: {results["theme_analysis"]["primary_theme"]})')
# plt.xticks(rotation=45)
# plt.tight_layout()
# plt.show()
# Create an instance of the analyzer
analyzer = MusicAnalyzer()
# The following code is for demonstration purposes only
# and will only run if executed directly (not when imported)
if __name__ == "__main__":
# Replace this with a real audio file path when running as a script
demo_file = "path/to/your/audio/file.mp3"
    # Analyze the demo audio file
    results = analyzer.analyze_music(demo_file)
    if "error" in results:
        # analyze_music returns an error dict when the file cannot be loaded
        raise SystemExit(f"Analysis failed: {results['error']}")
    # Print analysis summary
    print("\n=== MUSIC ANALYSIS SUMMARY ===")
print(f"Tempo: {results['summary']['tempo']:.1f} BPM")
print(f"Time Signature: {results['summary']['time_signature']}")
print(f"Key: {results['summary']['key']} {results['summary']['mode']}")
print(f"Primary Emotion: {results['summary']['primary_emotion']}")
print(f"Primary Theme: {results['summary']['primary_theme']}")
# Show detailed results (optional)
import json
print("\n=== DETAILED ANALYSIS ===")
print(json.dumps(results, indent=2))
# Visualize the analysis
# analyzer.visualize_analysis(demo_file)
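# A minimal usage sketch when importing this module elsewhere (file path is hypothetical):
#
#     from emotionanalysis import MusicAnalyzer
#     analyzer = MusicAnalyzer()
#     result = analyzer.analyze_music("path/to/song.wav")
#     if "error" not in result:
#         print(result["summary"]["primary_emotion"], result["summary"]["primary_theme"])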