import json
import warnings

import librosa
import numpy as np

# matplotlib is optional; plotting helpers degrade gracefully without it.
try:
    import matplotlib.pyplot as plt
except ImportError:
    plt = None

warnings.filterwarnings('ignore')


class MusicAnalyzer:

    def __init__(self):
        # Reference feature ranges per emotion: tempo in BPM; energy and
        # brightness on a rough 0-1 scale; major_mode=None means the mode
        # is not informative for that emotion.
        self.emotion_profiles = {
            'happy': {'tempo': (100, 180), 'energy': (0.6, 1.0), 'major_mode': True, 'brightness': (0.6, 1.0)},
            'sad': {'tempo': (40, 90), 'energy': (0, 0.5), 'major_mode': False, 'brightness': (0, 0.5)},
            'calm': {'tempo': (50, 90), 'energy': (0, 0.4), 'major_mode': True, 'brightness': (0.3, 0.6)},
            'energetic': {'tempo': (110, 200), 'energy': (0.7, 1.0), 'major_mode': True, 'brightness': (0.5, 0.9)},
            'tense': {'tempo': (70, 140), 'energy': (0.5, 0.9), 'major_mode': False, 'brightness': (0.3, 0.7)},
            'nostalgic': {'tempo': (60, 100), 'energy': (0.3, 0.7), 'major_mode': None, 'brightness': (0.4, 0.7)}
        }

        # Themes are described by the emotions they typically accompany and a
        # plausible range of harmonic complexity.
        self.theme_profiles = {
            'love': {'emotion': ['happy', 'nostalgic', 'sad'], 'harmony_complexity': (0.3, 0.7)},
            'triumph': {'emotion': ['energetic', 'happy'], 'harmony_complexity': (0.4, 0.8)},
            'loss': {'emotion': ['sad', 'nostalgic'], 'harmony_complexity': (0.3, 0.7)},
            'adventure': {'emotion': ['energetic', 'tense'], 'harmony_complexity': (0.5, 0.9)},
            'reflection': {'emotion': ['calm', 'nostalgic'], 'harmony_complexity': (0.4, 0.8)},
            'conflict': {'emotion': ['tense', 'energetic'], 'harmony_complexity': (0.6, 1.0)}
        }

        # Pitch-class names used to report the detected key.
        self.key_names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']

    def load_audio(self, file_path, sr=22050, duration=None):
        """Load audio file and return time series and sample rate"""
        try:
            y, sr = librosa.load(file_path, sr=sr, duration=duration)
            return y, sr
        except Exception as e:
            print(f"Error loading audio file: {e}")
            return None, None

    def analyze_rhythm(self, y, sr):
        """Analyze rhythm-related features: tempo, beats, time signature"""
        # Onset strength envelope drives both beat tracking and the later
        # time-signature estimate.
        onset_env = librosa.onset.onset_strength(y=y, sr=sr)
        tempo, beat_frames = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
        # Recent librosa versions may return tempo as a one-element array.
        tempo = float(np.atleast_1d(tempo)[0])
        beat_times = librosa.frames_to_time(beat_frames, sr=sr)

        # Beat regularity: the steadier the inter-beat intervals, the higher the score.
        beat_intervals = np.diff(beat_times) if len(beat_times) > 1 else np.array([0])
        beat_regularity = 1.0 / np.std(beat_intervals) if len(beat_intervals) > 0 and np.std(beat_intervals) > 0 else 0

        estimated_signature = self._estimate_time_signature(y, sr, beat_times, onset_env)

        # Intensity: average onset strength relative to its peak.
        rhythm_intensity = np.mean(onset_env) / np.max(onset_env) if np.max(onset_env) > 0 else 0

        # Complexity: coefficient of variation of the onset strength.
        rhythm_complexity = np.std(onset_env) / np.mean(onset_env) if np.mean(onset_env) > 0 else 0

        return {
            "tempo": tempo,
            "beat_times": beat_times.tolist(),
            "beat_intervals": beat_intervals.tolist(),
            "beat_regularity": float(beat_regularity),
            "rhythm_intensity": float(rhythm_intensity),
            "rhythm_complexity": float(rhythm_complexity),
            "estimated_time_signature": estimated_signature
        }

    def _estimate_time_signature(self, y, sr, beat_times, onset_env):
        """Estimate the time signature based on beat patterns"""
        if len(beat_times) < 4:
            return "Unknown"

        beat_intervals = np.diff(beat_times)

        # Autocorrelate the onset envelope to find the bar-level periodicity.
        ac = librosa.autocorrelate(onset_env, max_size=sr)

        peaks = librosa.util.peak_pick(ac, pre_max=20, post_max=20, pre_avg=20, post_avg=20, delta=0.1, wait=1)
        peaks = peaks[peaks > 0]

        if len(peaks) == 0:
            return "4/4"

        # The peaks index frames of the onset envelope, so convert the first
        # peak lag to seconds before comparing it with the median beat period.
        first_peak_time = librosa.frames_to_time(peaks[0], sr=sr)
        beats_per_bar = int(round(first_peak_time / np.median(beat_intervals)))

        if beats_per_bar in (4, 8):
            return "4/4"
        elif beats_per_bar in (3, 6):
            return "3/4"
        elif beats_per_bar == 2:
            return "2/4"
        elif beats_per_bar < 2:
            # The first autocorrelation peak was shorter than a bar; fall back.
            return "4/4"
        else:
            return f"{beats_per_bar}/4"

    def analyze_tonality(self, y, sr):
        """Analyze tonal features: key, mode, harmonic features"""
        chroma = librosa.feature.chroma_cqt(y=y, sr=sr)

        # Krumhansl-Schmuckler key profiles (major and minor).
        major_profile = np.array([6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88])
        minor_profile = np.array([6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17])

        # Correlate the average chroma vector against every rotation of the profiles.
        chroma_avg = np.mean(chroma, axis=1)
        major_corr = np.zeros(12)
        minor_corr = np.zeros(12)

        for i in range(12):
            # Rotate the chroma so pitch class i sits at the tonic position
            # before correlating with the profile.
            major_corr[i] = np.corrcoef(np.roll(chroma_avg, -i), major_profile)[0, 1]
            minor_corr[i] = np.corrcoef(np.roll(chroma_avg, -i), minor_profile)[0, 1]

        max_major_idx = np.argmax(major_corr)
        max_minor_idx = np.argmax(minor_corr)

        # The better-correlating profile decides the mode; its rotation gives the key.
        if major_corr[max_major_idx] > minor_corr[max_minor_idx]:
            mode = "major"
            key = self.key_names[max_major_idx]
        else:
            mode = "minor"
            key = self.key_names[max_minor_idx]

        # Harmonic complexity: dispersion of chroma energy across pitch classes.
        harmony_complexity = np.std(chroma) / np.mean(chroma) if np.mean(chroma) > 0 else 0

        # Tonal stability heuristic based on the spread of the averaged chroma.
        tonal_stability = 1.0 / (np.std(chroma_avg) + 0.001)

        # Brightness: mean spectral centroid normalized by the Nyquist frequency.
        spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
        brightness = np.mean(spectral_centroid) / (sr / 2)

        # Rough dissonance proxy: spectral contrast in the lowest sub-band.
        spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
        dissonance = np.mean(spectral_contrast[0])

        return {
            "key": key,
            "mode": mode,
            "is_major": mode == "major",
            "harmony_complexity": float(harmony_complexity),
            "tonal_stability": float(tonal_stability),
            "brightness": float(brightness),
            "dissonance": float(dissonance)
        }

    def analyze_energy(self, y, sr):
        """Analyze energy characteristics of the audio"""
        # Frame-wise RMS energy. Note: these are raw RMS values (typically well
        # below 1.0 for normalized audio), not rescaled to a 0-1 range.
        rms = librosa.feature.rms(y=y)[0]

        mean_energy = np.mean(rms)
        energy_std = np.std(rms)
        energy_dynamic_range = np.max(rms) - np.min(rms) if len(rms) > 0 else 0

        # Magnitude spectrogram for the frequency-band energy split.
        spec = np.abs(librosa.stft(y))

        # Split the spectrum into low (bottom 20%), mid, and high (top 20%) bands.
        freq_bins = spec.shape[0]
        low_freq_energy = np.mean(spec[:int(freq_bins * 0.2), :])
        mid_freq_energy = np.mean(spec[int(freq_bins * 0.2):int(freq_bins * 0.8), :])
        high_freq_energy = np.mean(spec[int(freq_bins * 0.8):, :])

        # Normalize the band energies so they sum to one.
        total_energy = low_freq_energy + mid_freq_energy + high_freq_energy
        if total_energy > 0:
            low_freq_ratio = low_freq_energy / total_energy
            mid_freq_ratio = mid_freq_energy / total_energy
            high_freq_ratio = high_freq_energy / total_energy
        else:
            low_freq_ratio = mid_freq_ratio = high_freq_ratio = 1 / 3

        return {
            "mean_energy": float(mean_energy),
            "energy_std": float(energy_std),
            "energy_dynamic_range": float(energy_dynamic_range),
            "frequency_distribution": {
                "low_freq": float(low_freq_ratio),
                "mid_freq": float(mid_freq_ratio),
                "high_freq": float(high_freq_ratio)
            }
        }

    def analyze_emotion(self, rhythm_data, tonal_data, energy_data):
        """Classify the emotion based on musical features"""
        tempo = rhythm_data["tempo"]
        is_major = tonal_data["is_major"]
        energy = energy_data["mean_energy"]
        brightness = tonal_data["brightness"]

        # Score each emotion on four criteria (tempo, energy, mode, brightness),
        # each contributing up to 1.0 point.
        emotion_scores = {}
        for emotion, profile in self.emotion_profiles.items():
            score = 0.0

            # Tempo: full credit inside the range, linear fall-off outside it.
            tempo_range = profile["tempo"]
            if tempo_range[0] <= tempo <= tempo_range[1]:
                score += 1.0
            else:
                distance = min(abs(tempo - tempo_range[0]), abs(tempo - tempo_range[1]))
                max_distance = 40
                score += max(0, 1 - (distance / max_distance))

            # Energy: same scheme with a tighter fall-off.
            energy_range = profile["energy"]
            if energy_range[0] <= energy <= energy_range[1]:
                score += 1.0
            else:
                distance = min(abs(energy - energy_range[0]), abs(energy - energy_range[1]))
                max_distance = 0.5
                score += max(0, 1 - (distance / max_distance))

            # Mode: exact match, or a neutral half point when the profile is agnostic.
            if profile["major_mode"] is not None:
                score += 1.0 if profile["major_mode"] == is_major else 0.0
            else:
                score += 0.5

            # Brightness: same scheme as energy.
            brightness_range = profile["brightness"]
            if brightness_range[0] <= brightness <= brightness_range[1]:
                score += 1.0
            else:
                distance = min(abs(brightness - brightness_range[0]), abs(brightness - brightness_range[1]))
                max_distance = 0.5
                score += max(0, 1 - (distance / max_distance))

            # Normalize to 0-1.
            emotion_scores[emotion] = score / 4.0

        primary_emotion = max(emotion_scores.items(), key=lambda x: x[1])

        # Map emotions onto the valence-arousal plane and take a score-weighted average.
        valence_map = {
            'happy': 0.8, 'sad': 0.2, 'calm': 0.6,
            'energetic': 0.7, 'tense': 0.3, 'nostalgic': 0.5
        }
        arousal_map = {
            'happy': 0.7, 'sad': 0.3, 'calm': 0.2,
            'energetic': 0.9, 'tense': 0.8, 'nostalgic': 0.4
        }

        total_weight = sum(emotion_scores.values())
        if total_weight > 0:
            valence = sum(score * valence_map[emotion] for emotion, score in emotion_scores.items()) / total_weight
            arousal = sum(score * arousal_map[emotion] for emotion, score in emotion_scores.items()) / total_weight
        else:
            valence = 0.5
            arousal = 0.5

        return {
            "primary_emotion": primary_emotion[0],
            "confidence": primary_emotion[1],
            "emotion_scores": emotion_scores,
            "valence": float(valence),
            "arousal": float(arousal)
        }

    def analyze_theme(self, rhythm_data, tonal_data, emotion_data):
        """Infer potential themes based on musical features and emotion"""
        primary_emotion = emotion_data["primary_emotion"]
        harmony_complexity = tonal_data["harmony_complexity"]

        theme_scores = {}
        for theme, profile in self.theme_profiles.items():
            score = 0.0

            # Primary emotion match, weighted by its position in the theme's emotion list.
            if primary_emotion in profile["emotion"]:
                position_weight = 1.0 / (profile["emotion"].index(primary_emotion) + 1)
                score += position_weight

            # Secondary emotions (score above 0.5) add a smaller bonus.
            secondary_emotions = [e for e, s in emotion_data["emotion_scores"].items()
                                  if s > 0.5 and e != primary_emotion]
            for emotion in secondary_emotions:
                if emotion in profile["emotion"]:
                    score += 0.3

            # Harmonic complexity: full credit in range, linear fall-off outside it.
            complexity_range = profile["harmony_complexity"]
            if complexity_range[0] <= harmony_complexity <= complexity_range[1]:
                score += 1.0
            else:
                distance = min(abs(harmony_complexity - complexity_range[0]),
                               abs(harmony_complexity - complexity_range[1]))
                max_distance = 0.5
                score += max(0, 1 - (distance / max_distance))

            # Normalize to 0-1 (the maximum attainable score is roughly 2.5).
            theme_scores[theme] = min(1.0, score / 2.5)

        primary_theme = max(theme_scores.items(), key=lambda x: x[1])

        # Keep up to two additional themes that also score well.
        secondary_themes = [(theme, score) for theme, score in theme_scores.items()
                            if score > 0.5 and theme != primary_theme[0]]
        secondary_themes.sort(key=lambda x: x[1], reverse=True)

        return {
            "primary_theme": primary_theme[0],
            "confidence": primary_theme[1],
            "secondary_themes": [t[0] for t in secondary_themes[:2]],
            "theme_scores": theme_scores
        }

    def analyze_music(self, file_path):
        """Main function to perform comprehensive music analysis"""
        y, sr = self.load_audio(file_path)
        if y is None:
            return {"error": "Failed to load audio file"}

        # Low-level feature extraction.
        rhythm_data = self.analyze_rhythm(y, sr)
        tonal_data = self.analyze_tonality(y, sr)
        energy_data = self.analyze_energy(y, sr)

        # Higher-level interpretation built on the extracted features.
        emotion_data = self.analyze_emotion(rhythm_data, tonal_data, energy_data)
        theme_data = self.analyze_theme(rhythm_data, tonal_data, emotion_data)

        return {
            "file": file_path,
            "rhythm_analysis": rhythm_data,
            "tonal_analysis": tonal_data,
            "energy_analysis": energy_data,
            "emotion_analysis": emotion_data,
            "theme_analysis": theme_data,
            "summary": {
                "tempo": rhythm_data["tempo"],
                "time_signature": rhythm_data["estimated_time_signature"],
                "key": tonal_data["key"],
                "mode": tonal_data["mode"],
                "primary_emotion": emotion_data["primary_emotion"],
                "primary_theme": theme_data["primary_theme"]
            }
        }
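

# Optional visualization sketch. It assumes matplotlib is installed (the guarded
# import above sets plt to None otherwise) and takes the dict returned by
# analyze_rhythm; the function name and figure layout here are illustrative and
# not part of the original analyzer.
def plot_beat_overview(y, sr, rhythm_data, title="Waveform with detected beats"):
    """Plot the waveform and overlay the beat times found by analyze_rhythm."""
    if plt is None:
        print("matplotlib is not available; skipping plot.")
        return
    times = np.arange(len(y)) / sr
    plt.figure(figsize=(10, 3))
    plt.plot(times, y, alpha=0.6)
    for bt in rhythm_data["beat_times"]:
        plt.axvline(bt, color="r", alpha=0.3)
    plt.xlabel("Time (s)")
    plt.ylabel("Amplitude")
    plt.title(f"{title} (~{rhythm_data['tempo']:.0f} BPM)")
    plt.tight_layout()
    plt.show()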
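

# Quick, optional sanity check for the key-detection step: synthesize a C-major
# arpeggio from pure tones and run analyze_tonality on it. This is a hedged
# sketch (the helper is not called anywhere by default), and the expectation is
# only that the result is typically reported as C major.
def _synthetic_key_check(sr=22050):
    c_major_freqs = [261.63, 329.63, 392.00]  # C4, E4, G4
    y = np.concatenate([librosa.tone(f, sr=sr, duration=0.5) for f in c_major_freqs * 4])
    tonal = MusicAnalyzer().analyze_tonality(y, sr)
    print(f"Synthetic C-major arpeggio detected as: {tonal['key']} {tonal['mode']}")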


# Module-level instance used by the demo below (and available for import).
analyzer = MusicAnalyzer()


if __name__ == "__main__":
    # Replace this placeholder with a real audio file before running.
    demo_file = "path/to/your/audio/file.mp3"

    results = analyzer.analyze_music(demo_file)
    if "error" in results:
        raise SystemExit(results["error"])

    print("\n=== MUSIC ANALYSIS SUMMARY ===")
    print(f"Tempo: {results['summary']['tempo']:.1f} BPM")
    print(f"Time Signature: {results['summary']['time_signature']}")
    print(f"Key: {results['summary']['key']} {results['summary']['mode']}")
    print(f"Primary Emotion: {results['summary']['primary_emotion']}")
    print(f"Primary Theme: {results['summary']['primary_theme']}")

    print("\n=== DETAILED ANALYSIS ===")
    print(json.dumps(results, indent=2))
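
    # Optionally persist the detailed analysis. The output filename below is a
    # placeholder for this sketch, not something the analyzer itself defines.
    output_path = "analysis_results.json"
    with open(output_path, "w") as f:
        json.dump(results, f, indent=2)
    print(f"Saved detailed analysis to {output_path}")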