import librosa
import numpy as np
try:
    import matplotlib.pyplot as plt
except ImportError:
    plt = None
import warnings

warnings.filterwarnings('ignore')  # Suppress all warnings (librosa/audioread can be noisy with deprecation notices)


class MusicAnalyzer:
    def __init__(self):
        # Emotion feature mappings - these define characteristics of different emotions
        self.emotion_profiles = {
            'happy': {'tempo': (100, 180), 'energy': (0.6, 1.0), 'major_mode': True, 'brightness': (0.6, 1.0)},
            'sad': {'tempo': (40, 90), 'energy': (0, 0.5), 'major_mode': False, 'brightness': (0, 0.5)},
            'calm': {'tempo': (50, 90), 'energy': (0, 0.4), 'major_mode': True, 'brightness': (0.3, 0.6)},
            'energetic': {'tempo': (110, 200), 'energy': (0.7, 1.0), 'major_mode': True, 'brightness': (0.5, 0.9)},
            'tense': {'tempo': (70, 140), 'energy': (0.5, 0.9), 'major_mode': False, 'brightness': (0.3, 0.7)},
            'nostalgic': {'tempo': (60, 100), 'energy': (0.3, 0.7), 'major_mode': None, 'brightness': (0.4, 0.7)}
        }

        # Theme mappings based on musical features
        self.theme_profiles = {
            'love': {'emotion': ['happy', 'nostalgic', 'sad'], 'harmony_complexity': (0.3, 0.7)},
            'triumph': {'emotion': ['energetic', 'happy'], 'harmony_complexity': (0.4, 0.8)},
            'loss': {'emotion': ['sad', 'nostalgic'], 'harmony_complexity': (0.3, 0.7)},
            'adventure': {'emotion': ['energetic', 'tense'], 'harmony_complexity': (0.5, 0.9)},
            'reflection': {'emotion': ['calm', 'nostalgic'], 'harmony_complexity': (0.4, 0.8)},
            'conflict': {'emotion': ['tense', 'energetic'], 'harmony_complexity': (0.6, 1.0)}
        }

        # Musical key mapping
        self.key_names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']

    def load_audio(self, file_path, sr=22050, duration=None):
        """Load audio file and return time series and sample rate"""
        try:
            y, sr = librosa.load(file_path, sr=sr, duration=duration)
            return y, sr
        except Exception as e:
            print(f"Error loading audio file: {e}")
            return None, None

    def analyze_rhythm(self, y, sr):
        """Analyze rhythm-related features: tempo, beats, time signature"""
        # Tempo and beat detection
        onset_env = librosa.onset.onset_strength(y=y, sr=sr)
        tempo, beat_frames = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
        # Newer librosa versions return tempo as a 1-element array; normalize to a float scalar
        tempo = float(np.atleast_1d(tempo)[0])
        beat_times = librosa.frames_to_time(beat_frames, sr=sr)

        # Beat intervals and regularity
        beat_intervals = np.diff(beat_times) if len(beat_times) > 1 else np.array([0])
        beat_regularity = 1.0 / np.std(beat_intervals) if len(beat_intervals) > 0 and np.std(beat_intervals) > 0 else 0

        # Rhythm pattern analysis through autocorrelation
        ac = librosa.autocorrelate(onset_env, max_size=sr // 2)
        ac = librosa.util.normalize(ac, norm=np.inf)

        # Time signature estimation - a challenging task with many limitations
        estimated_signature = self._estimate_time_signature(y, sr, beat_times, onset_env)

        # Compute onset strength to get a measure of rhythm intensity
        rhythm_intensity = np.mean(onset_env) / np.max(onset_env) if np.max(onset_env) > 0 else 0

        # Rhythm complexity based on variation in onset strength
        rhythm_complexity = np.std(onset_env) / np.mean(onset_env) if np.mean(onset_env) > 0 else 0

        return {
            "tempo": float(tempo),
            "beat_times": beat_times.tolist(),
            "beat_intervals": beat_intervals.tolist(),
            "beat_regularity": float(beat_regularity),
            "rhythm_intensity": float(rhythm_intensity),
            "rhythm_complexity": float(rhythm_complexity),
            "estimated_time_signature": estimated_signature
        }

    def _estimate_time_signature(self, y, sr, beat_times, onset_env):
        """Estimate the time signature based on beat patterns"""
        # This is a simplified approach - accurate time signature detection is complex
        if len(beat_times) < 4:
            return "Unknown"

        # Analyze beat emphasis patterns to detect meter
        beat_intervals = np.diff(beat_times)

        # Look for periodicity in the onset envelope
        ac = librosa.autocorrelate(onset_env, max_size=sr)

        # Find peaks in autocorrelation after the first one (which is at lag 0)
        peaks = librosa.util.peak_pick(ac, pre_max=20, post_max=20, pre_avg=20, post_avg=20, delta=0.1, wait=1)
        peaks = peaks[peaks > 0]  # Remove the first peak which is at lag 0

        if len(peaks) == 0:
            return "4/4"  # Default to most common

        # Convert first significant peak to beats. The autocorrelation lag is measured in
        # onset-envelope frames (not samples), so convert frames to seconds before dividing
        # by the median beat period.
        first_peak_time = librosa.frames_to_time(peaks[0], sr=sr)
        beats_per_bar = int(round(first_peak_time / np.median(beat_intervals)))

        # Map to common time signatures
        if beats_per_bar == 4 or beats_per_bar == 8:
            return "4/4"
        elif beats_per_bar == 3 or beats_per_bar == 6:
            return "3/4"
        elif beats_per_bar == 2:
            return "2/4"
        else:
            return f"{beats_per_bar}/4"  # Default assumption

    def analyze_tonality(self, y, sr):
        """Analyze tonal features: key, mode, harmonic features"""
        # Compute chromagram
        chroma = librosa.feature.chroma_cqt(y=y, sr=sr)

        # Krumhansl-Schmuckler key-finding algorithm (simplified)
        # Major and minor profiles from music theory research
        major_profile = np.array([6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88])
        minor_profile = np.array([6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17])

        # Calculate the correlation of the chroma with each key profile
        chroma_avg = np.mean(chroma, axis=1)
        major_corr = np.zeros(12)
        minor_corr = np.zeros(12)

        for i in range(12):
            # Rotate the chroma so that pitch class i lands at the tonic position (index 0)
            # before correlating with the key profiles
            major_corr[i] = np.corrcoef(np.roll(chroma_avg, -i), major_profile)[0, 1]
            minor_corr[i] = np.corrcoef(np.roll(chroma_avg, -i), minor_profile)[0, 1]

        # Find the key with the highest correlation
        max_major_idx = np.argmax(major_corr)
        max_minor_idx = np.argmax(minor_corr)

        # Determine if the piece is in a major or minor key
        if major_corr[max_major_idx] > minor_corr[max_minor_idx]:
            mode = "major"
            key = self.key_names[max_major_idx]
        else:
            mode = "minor"
            key = self.key_names[max_minor_idx]

        # Calculate harmony complexity (variability in harmonic content)
        harmony_complexity = np.std(chroma) / np.mean(chroma) if np.mean(chroma) > 0 else 0

        # Calculate tonal stability (consistency of tonal center)
        tonal_stability = 1.0 / (np.std(chroma_avg) + 0.001)  # Add small value to avoid division by zero

        # Calculate spectral brightness (center of mass of the spectrum)
        spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
        brightness = np.mean(spectral_centroid) / (sr/2)  # Normalize by Nyquist frequency

        # Calculate dissonance using spectral contrast
        spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
        dissonance = np.mean(spectral_contrast[0])  # Higher values may indicate more dissonance

        return {
            "key": key,
            "mode": mode,
            "is_major": mode == "major",
            "harmony_complexity": float(harmony_complexity),
            "tonal_stability": float(tonal_stability),
            "brightness": float(brightness),
            "dissonance": float(dissonance)
        }

    def analyze_energy(self, y, sr):
        """Analyze energy characteristics of the audio"""
        # RMS Energy (overall loudness)
        rms = librosa.feature.rms(y=y)[0]

        # Energy metrics
        mean_energy = np.mean(rms)
        energy_std = np.std(rms)
        energy_dynamic_range = np.max(rms) - np.min(rms) if len(rms) > 0 else 0

        # Energy distribution across frequency ranges
        spec = np.abs(librosa.stft(y))

        # Divide the spectrum into low, mid, and high ranges
        freq_bins = spec.shape[0]
        low_freq_energy = np.mean(spec[:int(freq_bins*0.2), :])
        mid_freq_energy = np.mean(spec[int(freq_bins*0.2):int(freq_bins*0.8), :])
        high_freq_energy = np.mean(spec[int(freq_bins*0.8):, :])

        # Normalize to create a distribution
        total_energy = low_freq_energy + mid_freq_energy + high_freq_energy
        if total_energy > 0:
            low_freq_ratio = low_freq_energy / total_energy
            mid_freq_ratio = mid_freq_energy / total_energy
            high_freq_ratio = high_freq_energy / total_energy
        else:
            low_freq_ratio = mid_freq_ratio = high_freq_ratio = 1/3

        return {
            "mean_energy": float(mean_energy),
            "energy_std": float(energy_std),
            "energy_dynamic_range": float(energy_dynamic_range),
            "frequency_distribution": {
                "low_freq": float(low_freq_ratio),
                "mid_freq": float(mid_freq_ratio),
                "high_freq": float(high_freq_ratio)
            }
        }

    def analyze_emotion(self, rhythm_data, tonal_data, energy_data):
        """Classify the emotion based on musical features"""
        # Extract key features for emotion detection
        tempo = rhythm_data["tempo"]
        is_major = tonal_data["is_major"]
        energy = energy_data["mean_energy"]
        brightness = tonal_data["brightness"]
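        # Note: mean_energy is the raw RMS of a waveform normalized to [-1, 1], which is usually
        # well below 1 for typical recordings, so high 'energy' ranges such as (0.6, 1.0) in
        # emotion_profiles are mostly matched via the distance-based partial score below rather
        # than by direct containment.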

        # Calculate scores for each emotion
        emotion_scores = {}
        for emotion, profile in self.emotion_profiles.items():
            score = 0.0

            # Tempo contribution (0-1 score)
            tempo_range = profile["tempo"]
            if tempo_range[0] <= tempo <= tempo_range[1]:
                score += 1.0
            else:
                # Partial score based on distance
                distance = min(abs(tempo - tempo_range[0]), abs(tempo - tempo_range[1]))
                max_distance = 40  # Maximum distance to consider
                score += max(0, 1 - (distance / max_distance))

            # Energy contribution (0-1 score)
            energy_range = profile["energy"]
            if energy_range[0] <= energy <= energy_range[1]:
                score += 1.0
            else:
                # Partial score based on distance
                distance = min(abs(energy - energy_range[0]), abs(energy - energy_range[1]))
                max_distance = 0.5  # Maximum distance to consider
                score += max(0, 1 - (distance / max_distance))

            # Mode contribution (0-1 score)
            if profile["major_mode"] is not None:  # Some emotions don't have strong mode preference
                score += 1.0 if profile["major_mode"] == is_major else 0.0
            else:
                score += 0.5  # Neutral contribution

            # Brightness contribution (0-1 score)
            brightness_range = profile["brightness"]
            if brightness_range[0] <= brightness <= brightness_range[1]:
                score += 1.0
            else:
                # Partial score based on distance
                distance = min(abs(brightness - brightness_range[0]), abs(brightness - brightness_range[1]))
                max_distance = 0.5  # Maximum distance to consider
                score += max(0, 1 - (distance / max_distance))

            # Normalize score (0-1 range)
            emotion_scores[emotion] = score / 4.0

        # Find primary emotion
        primary_emotion = max(emotion_scores.items(), key=lambda x: x[1])

        # Calculate valence and arousal (dimensional emotion model)
        # Mapping different emotions to valence-arousal space
        valence_map = {
            'happy': 0.8, 'sad': 0.2, 'calm': 0.6,
            'energetic': 0.7, 'tense': 0.3, 'nostalgic': 0.5
        }

        arousal_map = {
            'happy': 0.7, 'sad': 0.3, 'calm': 0.2,
            'energetic': 0.9, 'tense': 0.8, 'nostalgic': 0.4
        }

        # Calculate weighted valence and arousal
        total_weight = sum(emotion_scores.values())
        if total_weight > 0:
            valence = sum(score * valence_map[emotion] for emotion, score in emotion_scores.items()) / total_weight
            arousal = sum(score * arousal_map[emotion] for emotion, score in emotion_scores.items()) / total_weight
        else:
            valence = 0.5
            arousal = 0.5

        return {
            "primary_emotion": primary_emotion[0],
            "confidence": primary_emotion[1],
            "emotion_scores": emotion_scores,
            "valence": float(valence),    # Pleasure dimension (0-1)
            "arousal": float(arousal)     # Activity dimension (0-1)
        }

    def analyze_theme(self, rhythm_data, tonal_data, emotion_data):
        """Infer potential themes based on musical features and emotion"""
        # Extract relevant features
        primary_emotion = emotion_data["primary_emotion"]
        harmony_complexity = tonal_data["harmony_complexity"]

        # Calculate theme scores
        theme_scores = {}
        for theme, profile in self.theme_profiles.items():
            score = 0.0

            # Emotion contribution
            if primary_emotion in profile["emotion"]:
                # Emotions listed earlier have stronger connection to the theme
                position_weight = 1.0 / (profile["emotion"].index(primary_emotion) + 1)
                score += position_weight

            # Secondary emotions contribution
            secondary_emotions = [e for e, s in emotion_data["emotion_scores"].items()
                                 if s > 0.5 and e != primary_emotion]
            for emotion in secondary_emotions:
                if emotion in profile["emotion"]:
                    score += 0.3  # Less weight than primary emotion

            # Harmony complexity contribution
            complexity_range = profile["harmony_complexity"]
            if complexity_range[0] <= harmony_complexity <= complexity_range[1]:
                score += 1.0
            else:
                # Partial score based on distance
                distance = min(abs(harmony_complexity - complexity_range[0]),
                              abs(harmony_complexity - complexity_range[1]))
                max_distance = 0.5  # Maximum distance to consider
                score += max(0, 1 - (distance / max_distance))

            # Normalize score
            theme_scores[theme] = min(1.0, score / 2.5)

        # Find primary theme
        primary_theme = max(theme_scores.items(), key=lambda x: x[1])

        # Find secondary themes (scores > 0.5)
        secondary_themes = [(theme, score) for theme, score in theme_scores.items()
                          if score > 0.5 and theme != primary_theme[0]]
        secondary_themes.sort(key=lambda x: x[1], reverse=True)

        return {
            "primary_theme": primary_theme[0],
            "confidence": primary_theme[1],
            "secondary_themes": [t[0] for t in secondary_themes[:2]],  # Top 2 secondary themes
            "theme_scores": theme_scores
        }

    def analyze_music(self, file_path):
        """Main function to perform comprehensive music analysis"""
        # Load the audio file
        y, sr = self.load_audio(file_path)
        if y is None:
            return {"error": "Failed to load audio file"}

        # Run all analyses
        rhythm_data = self.analyze_rhythm(y, sr)
        tonal_data = self.analyze_tonality(y, sr)
        energy_data = self.analyze_energy(y, sr)

        # Higher-level analyses that depend on the basic features
        emotion_data = self.analyze_emotion(rhythm_data, tonal_data, energy_data)
        theme_data = self.analyze_theme(rhythm_data, tonal_data, emotion_data)

        # Combine all results
        return {
            "file": file_path,
            "rhythm_analysis": rhythm_data,
            "tonal_analysis": tonal_data,
            "energy_analysis": energy_data,
            "emotion_analysis": emotion_data,
            "theme_analysis": theme_data,
            "summary": {
                "tempo": rhythm_data["tempo"],
                "time_signature": rhythm_data["estimated_time_signature"],
                "key": tonal_data["key"],
                "mode": tonal_data["mode"],
                "primary_emotion": emotion_data["primary_emotion"],
                "primary_theme": theme_data["primary_theme"]
            }
        }

    # def visualize_analysis(self, file_path):
    #     """Create visualizations for the music analysis results"""
    #     # Check if matplotlib is available
    #     if plt is None:
    #         print("Error: matplotlib is not installed. Visualization is not available.")
    #         return
    #     
    #     # Load audio and run analysis
    #     y, sr = self.load_audio(file_path)
    #     if y is None:
    #         print("Error: Failed to load audio file")
    #         return
    #
    #     results = self.analyze_music(file_path)
    #
    #     # Create visualization
    #     plt.figure(figsize=(15, 12))

    #     # Waveform
    #     plt.subplot(3, 2, 1)
    #     librosa.display.waveshow(y, sr=sr, alpha=0.6)
    #     plt.title(f'Waveform (Tempo: {results["rhythm_analysis"]["tempo"]:.1f} BPM)')

    #     # Spectrogram
    #     plt.subplot(3, 2, 2)
    #     D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
    #     librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='log')
    #     plt.colorbar(format='%+2.0f dB')
    #     plt.title(f'Spectrogram (Key: {results["tonal_analysis"]["key"]} {results["tonal_analysis"]["mode"]})')

    #     # Chromagram
    #     plt.subplot(3, 2, 3)
    #     chroma = librosa.feature.chroma_cqt(y=y, sr=sr)
    #     librosa.display.specshow(chroma, y_axis='chroma', x_axis='time')
    #     plt.colorbar()
    #     plt.title('Chromagram')

    #     # Onset strength and beats
    #     plt.subplot(3, 2, 4)
    #     onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    #     times = librosa.times_like(onset_env, sr=sr)
    #     plt.plot(times, librosa.util.normalize(onset_env), label='Onset strength')
    #     plt.vlines(results["rhythm_analysis"]["beat_times"], 0, 1, alpha=0.5, color='r',
    #               linestyle='--', label='Beats')
    #     plt.legend()
    #     plt.title('Rhythm Analysis')

    #     # Emotion scores
    #     plt.subplot(3, 2, 5)
    #     emotions = list(results["emotion_analysis"]["emotion_scores"].keys())
    #     scores = list(results["emotion_analysis"]["emotion_scores"].values())
    #     plt.bar(emotions, scores, color='skyblue')
    #     plt.ylim(0, 1)
    #     plt.title(f'Emotion Analysis (Primary: {results["emotion_analysis"]["primary_emotion"]})')
    #     plt.xticks(rotation=45)

    #     # Theme scores
    #     plt.subplot(3, 2, 6)
    #     themes = list(results["theme_analysis"]["theme_scores"].keys())
    #     scores = list(results["theme_analysis"]["theme_scores"].values())
    #     plt.bar(themes, scores, color='lightgreen')
    #     plt.ylim(0, 1)
    #     plt.title(f'Theme Analysis (Primary: {results["theme_analysis"]["primary_theme"]})')
    #     plt.xticks(rotation=45)

    #     plt.tight_layout()
    #     plt.show()


# Create an instance of the analyzer
analyzer = MusicAnalyzer()

# The following code is for demonstration purposes only
# and will only run if executed directly (not when imported)
if __name__ == "__main__":
    # Replace this with a real audio file path when running as a script
    demo_file = "path/to/your/audio/file.mp3"
    
    # Analyze the audio file
    results = analyzer.analyze_music(demo_file)

    if "error" in results:
        print(results["error"])
    else:
        # Print analysis summary
        print("\n=== MUSIC ANALYSIS SUMMARY ===")
        print(f"Tempo: {results['summary']['tempo']:.1f} BPM")
        print(f"Time Signature: {results['summary']['time_signature']}")
        print(f"Key: {results['summary']['key']} {results['summary']['mode']}")
        print(f"Primary Emotion: {results['summary']['primary_emotion']}")
        print(f"Primary Theme: {results['summary']['primary_theme']}")

        # Show detailed results (optional)
        import json
        print("\n=== DETAILED ANALYSIS ===")
        print(json.dumps(results, indent=2))
    
    # Visualize the analysis
    # analyzer.visualize_analysis(demo_file)