File size: 5,061 Bytes
7530063
 
 
 
4654b73
 
7530063
 
aaa6686
3e72e5c
4654b73
7530063
43f8e66
05111e0
 
 
2b4016e
 
4654b73
05111e0
 
4654b73
05111e0
2b4016e
 
0fb1e5a
2b4016e
43f8e66
 
05111e0
 
 
 
 
0fb1e5a
19b68b4
 
 
0fb1e5a
 
19b68b4
 
 
 
 
 
 
d48dfdf
19b68b4
 
 
 
 
05111e0
0fb1e5a
2b4016e
05111e0
 
2b4016e
4654b73
7530063
0fb1e5a
05111e0
 
 
 
4654b73
05111e0
 
4654b73
aaa6686
 
 
 
 
05111e0
aaa6686
 
 
 
 
 
 
2b4016e
aaa6686
 
 
 
05111e0
d3701bd
aaa6686
05111e0
 
 
7530063
 
43f8e66
7530063
2b4016e
 
43f8e66
2b4016e
0fb1e5a
2b4016e
 
 
4654b73
05111e0
4654b73
7530063
43f8e66
 
 
 
7530063
4654b73
05111e0
 
 
43f8e66
4654b73
 
 
7530063
 
 
 
 
 
 
 
aaa6686
0fb1e5a
aaa6686
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# app.py
# =============
# Gradio application: the user uploads an audio file and gets back a video of an
# animated frequency-bar visualization with the uploaded audio merged in via ffmpeg.

import os
import subprocess

import cv2
import gradio as gr
import librosa
import matplotlib.pyplot as plt
import numpy as np

# Function to generate frequency visualization frames from audio
def generate_frequency_visualization(audio_path, fps, num_bars, sensitivity):
    """Render one bar-chart PNG per STFT frame of an audio file.

    The audio is loaded at its native sampling rate and transformed with an
    STFT whose hop length is ``sr / fps`` samples, so roughly ``fps`` spectra
    are produced per second of audio. The magnitudes of the first ``num_bars``
    STFT bins are peak-normalized, scaled by ``sensitivity`` and drawn as
    vertical bars on a 1280x720 black canvas.

    Args:
        audio_path: Path of the audio file, passed to ``librosa.load``.
        fps: Target frames per second; must be positive.
        num_bars: Number of bars to draw; must be positive.
        sensitivity: Multiplier applied to the normalized magnitudes.

    Returns:
        ``('frames', duration)`` on success, where ``'frames'`` is the
        directory holding the PNG files and ``duration`` is the audio length
        in seconds. ``(None, None)`` if anything fails; the error is printed.
    """
    frame_width, frame_height = 1280, 720
    max_bar_height = 600
    frames_dir = 'frames'

    try:
        if fps <= 0:
            raise ValueError("fps must be a positive number.")
        if num_bars <= 0:
            raise ValueError("num_bars must be a positive integer.")

        # Load the audio file at its native sampling rate
        signal, sr = librosa.load(audio_path, sr=None)
        duration = librosa.get_duration(y=signal, sr=sr)
        print(f"Loaded audio file with sampling rate: {sr}, and duration: {duration} seconds.")

        if sr == 0 or len(signal) == 0:
            raise ValueError("Invalid audio file: sampling rate or audio data is zero.")

        # One STFT column per video frame; never let the hop collapse to 0
        # (which happens when fps exceeds the sampling rate).
        hop_length = max(1, int(sr / fps))
        spectrum = np.abs(librosa.stft(signal, n_fft=2048, hop_length=hop_length))
        # NOTE: this keeps the num_bars lowest-frequency STFT bins, not bands
        # spread across the whole spectrum.
        spectrum = spectrum[:num_bars, :]

        # Normalize by the peak; silent audio has a zero peak, so avoid 0/0.
        peak = np.max(spectrum) if spectrum.size else 0.0
        if peak > 0:
            spectrum = (spectrum / peak) * sensitivity
        else:
            spectrum = np.zeros_like(spectrum)

        # Bars taller than the canvas are clamped to it (sensitivity > 1.2).
        heights = np.clip(spectrum * max_bar_height, 0, frame_height).astype(int)

        # Start from a clean slate: frames left over from a longer, earlier
        # run would otherwise be appended to this run's video.
        os.makedirs(frames_dir, exist_ok=True)
        for name in os.listdir(frames_dir):
            if name.startswith('frame_') and name.endswith('.png'):
                os.remove(os.path.join(frames_dir, name))

        # Bar geometry and colors do not depend on the frame, so compute once.
        # Bars shrink below 80 px when more than 16 of them have to fit.
        bar_width = max(1, min(80, frame_width // num_bars))
        spacing = max(0, (frame_width - num_bars * bar_width) // (num_bars + 1))
        drawn_bars = heights.shape[0]
        bar_lefts = [spacing + j * (bar_width + spacing) for j in range(drawn_bars)]
        # Viridis yields RGB(A); OpenCV images are BGR, so reverse the channels.
        bar_colors = [
            tuple(int(c * 255) for c in plt.cm.viridis(j / num_bars)[:3])[::-1]
            for j in range(drawn_bars)
        ]

        # Generate and save each frame
        frame_count = heights.shape[1]
        for i, column in enumerate(heights.T):
            img = np.zeros((frame_height, frame_width, 3), dtype=np.uint8)
            for left, color, bar_height in zip(bar_lefts, bar_colors, column):
                top = frame_height - int(bar_height)
                cv2.rectangle(img, (left, frame_height), (left + bar_width, top), color, -1)

            # Six digits keep lexicographic order equal to numeric order well
            # past the 18,000 frames of a 5-minute clip at 60 fps.
            frame_path = os.path.join(frames_dir, f'frame_{i:06d}.png')
            if not cv2.imwrite(frame_path, img):
                raise IOError(f"Could not write frame: {frame_path}")

        print(f"Generated {frame_count} frames for visualization.")
        return frames_dir, duration
    except Exception as e:
        # Boundary handler: the caller expects (None, None) rather than an exception.
        print(f"Error generating frequency visualization: {e}")
        return None, None

# Function to create a video from the generated frames
def create_video_from_frames(frames_directory, audio_path, fps):
    """Assemble the PNG frames into an MP4 and mux the audio track into it.

    Frames are written in numeric order to ``output_video.mp4`` with OpenCV,
    then ffmpeg copies that video stream and encodes ``audio_path`` as AAC
    into ``output_with_audio.mp4``.

    Args:
        frames_directory: Directory containing the ``.png`` frames.
        audio_path: Path of the audio file to merge into the video.
        fps: Frame rate of the generated video.

    Returns:
        ``'output_with_audio.mp4'`` on success, or ``None`` if no frames were
        found, a frame could not be read, the video could not be written, or
        ffmpeg failed. The error is printed.
    """
    video_path = 'output_video.mp4'
    final_path = 'output_with_audio.mp4'

    try:
        # Collect the frame files in playback order
        frame_files = sorted(
            (
                os.path.join(frames_directory, name)
                for name in os.listdir(frames_directory)
                if name.endswith('.png')
            ),
            key=_frame_sort_key,
        )

        if not frame_files:
            raise ValueError("No frames found to create the video.")

        # Get video dimensions from the first frame
        first_frame = cv2.imread(frame_files[0])
        if first_frame is None:
            raise ValueError(f"Could not read frame: {frame_files[0]}")
        height, width = first_frame.shape[:2]

        # Initialize video writer
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        video_writer = cv2.VideoWriter(video_path, fourcc, fps, (width, height))
        try:
            if not video_writer.isOpened():
                raise RuntimeError(f"Could not open video writer for {video_path}")
            for frame_file in frame_files:
                frame = cv2.imread(frame_file)
                if frame is None:
                    raise ValueError(f"Could not read frame: {frame_file}")
                video_writer.write(frame)
        finally:
            # Always finalize the container, even when a frame fails.
            video_writer.release()

        # Merge audio with video using ffmpeg. An argument list (no shell)
        # keeps paths with spaces or shell metacharacters intact, and
        # check=True turns a failed mux into an error instead of returning
        # a path to a missing or stale file.
        command = [
            'ffmpeg', '-y',
            '-i', video_path,
            '-i', audio_path,
            '-c:v', 'copy',
            '-c:a', 'aac',
            '-strict', 'experimental',
            final_path,
        ]
        subprocess.run(command, check=True)

        print(f"Video created with {len(frame_files)} frames.")
        return final_path
    except Exception as e:
        # Boundary handler: the caller expects None rather than an exception.
        print(f"Error creating video from frames: {e}")
        return None


def _frame_sort_key(frame_path):
    """Sort key ordering frame files by numeric index, not lexicographically."""
    stem = os.path.splitext(os.path.basename(frame_path))[0]
    suffix = stem.rsplit('_', 1)[-1]
    if suffix.isdigit():
        return (0, int(suffix), frame_path)
    # Files without a numeric suffix go after all numbered frames, by name.
    return (1, 0, frame_path)

# Gradio interface function
def process_audio(audio, sensitivity):
    """Turn an uploaded audio file into a visualization video.

    Renders the frames at a fixed 60 fps with 12 bars, then assembles them
    into a video together with the audio.

    Args:
        audio: Path of the uploaded audio file.
        sensitivity: Bar-height multiplier forwarded to the frame generator.

    Returns:
        The value returned by ``create_video_from_frames``, or ``None`` when
        frame generation did not produce a frames directory.
    """
    target_fps = 60
    bar_count = 12

    frames_dir, _ = generate_frequency_visualization(audio, target_fps, bar_count, sensitivity)
    if not frames_dir:
        return None
    return create_video_from_frames(frames_dir, audio, target_fps)

# Create the Gradio interface with explanations and recommendations.
# Two inputs (audio upload, sensitivity slider) are passed positionally to
# process_audio; its return value is shown in the video output.
iface = gr.Interface(
    fn=process_audio,
    inputs=[
        # type="filepath": process_audio uses this value as a file path.
        gr.Audio(type="filepath", label="Upload Audio File"),
        # Sensitivity ranges from 0.1 to 5.0 in steps of 0.1 and defaults to 1.0.
        gr.Slider(minimum=0.1, maximum=5.0, step=0.1, value=1.0, label="Sensitivity")
    ],
    outputs=gr.Video(label="Generated Video"),
    title="Audio Frequency Visualization",
    # Adjacent string literals are concatenated into one description string.
    description="Upload an audio file to generate a video with frequency visualization. "
                "Supported file types: WAV, MP3, FLAC. "
                "Recommended file duration: 10 seconds to 5 minutes. "
                "The visualization will consist of 12 bars representing frequency ranges. Adjust sensitivity to control bar movement.",
)

# Launch the Gradio interface only when run as a script, so importing this
# module does not call launch().
if __name__ == "__main__":
    iface.launch()

# Dependencies
# =============
# The following dependencies are required to run this app:
# - gradio
# - librosa
# - numpy
# - opencv-python
# - matplotlib
# - ffmpeg (command-line tool, installed separately and available on PATH)