File size: 1,719 Bytes
4654b73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import gradio as gr
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
from moviepy.editor import VideoClip, AudioFileClip, CompositeVideoClip

def generate_frequency_visualization(audio_file):
    """Render a spectrogram of an audio file and mux it with the audio into an MP4.

    The spectrogram image is written to ``frequency_visualization.png`` and the
    resulting video to ``frequency_visualization.mp4``, both in the current
    working directory. The video shows the same still image for the whole
    duration of the audio.

    Args:
        audio_file: Path to the audio file, or an object exposing the path via
            a ``name`` attribute (such as a temporary-file wrapper).

    Returns:
        The path of the written video file, ``'frequency_visualization.mp4'``.

    Raises:
        ValueError: If ``audio_file`` is ``None``.
    """
    if audio_file is None:
        raise ValueError("No audio file was provided.")

    # Accept either a plain path or a file-like wrapper that carries its path
    # in ``.name``; AudioFileClip below needs a real filename.
    audio_path = getattr(audio_file, 'name', audio_file)

    image_file = 'frequency_visualization.png'
    output_file = 'frequency_visualization.mp4'
    # A VideoClip built from a frame function has no fps of its own, so
    # write_videofile must be told one explicitly.
    fps = 24

    # Load the audio at its native sampling rate
    y, sr = librosa.load(audio_path, sr=None)

    # Compute the Short-Time Fourier Transform (STFT) magnitude in dB,
    # relative to the loudest bin
    D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)

    # Draw the spectrogram and save it as an image
    fig, ax = plt.subplots()
    try:
        img = librosa.display.specshow(D, sr=sr, ax=ax, y_axis='linear', fmax=8000)
        fig.colorbar(img, ax=ax, format="%+2.0f dB")
        ax.set(title='Frequency Visualization')
        ax.axis('off')
        fig.savefig(image_file, bbox_inches='tight', pad_inches=0, dpi=100)
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)

    # Read the image once (not once per frame) and convert it to the uint8 RGB
    # layout the video writer expects.
    frame = plt.imread(image_file)
    if np.issubdtype(frame.dtype, np.floating):
        # Float images are in the 0..1 range; scale to 0..255.
        frame = np.clip(np.rint(frame * 255.0), 0, 255)
    frame = frame.astype(np.uint8)
    if frame.ndim == 2:
        # Grayscale: replicate into three channels.
        frame = np.stack([frame] * 3, axis=-1)
    # Drop the alpha channel if there is one.
    frame = frame[..., :3]
    # Crop to even dimensions so the H.264 encoder can use yuv420p, which most
    # players and browsers require.
    height, width = frame.shape[:2]
    frame = np.ascontiguousarray(frame[:height - height % 2, :width - width % 2])

    # Load the audio track for muxing
    audio_clip = AudioFileClip(audio_path)
    try:
        # Still-image video lasting as long as the audio
        video_clip = VideoClip(lambda t: frame, duration=audio_clip.duration)

        # Combine the audio and video clips
        final_clip = video_clip.set_audio(audio_clip)

        # Write the final video to a file
        final_clip.write_videofile(output_file, fps=fps, codec='libx264', audio_codec='aac')
    finally:
        # Release the audio reader held by the clip.
        audio_clip.close()

    return output_file

# Create the Gradio interface.
# type="filepath" passes the callback a plain path string to the uploaded
# audio; the older type="file" mode is deprecated and passes a temp-file
# object instead of a path.
iface = gr.Interface(
    fn=generate_frequency_visualization,
    inputs=gr.Audio(source="upload", type="filepath"),
    outputs=gr.Video(label="Frequency Visualization Video"),
    title="Audio Frequency Visualization",
    description="Upload an audio file to generate a video with frequency visualization."
)

# Launch the Gradio interface
iface.launch()