# Source: Hugging Face Space file "app.py" by Ivan000 (commit 4654b73, ~1.72 kB).
# Web-page chrome ("raw / history / blame" etc.) removed so the file parses as Python.
import gradio as gr
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
from moviepy.editor import VideoClip, AudioFileClip, CompositeVideoClip
def generate_frequency_visualization(audio_file):
    """Render a spectrogram of *audio_file* and combine it with the audio into an MP4.

    Parameters
    ----------
    audio_file : str
        Path to an audio file readable by librosa / MoviePy.

    Returns
    -------
    str
        Path to the written video file ('frequency_visualization.mp4').

    Raises
    ------
    ValueError
        If the audio file decodes to zero samples.
    """
    # Load the audio at its native sampling rate (sr=None disables resampling).
    y, sr = librosa.load(audio_file, sr=None)
    if y.size == 0:
        raise ValueError("Audio file contains no samples.")

    # Short-Time Fourier Transform magnitude, converted to dB relative to the peak.
    D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)

    # Plot the spectrogram and save it as a borderless PNG.
    fig, ax = plt.subplots()
    img = librosa.display.specshow(D, sr=sr, ax=ax, y_axis='linear', fmax=8000)
    fig.colorbar(img, ax=ax, format="%+2.0f dB")
    ax.set(title='Frequency Visualization')
    plt.axis('off')
    plt.savefig('frequency_visualization.png', bbox_inches='tight', pad_inches=0, dpi=100)
    plt.close(fig)

    audio_clip = AudioFileClip(audio_file)
    try:
        # Read the PNG ONCE (the original re-read it from disk for every frame).
        # plt.imread returns float RGBA in [0, 1] for PNGs, but MoviePy expects
        # uint8 RGB frames — drop alpha and rescale, or the video comes out garbled.
        frame = plt.imread('frequency_visualization.png')
        frame = (frame[:, :, :3] * 255).astype(np.uint8)

        # Static image for the whole duration of the audio.
        video_clip = VideoClip(lambda t: frame, duration=audio_clip.duration)
        final_clip = video_clip.set_audio(audio_clip)

        output_file = 'frequency_visualization.mp4'
        # fps is REQUIRED for a generative VideoClip (it has no intrinsic frame
        # rate); the original call without fps raises inside write_videofile.
        final_clip.write_videofile(output_file, fps=24, codec='libx264', audio_codec='aac')
    finally:
        # Release the ffmpeg reader held by the audio clip.
        audio_clip.close()
    return output_file
# Build the Gradio interface around the visualization function.
iface = gr.Interface(
    fn=generate_frequency_visualization,
    # type="filepath" hands the callback a plain path string, which is what
    # librosa.load and AudioFileClip expect. The original source="upload" /
    # type="file" arguments were removed in Gradio 4.x and would fail there.
    inputs=gr.Audio(type="filepath"),
    outputs=gr.Video(label="Frequency Visualization Video"),
    title="Audio Frequency Visualization",
    description="Upload an audio file to generate a video with frequency visualization.",
)

# Launch the Gradio interface (blocks until the server is stopped).
iface.launch()