Spaces:
Running
Running
File size: 4,302 Bytes
7530063 4654b73 7530063 4654b73 7530063 05111e0 d3701bd 4654b73 05111e0 4654b73 05111e0 d3701bd 05111e0 516a722 05111e0 516a722 05111e0 516a722 4752cc4 05111e0 7530063 4654b73 05111e0 7530063 05111e0 7530063 4654b73 7530063 05111e0 4654b73 05111e0 4654b73 05111e0 16f11e5 05111e0 d3701bd 05111e0 7530063 4654b73 05111e0 4654b73 7530063 05111e0 7530063 4654b73 05111e0 4654b73 7530063 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
# app.py
# =============
# This is a complete app.py file for a Gradio application that allows users to upload an audio file and generate a video with frequency visualization.
import gradio as gr
import numpy as np
import matplotlib.pyplot as plt
import librosa
import librosa.display
import os
import moviepy.video.io.ImageSequenceClip
# Function to generate frequency visualization frames from audio
def _reset_frames_directory(frames_dir):
    """Create *frames_dir* if needed and delete any PNG frames left in it."""
    os.makedirs(frames_dir, exist_ok=True)
    for name in os.listdir(frames_dir):
        if name.endswith('.png'):
            os.remove(os.path.join(frames_dir, name))


def generate_frequency_visualization(audio_path):
    """Render one spectrum image per STFT time frame of an audio file.

    The audio is loaded at its native sampling rate, converted to a
    dB-scaled magnitude spectrogram, and every spectrogram column is saved
    as ``frames/frame_<index>.png``.

    Args:
        audio_path: Path of the audio file to visualize.

    Returns:
        The name of the directory holding the frames (always ``'frames'``).
        If anything goes wrong, the error is printed and the directory is
        filled by ``generate_default_visualization()`` instead.
    """
    frames_dir = 'frames'
    try:
        # Load the audio file at its native sampling rate
        y, sr = librosa.load(audio_path, sr=None)
        # Validate before the duration is computed so that unusable input is
        # reported with the intended message.
        if sr == 0 or len(y) == 0:
            raise ValueError("Invalid audio file: sampling rate or audio data is zero.")
        print(f"Loaded audio file with sampling rate: {sr}, and duration: {librosa.get_duration(y=y, sr=sr)} seconds.")

        # Perform Short-Time Fourier Transform (STFT) and convert to dB
        n_fft = 2048
        hop_length = 512
        D = librosa.amplitude_to_db(np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length)), ref=np.max)
        n_frames = D.shape[1]

        # Start from an empty directory: frames left over from a previous
        # (possibly longer) run would otherwise end up in the new video.
        _reset_frames_directory(frames_dir)

        # Pad indices widely enough that a plain string sort of the file
        # names matches numeric order, even beyond 9999 frames. The width
        # never drops below 4, so short clips keep the frame_0000.png naming.
        index_width = max(4, len(str(max(n_frames - 1, 0))))

        # Generate and save one image per spectrogram column (time frame)
        for i in range(n_frames):
            plt.figure(figsize=(10, 6))
            try:
                librosa.display.specshow(D[:, i].reshape(-1, 1), sr=sr, x_axis='time', y_axis='log', hop_length=hop_length, cmap='viridis')
                plt.axis('off')
                plt.savefig(os.path.join(frames_dir, f'frame_{i:0{index_width}d}.png'), bbox_inches='tight', pad_inches=0)
            finally:
                # Always release the figure, even if drawing or saving failed
                plt.close()
        print(f"Generated {n_frames} frames for visualization.")
        return frames_dir
    except Exception as e:
        # Deliberate best-effort boundary: any failure falls back to the
        # default visualization instead of surfacing an error to the user.
        print(f"Error generating frequency visualization: {e}")
        # Drop partially written frames so they are not mixed with the
        # fallback frames.
        _reset_frames_directory(frames_dir)
        generate_default_visualization()
        return frames_dir
# Function to generate a default visualization
def generate_default_visualization():
    """Write ten placeholder frames into the ``frames`` directory.

    Each frame plots the same sine curve scaled by a growing amplitude
    (1 for the first frame up to 10 for the last) and is saved as
    ``frames/frame_0000.png`` through ``frames/frame_0009.png``.
    """
    os.makedirs('frames', exist_ok=True)

    # The underlying curve is identical for every frame; only its scale changes.
    base_wave = np.sin(np.linspace(0, 10, 100))

    for i, amplitude in enumerate(range(1, 11)):
        plt.figure(figsize=(10, 6))
        plt.plot(base_wave * amplitude)
        plt.axis('off')
        plt.savefig(f'frames/frame_{i:04d}.png', bbox_inches='tight', pad_inches=0)
        plt.close()
# Function to create a video from the generated frames
def _frame_sort_key(path):
    """Order frame files by their trailing numeric index, then by path."""
    stem = os.path.splitext(os.path.basename(path))[0]
    suffix = stem.rpartition('_')[2]
    if suffix.isdecimal():
        return (0, int(suffix), path)
    # Files without a numeric suffix are placed after all numbered frames.
    return (1, 0, path)


def create_video_from_frames(frames_directory, fps=10, video_path='output_video.mp4'):
    """Assemble the PNG frames of a directory into an H.264 video.

    Args:
        frames_directory: Directory containing the ``*.png`` frame images.
        fps: Frames per second of the resulting video (default 10).
        video_path: Where the video file is written
            (default ``'output_video.mp4'``).

    Returns:
        ``video_path`` on success, or ``None`` if the directory cannot be
        read, contains no PNG files, or the video cannot be written. The
        error is printed in that case.
    """
    try:
        # Sort numerically by frame index: a plain string sort would put
        # frame_10000.png before frame_1001.png.
        frame_files = sorted(
            (
                os.path.join(frames_directory, f)
                for f in os.listdir(frames_directory)
                if f.endswith('.png')
            ),
            key=_frame_sort_key,
        )
        if not frame_files:
            raise ValueError("No frames found to create the video.")

        # Create a video from the frames
        clip = moviepy.video.io.ImageSequenceClip.ImageSequenceClip(frame_files, fps=fps)
        clip.write_videofile(video_path, codec='libx264')
        print(f"Video created with {len(frame_files)} frames.")
        return video_path
    except Exception as e:
        # Best-effort boundary: the caller receives None instead of an error.
        print(f"Error creating video from frames: {e}")
        return None
# Gradio interface function
def process_audio(audio):
    """Turn an uploaded audio file into a visualization video.

    Args:
        audio: Path of the uploaded audio file, as passed in by the
            Gradio interface.

    Returns:
        Whatever ``create_video_from_frames`` returns for the frames
        directory produced by ``generate_frequency_visualization``.
    """
    # Render the frames first, then stitch them together into the video.
    return create_video_from_frames(generate_frequency_visualization(audio))
# Create the Gradio interface with explanations and recommendations
# Text shown under the title of the web page
_description = (
    "Upload an audio file to generate a video with frequency visualization. "
    "Supported file types: WAV, MP3, FLAC. "
    "Recommended file duration: 10 seconds to 5 minutes. "
    "If the file is invalid or cannot be processed, a default visualization will be generated."
)

# The audio component hands the handler a file path; the handler's result
# is shown in the video component.
_audio_input = gr.Audio(type="filepath", label="Upload Audio File")
_video_output = gr.Video(label="Generated Video")

iface = gr.Interface(
    title="Audio Frequency Visualization",
    description=_description,
    fn=process_audio,
    inputs=_audio_input,
    outputs=_video_output,
)

# Start the web server only when the file is executed as a script
if __name__ == "__main__":
    iface.launch()
# Dependencies
# =============
# The following dependencies are required to run this app:
# - gradio
# - librosa
# - numpy
# - matplotlib
# - moviepy
|