# NOTE: removed non-Python page-scrape artifacts (commit-hash list and
# line-number gutter) that preceded the module and would break parsing.
# app.py
# =============
# This is a complete app.py file for a Gradio application that allows users to upload an audio file and generate a video with frequency visualization.
# Standard library
import os
import subprocess

# Third-party
import cv2
import gradio as gr
import librosa
import matplotlib.pyplot as plt
import numpy as np
# Function to generate frequency visualization frames from audio
def generate_frequency_visualization(audio_path, fps, num_bars, sensitivity):
    """Render one bar-chart PNG per STFT frame of the audio into ./frames.

    Args:
        audio_path: Path to the audio file to analyze.
        fps: Target frames per second; sets the STFT hop length.
        num_bars: Number of frequency bars (lowest STFT bins) to draw.
        sensitivity: Multiplier applied to the normalized magnitudes.

    Returns:
        Tuple of (frames directory, audio duration in seconds), or
        (None, None) on any failure (logged to stdout).
    """
    try:
        # Load the audio at its native sampling rate
        y, sr = librosa.load(audio_path, sr=None)
        duration = librosa.get_duration(y=y, sr=sr)
        print(f"Loaded audio file with sampling rate: {sr}, and duration: {duration} seconds.")
        if sr == 0 or len(y) == 0:
            raise ValueError("Invalid audio file: sampling rate or audio data is zero.")
        # Short-Time Fourier Transform; hop length matches the desired fps.
        # max(1, ...) guards against a degenerate hop when sr < fps.
        hop_length = max(1, int(sr / fps))
        S = np.abs(librosa.stft(y, n_fft=2048, hop_length=hop_length))
        S = S[:num_bars, :]  # keep only the lowest num_bars frequency bands
        # Normalize magnitudes; a zero peak (pure silence) would divide by zero
        peak = np.max(S)
        if peak == 0:
            raise ValueError("Audio contains only silence; nothing to visualize.")
        S = (S / peak) * sensitivity
        # Create the frames directory and purge frames from any previous run,
        # otherwise a shorter clip would inherit the old run's trailing frames.
        os.makedirs('frames', exist_ok=True)
        for stale in os.listdir('frames'):
            if stale.endswith('.png'):
                os.remove(os.path.join('frames', stale))
        bar_width = 80
        spacing = (1280 - num_bars * bar_width) // (num_bars + 1)
        for i in range(S.shape[1]):
            # Black 720p background
            img = np.zeros((720, 1280, 3), dtype=np.uint8)
            # Clip so high sensitivity values cannot draw past the top edge
            heights = np.clip((S[:, i] * 600).astype(int), 0, 720)
            for j, height in enumerate(heights):
                x = spacing + j * (bar_width + spacing)
                y_top = 720 - height  # distinct name: don't shadow the audio signal y
                # Viridis colormap gives RGB; OpenCV writes BGR, so reverse channels
                rgb = plt.cm.viridis(j / num_bars)[:3]
                color = tuple(int(c * 255) for c in reversed(rgb))
                cv2.rectangle(img, (x, 720), (x + bar_width, y_top), color, -1)
            cv2.imwrite(f'frames/frame_{i:04d}.png', img)
        print(f"Generated {S.shape[1]} frames for visualization.")
        return 'frames', duration
    except Exception as e:
        print(f"Error generating frequency visualization: {e}")
        return None, None
# Function to create a video from the generated frames
def create_video_from_frames(frames_directory, audio_path, fps):
    """Assemble the PNG frames into an MP4 and mux the original audio onto it.

    Args:
        frames_directory: Directory containing frame_XXXX.png files.
        audio_path: Path to the source audio file to mux in.
        fps: Frame rate for the output video.

    Returns:
        Path to 'output_with_audio.mp4' on success, or None on failure
        (logged to stdout).
    """
    try:
        # Collect frames in name order (frame_0000.png, frame_0001.png, ...)
        frame_files = sorted(
            os.path.join(frames_directory, f)
            for f in os.listdir(frames_directory)
            if f.endswith('.png')
        )
        if not frame_files:
            raise ValueError("No frames found to create the video.")
        # Video dimensions come from the first frame
        first_frame = cv2.imread(frame_files[0])
        if first_frame is None:
            raise ValueError(f"Could not read frame: {frame_files[0]}")
        height, width, _ = first_frame.shape
        video_path = 'output_video.mp4'
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        video_writer = cv2.VideoWriter(video_path, fourcc, fps, (width, height))
        try:
            for frame_file in frame_files:
                frame = cv2.imread(frame_file)
                if frame is not None:  # skip unreadable frames instead of writing None
                    video_writer.write(frame)
        finally:
            # Always release the writer, even if a read/write raised mid-loop
            video_writer.release()
        # Mux audio with ffmpeg. List-form subprocess.run avoids the shell
        # entirely, so paths with spaces/metacharacters are safe (the old
        # os.system string interpolation broke on them).
        result = subprocess.run(
            ['ffmpeg', '-i', video_path, '-i', audio_path,
             '-c:v', 'copy', '-c:a', 'aac', '-strict', 'experimental',
             'output_with_audio.mp4', '-y'],
            capture_output=True,
        )
        if result.returncode != 0:
            raise RuntimeError(
                f"ffmpeg failed: {result.stderr.decode(errors='ignore')}")
        print(f"Video created with {len(frame_files)} frames.")
        return 'output_with_audio.mp4'
    except Exception as e:
        print(f"Error creating video from frames: {e}")
        return None
# Gradio interface function
def process_audio(audio, sensitivity):
    """Gradio callback: turn an uploaded audio file into a visualization video.

    Args:
        audio: Filepath of the uploaded audio (None when nothing uploaded).
        sensitivity: Bar-movement sensitivity from the UI slider.

    Returns:
        Path to the generated video, or None if input is missing or any
        stage fails.
    """
    # Gradio passes None when the user hits submit without uploading a file;
    # the original code would crash inside librosa.load on that.
    if audio is None:
        return None
    fps = 60
    num_bars = 12
    frames_directory, duration = generate_frequency_visualization(
        audio, fps, num_bars, sensitivity)
    if frames_directory is None:
        return None
    return create_video_from_frames(frames_directory, audio, fps)
# Create the Gradio interface with explanations and recommendations
# Inputs: an audio file (passed to process_audio as a filepath) and a
# sensitivity slider (0.1-5.0); output: the generated MP4 video.
iface = gr.Interface(
fn=process_audio,
inputs=[
gr.Audio(type="filepath", label="Upload Audio File"),
gr.Slider(minimum=0.1, maximum=5.0, step=0.1, value=1.0, label="Sensitivity")
],
outputs=gr.Video(label="Generated Video"),
title="Audio Frequency Visualization",
description="Upload an audio file to generate a video with frequency visualization. "
"Supported file types: WAV, MP3, FLAC. "
"Recommended file duration: 10 seconds to 5 minutes. "
"The visualization will consist of 12 bars representing frequency ranges. Adjust sensitivity to control bar movement.",
)
# Launch the Gradio interface only when run as a script (not on import)
if __name__ == "__main__":
iface.launch()
# Dependencies
# =============
# The following dependencies are required to run this app:
# - librosa
# - numpy
# - opencv-python
# - matplotlib
# - ffmpeg (installed separately)
# (end of file)