Spaces:

surya5289
/

Spectral_Analysis

Sleeping

App Files Files Community

Spectral_Analysis / app.py

surya5289

Update app.py

9a27afb verified 9 months ago

raw

history blame contribute delete

5.3 kB

	import librosa
	import numpy as np
	import matplotlib.pyplot as plt
	import cv2
	import io
	import tempfile
	from PIL import Image
	import gradio as gr
	from gradio_imageslider import ImageSlider

	def generate_mel_spectrogram(audio_path, sr=22050, n_mels=128, fmin=0, fmax=7000):
	    """Load an audio file and compute its Mel spectrogram in decibels.

	    Args:
	        audio_path: Path of the audio file handed to ``librosa.load``.
	        sr: Sampling rate requested from ``librosa.load``.
	        n_mels: Number of Mel bands.
	        fmin: Lowest frequency (Hz) covered by the Mel filter bank.
	        fmax: Highest frequency (Hz) covered by the Mel filter bank.

	    Returns:
	        A tuple ``(mel_db, samples, sample_rate)``: the power spectrogram
	        converted to dB with the spectrogram maximum as reference, the
	        loaded waveform, and the sampling rate reported by the loader.
	    """
	    samples, sample_rate = librosa.load(audio_path, sr=sr)

	    mel_power = librosa.feature.melspectrogram(
	        y=samples,
	        sr=sample_rate,
	        n_mels=n_mels,
	        fmin=fmin,
	        fmax=fmax,
	    )
	    # ref=np.max expresses every bin relative to the loudest one.
	    mel_db = librosa.power_to_db(mel_power, ref=np.max)

	    return mel_db, samples, sample_rate

	def detect_zero_db(spectrogram, threshold, tol):
	    """Build a boolean mask of spectrogram cells close to a dB level.

	    Args:
	        spectrogram: Array-like of dB values.
	        threshold: Target level in dB.
	        tol: Absolute tolerance around ``threshold``.

	    Returns:
	        Boolean array, ``True`` where ``np.isclose`` considers the value
	        equal to ``threshold`` given ``atol=tol`` (NumPy's default relative
	        tolerance still applies on top of it).
	    """
	    near_level = np.isclose(spectrogram, threshold, atol=tol)
	    return near_level

	def plot_spectrogram(spectrogram, file_path, sr=22050, fmin=0, fmax=7000):
	    """Render a Mel spectrogram as a borderless PNG image.

	    Args:
	        spectrogram: 2-D array of dB values to display.
	        file_path: Destination path of the PNG file.
	        sr: Sampling rate used to label the time axis. Defaults to the
	            value that used to be hard-coded here.
	        fmin: Lowest frequency (Hz) of the Mel axis.
	        fmax: Highest frequency (Hz) of the Mel axis.

	    Returns:
	        None. The image is written to ``file_path``.
	    """
	    # Import the submodule explicitly: a bare ``import librosa`` does not
	    # make ``librosa.display`` available in every librosa release.
	    import librosa.display

	    fig = plt.figure(figsize=(6, 6))
	    try:
	        plt.axis('off')
	        librosa.display.specshow(
	            spectrogram,
	            sr=sr,
	            x_axis='time',
	            y_axis='mel',
	            fmin=fmin,
	            fmax=fmax,
	        )
	        plt.savefig(file_path, format='png', bbox_inches='tight', pad_inches=0)
	    finally:
	        # Always release the figure, even if drawing or saving fails, so a
	        # long-running app does not accumulate open figures.
	        plt.close(fig)

	def plot_edge_spectrogram(edges, file_path):
	    """Save the detection mask as a borderless grayscale PNG.

	    Args:
	        edges: 2-D array to display (drawn with the first row at the bottom).
	        file_path: Destination path of the PNG file.
	    """
	    figure, axes = plt.subplots(figsize=(6, 6))
	    axes.axis('off')
	    axes.imshow(edges, cmap='gray', aspect='auto', origin='lower')
	    figure.savefig(file_path, format='png', bbox_inches='tight', pad_inches=0)
	    plt.close(figure)

	def plot_frequency(times, frequencies, label, color, file_path):
	    """Plot one frequency track against time and save it as a PNG.

	    Args:
	        times: X values, in seconds.
	        frequencies: Y values, in Hz.
	        label: Track name, used for the legend entry and the title.
	        color: Matplotlib color of the line.
	        file_path: Destination path of the PNG file.
	    """
	    figure, axes = plt.subplots(figsize=(12, 6))
	    axes.plot(times, frequencies, label=label, color=color, linewidth=2)
	    axes.set_title(f'{label} Frequency')
	    axes.set_xlabel('Time (s)')
	    axes.set_ylabel('Frequency (Hz)')
	    axes.legend()

	    # Write the finished plot to disk, then release the figure.
	    figure.savefig(file_path, format='png', bbox_inches='tight', pad_inches=0)
	    plt.close(figure)

	def _reserve_png_path():
	    """Create an empty temporary ``.png`` file and return its path."""
	    # Close the handle immediately: the plotting helpers reopen the file by
	    # name, which is not possible on every platform while it is still open.
	    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as handle:
	        return handle.name


	def _estimate_formants(y, sr, voiced_flag, hop_length, frame_length, lpc_order):
	    """Estimate F1 and F2 for each voiced analysis frame via LPC.

	    Args:
	        y: Waveform samples.
	        sr: Sampling rate of ``y`` in Hz.
	        voiced_flag: Per-frame boolean array (one entry per analysis frame).
	        hop_length: Samples between consecutive frame centers.
	        frame_length: Number of samples analysed per frame.
	        lpc_order: Order of the LPC polynomial.

	    Returns:
	        Array of shape ``(n_frames, 2)`` holding F1 and F2 in Hz; rows stay
	        NaN for unvoiced frames and for frames where estimation failed.
	    """
	    formants = np.full((len(voiced_flag), 2), np.nan)
	    half_window = frame_length // 2

	    for i in np.flatnonzero(voiced_flag):
	        # Frame i is centered on sample i * hop_length, matching the time
	        # axis produced by librosa.times_like with the same hop length.
	        center = int(i) * hop_length
	        frame = y[max(0, center - half_window):center + half_window]
	        if len(frame) <= lpc_order:
	            continue  # too short to fit an LPC model of this order

	        try:
	            coeffs = librosa.lpc(frame, order=lpc_order)
	        except FloatingPointError:
	            continue  # ill-conditioned frame (e.g. digital silence)

	        poles = np.roots(coeffs)
	        # Keep one pole of each conjugate pair. The inequality is strict so
	        # real poles (angle 0) are not reported as a 0 Hz "formant".
	        poles = poles[np.imag(poles) > 0]
	        freqs = np.sort(np.angle(poles) * (sr / (2 * np.pi)))

	        if len(freqs) >= 2:
	            formants[i, 0] = freqs[0]  # F1
	            formants[i, 1] = freqs[1]  # F2

	    return formants


	def process_audio(threshold, audio_file, tol):
	    """Run the full analysis for one upload and return the image paths.

	    Args:
	        threshold: dB level passed to ``detect_zero_db``.
	        audio_file: Path of the uploaded audio file, or ``None`` when
	            nothing was uploaded.
	        tol: Tolerance passed to ``detect_zero_db``.

	    Returns:
	        ``([mel_png, mask_png], f0_png, f1_png, f2_png)`` - paths of the
	        generated PNG files, in the order expected by the UI outputs.

	    Raises:
	        gr.Error: If no audio file was provided.
	    """
	    if audio_file is None:
	        raise gr.Error("Please upload an audio file before submitting.")

	    mel_spectrogram, y, sr = generate_mel_spectrogram(audio_file)
	    edges = detect_zero_db(mel_spectrogram, threshold, tol)

	    # The files are created with delete=False so they outlive this call;
	    # the returned paths are consumed by the caller afterwards.
	    mel_spectrogram_img = _reserve_png_path()
	    edge_spectrogram_img = _reserve_png_path()
	    f0_img = _reserve_png_path()
	    f1_img = _reserve_png_path()
	    f2_img = _reserve_png_path()

	    # Save the Mel spectrogram and the detection mask.
	    plot_spectrogram(mel_spectrogram, mel_spectrogram_img)
	    plot_edge_spectrogram(edges, edge_spectrogram_img)

	    # Use one explicit framing for pitch tracking, the time axis and the
	    # formant windows so the three stay aligned.
	    frame_length = 2048
	    hop_length = 512
	    f0, voiced_flag, _ = librosa.pyin(
	        y,
	        fmin=librosa.note_to_hz('C2'),
	        fmax=librosa.note_to_hz('C7'),
	        sr=sr,
	        frame_length=frame_length,
	        hop_length=hop_length,
	    )
	    times = librosa.times_like(f0, sr=sr, hop_length=hop_length)

	    plot_frequency(times, f0, 'F0', 'cyan', f0_img)

	    # Formant frequency (F1 and F2) detection using LPC.
	    # NOTE(review): order 5 yields at most two complex pole pairs; confirm
	    # this low order is intended for the sampling rate in use.
	    lpc_order = 5
	    formants = _estimate_formants(
	        y, sr, voiced_flag, hop_length, frame_length, lpc_order
	    )

	    plot_frequency(times, formants[:, 0], 'F1', 'magenta', f1_img)
	    plot_frequency(times, formants[:, 1], 'F2', 'yellow', f2_img)

	    return [mel_spectrogram_img, edge_spectrogram_img], f0_img, f1_img, f2_img

	# Gradio UI: two sliders and an audio upload feed process_audio, whose four
	# results fill the before/after image slider and the three frequency plots.
	# NOTE(review): the original indentation was lost; all widgets are assumed
	# to sit inside the group - confirm the intended layout.
	with gr.Blocks() as demo:
	    with gr.Group():
	        threshold_slider = gr.Slider(-100, 0, value=-2, info="Choose between -100 and 0", label="db Level")
	        # The info text must state the slider's real range (0-45, default 30).
	        tol_slider = gr.Slider(0, 45, value=30, info="Choose between 0 and 45", label="Tolerance")
	        audio_input = gr.Audio(label="Upload an audio file in WAV format", type="filepath")
	        submit_button = gr.Button("Submit")
	        img_slider = ImageSlider(label="Before and After Edge Detection", type="filepath", slider_color="pink")
	        f0_plot = gr.Image(label="F0 Frequency Plot", type="filepath")
	        f1_plot = gr.Image(label="F1 Frequency Plot", type="filepath")
	        f2_plot = gr.Image(label="F2 Frequency Plot", type="filepath")

	    # Input order must match process_audio(threshold, audio_file, tol).
	    submit_button.click(
	        process_audio,
	        inputs=[threshold_slider, audio_input, tol_slider],
	        outputs=[img_slider, f0_plot, f1_plot, f2_plot],
	    )

	if __name__ == "__main__":
	    demo.launch()