silence-demo / app.py
ibombonato's picture
Update app.py
b1e98dc
raw
history blame
2.72 kB
import gradio as gr
import matplotlib.pyplot as plt
import subprocess
import re
import logging
import os
import numpy as np
import matplotlib
import scipy.io
import scipy.io.wavfile
from pathlib import Path
# Select matplotlib's non-interactive Agg backend so figures are rendered
# to in-memory images rather than through a GUI toolkit.
matplotlib.use('Agg')
# Configure the root logger so the logging.info() calls in this module are
# emitted. (The former bare `logging.getLogger()` call discarded its result
# and had no effect, so it was dropped.)
logging.basicConfig(level=logging.INFO)
# Numeric token as it may appear in silencedetect log lines: optional sign,
# optional fractional part, optional exponent. Whole-number values such as
# "silence_duration: 2" must match too.
_FFMPEG_NUMBER = r"(-?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?)"
_SILENCE_DURATION_RE = re.compile(r"silence_duration:\s*" + _FFMPEG_NUMBER)
_SILENCE_END_RE = re.compile(r"silence_end:\s*" + _FFMPEG_NUMBER)


def _parse_silence_log(log_text):
    """Extract the total silence time and silent intervals from ffmpeg output."""
    total = 0
    chunks = []
    for line in log_text.splitlines():
        duration_match = _SILENCE_DURATION_RE.search(line)
        if duration_match is None:
            continue
        duration = float(duration_match.group(1))
        total = total + duration
        # Only build an interval when the end time is reported on the same
        # line as the duration; otherwise the start time cannot be derived
        # without reusing a value from an unrelated line.
        end_match = _SILENCE_END_RE.search(line)
        if end_match is not None:
            t_end = float(end_match.group(1))
            chunks.append([t_end - duration, t_end, duration])
    return total, chunks


def get_chunk_times(in_filename, silence_threshold, silence_duration=1):
    """Run ffmpeg's ``silencedetect`` filter on a file and collect the results.

    Args:
        in_filename: path of the audio file to analyse.
        silence_threshold: noise threshold in dB, given as a positive number;
            it is negated when building the filter (``30`` -> ``n=-30dB``).
        silence_duration: value passed as the filter's ``d=`` option.

    Returns:
        A tuple ``(ts, chunks)`` where ``ts`` is the sum of all reported
        silence durations and ``chunks`` is a list of
        ``[start, end, duration]`` lists, one per reported silent interval.

    Raises:
        FileNotFoundError: if the ``ffmpeg`` executable cannot be found.
    """
    logging.info(f"File {in_filename} exists? = {os.path.exists(in_filename)}")
    fpath = Path(in_filename).absolute()
    logging.info(f"Absolue path: {fpath}")
    # Pass an argument list rather than one command string: a plain string
    # without shell=True is not split into arguments on POSIX, and a list
    # keeps paths containing spaces or shell metacharacters intact.
    command = [
        "ffmpeg",
        "-i", str(fpath),
        "-af", f"silencedetect=n=-{silence_threshold}dB:d={silence_duration}",
        "-f", "null",
        "-",
    ]
    # stderr is merged into stdout so the filter's log lines are captured
    # whichever stream ffmpeg writes them to; stdin is detached so ffmpeg
    # cannot consume the parent process's input.
    result = subprocess.run(
        command,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        encoding="utf-8",
        errors="replace",
    )
    if result.returncode != 0:
        logging.warning(
            f"ffmpeg exited with code {result.returncode} for {fpath}"
        )
    ts, chunks = _parse_silence_log(result.stdout)
    logging.info(f"TS audio {os.path.basename(in_filename)} = {ts}")
    return ts, chunks
def get_audio_plot(filename, chunks):
    """Plot the waveform of a WAV file and shade the given time intervals.

    Args:
        filename: path of a WAV file readable by ``scipy.io.wavfile.read``.
        chunks: iterable of sequences whose first two items are the start
            and end (in seconds) of a region to shade in gray.

    Returns:
        The matplotlib figure containing the plot.
    """
    fig, ax = plt.subplots()
    fig.set_size_inches(18.5, 10.5)
    sample_rate, audio = scipy.io.wavfile.read(filename)
    # Derive the time axis from integer sample indices so it always has
    # exactly one entry per sample; np.arange with a fractional step can
    # come out one element longer or shorter than the sample array.
    time = np.arange(len(audio)) / sample_rate
    ax.plot(time, audio)
    # Skip shading for empty audio, where min/max are undefined.
    if audio.size:
        # Array-level min/max also work for 2-D (multi-channel) data, where
        # the builtin min()/max() would raise.
        y_low = audio.min()
        y_high = audio.max()
        for c in chunks:
            ax.fill_between(c[0:2], y_low, y_high,
                            color='gray', alpha=0.5)
    # Label through the Axes object instead of pyplot's implicit "current
    # figure" state, and return the figure that was created here.
    ax.set_xlabel('Time [s]')
    ax.set_ylabel('Amplitude')
    ax.set_title(os.path.basename(filename))
    return fig
def get_audio_info(audio):
    """Analyse one audio file and return its silence total and waveform plot.

    Calls ``get_chunk_times`` with a silence threshold of 30 and a silence
    duration of 1, then passes the detected chunks to ``get_audio_plot``.

    Args:
        audio: path of the audio file to analyse.

    Returns:
        A tuple of the total silence time converted to ``str`` and the plot
        returned by ``get_audio_plot``.
    """
    total_silence, silent_spans = get_chunk_times(audio, 30, 1)
    figure = get_audio_plot(audio, silent_spans)
    return str(total_silence), figure
# Input component: an uploaded audio file, handed to the callback as a path.
iaudio = gr.inputs.Audio(source="upload", type="filepath", label=None)

# Output components: the silence total as text, and the waveform plot.
otext = gr.outputs.Textbox(type="auto", label="Silence time")
oplot = gr.outputs.Image(type="plot", label=None)

# Wire the callback to its input and outputs, then start the app.
iface = gr.Interface(
    fn=get_audio_info,
    inputs=iaudio,
    outputs=[otext, oplot],
    description="Enter .WAV audio to view silence areas",
)
iface.launch()