File size: 2,723 Bytes
31a07fa
 
 
 
 
 
 
 
 
 
b1e98dc
31a07fa
 
 
 
 
 
 
 
 
 
 
b1e98dc
 
 
2337adf
b1e98dc
 
 
31a07fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
739a7b0
31a07fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
739a7b0
31a07fa
739a7b0
31a07fa
 
e0fdf16
 
31a07fa
 
 
 
 
 
e0fdf16
31a07fa
 
 
 
 
 
 
 
739a7b0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import gradio as gr
import matplotlib.pyplot as plt
import subprocess
import re
import logging
import os
import numpy as np
import matplotlib
import scipy.io
import scipy.io.wavfile
from pathlib import Path

# Agg is matplotlib's non-interactive raster backend; figures are rendered
# off-screen and handed back to the web UI rather than shown in a window.
matplotlib.use('Agg')

# Configure the root logger so the logging.info() calls below are emitted.
logging.basicConfig(level=logging.INFO)

# Fields logged by ffmpeg's silencedetect filter when a silent span ends.
# The fractional part is optional because ffmpeg may print whole-second
# values without a decimal point (e.g. "silence_duration: 2").
_SILENCE_DURATION_RE = re.compile(r'silence_duration: (\d+(?:\.\d+)?)')
_SILENCE_END_RE = re.compile(r'silence_end: (\d+(?:\.\d+)?)')


def _parse_silence_chunks(lines):
    """Sum the silence durations and collect the silent spans found in *lines*."""
    ts = 0
    chunks = []
    for line in lines:
        duration_match = _SILENCE_DURATION_RE.search(line)
        if not duration_match:
            continue
        chunk_time = float(duration_match.group(1))
        ts = ts + chunk_time
        end_match = _SILENCE_END_RE.search(line)
        if end_match:
            t_end = float(end_match.group(1))
            # The start is not on this log line, so derive it from end - duration.
            chunks.append([t_end - chunk_time, t_end, chunk_time])
    return ts, chunks


def get_chunk_times(in_filename, silence_threshold, silence_duration=1):
    """Detect the silent spans of an audio file with ffmpeg's silencedetect filter.

    Args:
        in_filename: Path of the audio file to analyse.
        silence_threshold: Noise threshold magnitude in dB. It is negated when
            handed to ffmpeg, so ``30`` becomes ``n=-30dB``.
        silence_duration: Value passed as silencedetect's ``d`` option
            (minimum length of a silent span).

    Returns:
        A ``(ts, chunks)`` tuple. ``ts`` is the sum of all reported silence
        durations (``0`` when none are reported). ``chunks`` is a list of
        ``[t_start, t_end, duration]`` lists, one per reported silent span.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.

    The ffmpeg exit status is not checked: if ffmpeg fails, its output simply
    contains no silence reports and ``(0, [])`` is returned.
    """
    logging.info(f"File {in_filename} exists? = {os.path.exists(in_filename)}")

    fpath = Path(in_filename).absolute()

    logging.info(f"Absolue path: {fpath}")

    # Pass the arguments as a list: a single command string without a shell is
    # treated as the program name on POSIX, and a list also keeps paths that
    # contain spaces or shell metacharacters intact.
    command = [
        "ffmpeg",
        "-i", str(fpath),
        "-af", f"silencedetect=n=-{silence_threshold}dB:d={silence_duration}",
        "-f", "null",
        "-",
    ]
    # ffmpeg writes its log to stderr, so fold it into the captured stdout.
    completed = subprocess.run(
        command,
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )

    # Decode once instead of matching against the repr of each bytes line.
    output = completed.stdout.decode("utf-8", errors="replace")
    ts, chunks = _parse_silence_chunks(output.splitlines())

    logging.info(f"TS audio {os.path.basename(in_filename)} = {ts}")
    return ts, chunks
        

def get_audio_plot(filename, chunks):
    """Plot the waveform of a WAV file with the given spans shaded in gray.

    Args:
        filename: Path of the WAV file; it is read with
            ``scipy.io.wavfile.read``.
        chunks: Iterable of sequences whose first two items are the start and
            end of a span on the time axis (seconds). Extra items are ignored.

    Returns:
        The matplotlib figure holding the plot.
    """
    fig, ax = plt.subplots()
    fig.set_size_inches(18.5, 10.5)

    sample_rate, audio_buffer = scipy.io.wavfile.read(filename)

    # Derive the time axis from the sample indices so it always has exactly one
    # entry per sample; arange() with a fractional step can come out one
    # element long or short, which makes ax.plot() reject the shapes.
    time = np.arange(len(audio_buffer)) / sample_rate

    ax.plot(time, audio_buffer)

    # ndarray reductions work for mono and multi-channel data alike; the
    # builtin min()/max() fail on 2-D arrays. Fall back to 0 for empty audio.
    if audio_buffer.size:
        y1 = audio_buffer.min()
        y2 = audio_buffer.max()
    else:
        y1 = y2 = 0

    # Shade each span over the full amplitude range of the signal.
    for c in chunks:
        ax.fill_between(c[0:2], y1, y2,
                        color='gray', alpha=0.5)

    # Label through the axes object instead of pyplot's global "current" state.
    ax.set_xlabel('Time [s]')
    ax.set_ylabel('Amplitude')
    ax.set_title(os.path.basename(filename))

    return fig
    

def get_audio_info(audio):
    """Return the total silence time (as a string) and the waveform plot for *audio*.

    Silence detection is delegated to ``get_chunk_times`` with a threshold of
    30 and a duration of 1; the detected spans are then passed to
    ``get_audio_plot`` together with the same file.
    """
    total_silence, silent_spans = get_chunk_times(audio, 30, 1)
    return str(total_silence), get_audio_plot(audio, silent_spans)

# Output components: a text box for the silence time and an image rendered
# from the plot returned by the callback.
otext = gr.outputs.Textbox(type="auto", label="Silence time")
oplot = gr.outputs.Image(type="plot", label=None)

# Input component: an uploaded audio clip, requested as a file path.
iaudio = gr.inputs.Audio(source="upload", type="filepath", label=None)

# Wire the callback to its single input and its two outputs.
iface = gr.Interface(
    fn=get_audio_info,
    inputs=iaudio,
    outputs=[otext, oplot],
    description="Enter .WAV audio to view silence areas",
)

# Start serving the interface.
iface.launch()