Commit
·
82f19d8
1
Parent(s):
b36d167
fix: typo
Browse files
app.py
CHANGED
@@ -1,34 +1,35 @@
|
|
1 |
-
import gradio as gr
|
2 |
from helper import process_audio
|
3 |
-
import
|
4 |
-
import numpy as np
|
5 |
|
6 |
# Sample audio file paths
|
7 |
SAMPLE_SPEECH = "anushka.wav"
|
8 |
SAMPLE_NOISE = "traffic.wav"
|
9 |
|
|
|
10 |
def process_audio_files(speech_file, noise_file, alpha, beta):
    """Mix a speech clip with a noise clip and return the mixed output.

    Args:
        speech_file (tuple): Speech audio as (sample_rate, data).
        noise_file (tuple): Noise audio as (sample_rate, data).
        alpha (float): First slider value (-30 to +30).
        beta (float): Second slider value (-30 to +30).

    Returns:
        tuple: (sample_rate, processed_audio_data)
    """
    speech_rate, speech_samples = speech_file
    noise_rate, noise_samples = noise_file

    # Delegate the actual mixing to the helper; alpha and beta are passed
    # through untouched.
    mixed = process_audio(
        speech_samples, noise_samples, speech_rate, noise_rate, alpha, beta
    )

    # The helper's result is an AudioSegment; flatten its samples into a
    # numpy array so the caller receives plain (rate, ndarray) data.
    pcm = np.array(mixed.get_array_of_samples())

    return (mixed.frame_rate, pcm)
|
33 |
|
34 |
|
|
|
1 |
+
import gradio as gr # type: ignore
|
2 |
from helper import process_audio
|
3 |
+
import numpy as np # type: ignore
|
|
|
4 |
|
5 |
# Sample audio file paths
|
6 |
SAMPLE_SPEECH = "anushka.wav"
|
7 |
SAMPLE_NOISE = "traffic.wav"
|
8 |
|
9 |
+
|
10 |
def process_audio_files(speech_file, noise_file, alpha, beta):
    """Mix a speech clip with a noise clip and return the mixed output.

    Args:
        speech_file (tuple): Speech audio as (sample_rate, data).
        noise_file (tuple): Noise audio as (sample_rate, data).
        alpha (float): First slider value (-30 to +30).
        beta (float): Second slider value (-30 to +30).

    Returns:
        tuple: (sample_rate, processed_audio_data)
    """
    speech_rate, speech_samples = speech_file
    noise_rate, noise_samples = noise_file

    # Delegate the actual mixing to the helper; alpha and beta are passed
    # through untouched.
    mixed = process_audio(
        speech_samples, noise_samples, speech_rate, noise_rate, alpha, beta
    )

    # The helper's result is an AudioSegment; flatten its samples into a
    # numpy array so the caller receives plain (rate, ndarray) data.
    pcm = np.array(mixed.get_array_of_samples())

    return (mixed.frame_rate, pcm)
|
34 |
|
35 |
|
helper.py
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
import random
|
2 |
-
from pydub import AudioSegment
|
3 |
# from pydub.effects import normalize
|
4 |
-
import numpy as np
|
5 |
|
6 |
|
7 |
def get_audio_volume_db(audio):
    """Estimate the volume in dBFS (decibels relative to full scale) using PyDub.

    Args:
        audio: Object exposing a ``dBFS`` attribute (a PyDub AudioSegment,
            per the wording of the original docstring).

    Returns:
        float: ``audio.dBFS``, or -50.0 when that value is negative infinity.
    """
    # Read the attribute once: the value is both tested and returned, and a
    # single read guarantees the two uses see the same number.
    level_db = audio.dBFS
    if level_db == float('-inf'):
        # Default to -50 dB for silence
        return -50.0
    return level_db
|
10 |
|
11 |
|
12 |
def adjust_volume(audio, volume_change_db):
|
@@ -46,16 +46,16 @@ def process_audio(speech_data, noise_data, speech_sr, noise_sr, alpha, beta):
|
|
46 |
"""
|
47 |
# Convert numpy arrays to AudioSegment
|
48 |
speech_audio = AudioSegment(
|
49 |
-
speech_data.tobytes(),
|
50 |
frame_rate=speech_sr,
|
51 |
-
sample_width=speech_data.dtype.itemsize,
|
52 |
channels=1
|
53 |
)
|
54 |
-
|
55 |
noise_audio = AudioSegment(
|
56 |
-
noise_data.tobytes(),
|
57 |
frame_rate=noise_sr,
|
58 |
-
sample_width=noise_data.dtype.itemsize,
|
59 |
channels=1
|
60 |
)
|
61 |
|
@@ -66,8 +66,8 @@ def process_audio(speech_data, noise_data, speech_sr, noise_sr, alpha, beta):
|
|
66 |
if len(noise_audio) / 1000.0 <= speech_duration:
|
67 |
trimmed_noise = noise_audio
|
68 |
else:
|
69 |
-
start_time = random.uniform(0, len(noise_audio) / 1000.0 - speech_duration) * 1000
|
70 |
-
trimmed_noise = noise_audio[start_time:start_time + (speech_duration * 1000)]
|
71 |
|
72 |
trimmed_noise = trimmed_noise.set_frame_rate(8000)
|
73 |
|
|
|
1 |
import random
|
2 |
+
from pydub import AudioSegment # type: ignore
|
3 |
# from pydub.effects import normalize
|
4 |
+
# import numpy as np # type: ignore
|
5 |
|
6 |
|
7 |
def get_audio_volume_db(audio):
    """Estimate the volume in dBFS (decibels relative to full scale) using PyDub.

    Args:
        audio: Object exposing a ``dBFS`` attribute (a PyDub AudioSegment,
            per the wording of the original docstring).

    Returns:
        float: ``audio.dBFS``, or -50.0 when that value is negative infinity.
    """
    # Read the attribute once: the value is both tested and returned, and a
    # single read guarantees the two uses see the same number.
    level_db = audio.dBFS
    if level_db == float('-inf'):
        # Default to -50 dB for silence
        return -50.0
    return level_db
|
10 |
|
11 |
|
12 |
def adjust_volume(audio, volume_change_db):
|
|
|
46 |
"""
|
47 |
# Convert numpy arrays to AudioSegment
|
48 |
speech_audio = AudioSegment(
|
49 |
+
speech_data.tobytes(),
|
50 |
frame_rate=speech_sr,
|
51 |
+
sample_width=speech_data.dtype.itemsize,
|
52 |
channels=1
|
53 |
)
|
54 |
+
|
55 |
noise_audio = AudioSegment(
|
56 |
+
noise_data.tobytes(),
|
57 |
frame_rate=noise_sr,
|
58 |
+
sample_width=noise_data.dtype.itemsize,
|
59 |
channels=1
|
60 |
)
|
61 |
|
|
|
66 |
if len(noise_audio) / 1000.0 <= speech_duration:
|
67 |
trimmed_noise = noise_audio
|
68 |
else:
|
69 |
+
start_time = random.uniform(0, len(noise_audio) / 1000.0 - speech_duration) * 1000 # noqa
|
70 |
+
trimmed_noise = noise_audio[start_time:start_time + (speech_duration * 1000)] # noqa
|
71 |
|
72 |
trimmed_noise = trimmed_noise.set_frame_rate(8000)
|
73 |
|