VigneshDark committed on
Commit
82f19d8
·
1 Parent(s): b36d167
Files changed (2) hide show
  1. app.py +10 -9
  2. helper.py +11 -11
app.py CHANGED
@@ -1,34 +1,35 @@
1
- import gradio as gr
2
  from helper import process_audio
3
- import os
4
- import numpy as np
5
 
6
  # Sample audio file paths
7
  SAMPLE_SPEECH = "anushka.wav"
8
  SAMPLE_NOISE = "traffic.wav"
9
 
 
10
  def process_audio_files(speech_file, noise_file, alpha, beta):
11
  """
12
  Process the audio files and return the mixed output
13
-
14
  Args:
15
  speech_file (tuple): Speech audio (sample_rate, data)
16
  noise_file (tuple): Noise audio (sample_rate, data)
17
  alpha (float): First slider value (-30 to +30)
18
  beta (float): Second slider value (-30 to +30)
19
-
20
  Returns:
21
  tuple: (sample_rate, processed_audio_data)
22
  """
23
  speech_sr, speech_data = speech_file
24
  noise_sr, noise_data = noise_file
25
-
26
  # Process the audio using the helper function
27
- output_audio = process_audio(speech_data, noise_data, speech_sr, noise_sr, alpha, beta)
28
-
 
29
  # Convert AudioSegment to numpy array
30
  samples = np.array(output_audio.get_array_of_samples())
31
-
32
  return (output_audio.frame_rate, samples)
33
 
34
 
 
1
+ import gradio as gr # type: ignore
2
  from helper import process_audio
3
+ import numpy as np # type: ignore
 
4
 
5
  # Sample audio file paths
6
  SAMPLE_SPEECH = "anushka.wav"
7
  SAMPLE_NOISE = "traffic.wav"
8
 
9
+
def process_audio_files(speech_file, noise_file, alpha, beta):
    """
    Process the audio files and return the mixed output.

    Args:
        speech_file (tuple): Speech audio (sample_rate, data)
        noise_file (tuple): Noise audio (sample_rate, data)
        alpha (float): First slider value (-30 to +30)
        beta (float): Second slider value (-30 to +30)

    Returns:
        tuple: (sample_rate, processed_audio_data)

    Raises:
        ValueError: If either audio input is None (nothing was supplied).
    """
    # A missing input arrives as None; fail with a readable message instead
    # of the opaque "cannot unpack non-iterable NoneType" TypeError.
    if speech_file is None or noise_file is None:
        raise ValueError(
            "Both a speech file and a noise file are required."
        )

    speech_sr, speech_data = speech_file
    noise_sr, noise_data = noise_file

    # Process the audio using the helper function
    output_audio = process_audio(
        speech_data, noise_data, speech_sr, noise_sr, alpha, beta
    )

    # Convert AudioSegment to numpy array
    samples = np.array(output_audio.get_array_of_samples())

    return (output_audio.frame_rate, samples)
34
 
35
 
helper.py CHANGED
@@ -1,12 +1,12 @@
1
  import random
2
- from pydub import AudioSegment
3
  # from pydub.effects import normalize
4
- import numpy as np
5
 
6
 
7
  def get_audio_volume_db(audio):
8
- """Estimate the volume in dBFS (decibels relative to full scale) using PyDub."""
9
- return audio.dBFS if audio.dBFS != float('-inf') else -50.0 # Default to -50 dB for silence
10
 
11
 
12
  def adjust_volume(audio, volume_change_db):
@@ -46,16 +46,16 @@ def process_audio(speech_data, noise_data, speech_sr, noise_sr, alpha, beta):
46
  """
47
  # Convert numpy arrays to AudioSegment
48
  speech_audio = AudioSegment(
49
- speech_data.tobytes(),
50
  frame_rate=speech_sr,
51
- sample_width=speech_data.dtype.itemsize,
52
  channels=1
53
  )
54
-
55
  noise_audio = AudioSegment(
56
- noise_data.tobytes(),
57
  frame_rate=noise_sr,
58
- sample_width=noise_data.dtype.itemsize,
59
  channels=1
60
  )
61
 
@@ -66,8 +66,8 @@ def process_audio(speech_data, noise_data, speech_sr, noise_sr, alpha, beta):
66
  if len(noise_audio) / 1000.0 <= speech_duration:
67
  trimmed_noise = noise_audio
68
  else:
69
- start_time = random.uniform(0, len(noise_audio) / 1000.0 - speech_duration) * 1000
70
- trimmed_noise = noise_audio[start_time:start_time + (speech_duration * 1000)]
71
 
72
  trimmed_noise = trimmed_noise.set_frame_rate(8000)
73
 
 
1
  import random
2
+ from pydub import AudioSegment # type: ignore
3
  # from pydub.effects import normalize
4
+ # import numpy as np # type: ignore
5
 
6
 
def get_audio_volume_db(audio):
    """Estimate the volume in dBFS (decibels relative to full scale).

    Args:
        audio: Object exposing a ``dBFS`` attribute, e.g. a PyDub
            ``AudioSegment``.

    Returns:
        float: ``audio.dBFS``, or -50.0 when that value is negative
        infinity (silence).
    """
    # Read the attribute once: on a PyDub segment dBFS is a computed
    # property, so evaluating it twice repeats the work.
    level_db = audio.dBFS
    if level_db == float("-inf"):
        # Default to -50 dB for silence
        return -50.0
    return level_db
10
 
11
 
12
  def adjust_volume(audio, volume_change_db):
 
46
  """
47
  # Convert numpy arrays to AudioSegment
48
  speech_audio = AudioSegment(
49
+ speech_data.tobytes(),
50
  frame_rate=speech_sr,
51
+ sample_width=speech_data.dtype.itemsize,
52
  channels=1
53
  )
54
+
55
  noise_audio = AudioSegment(
56
+ noise_data.tobytes(),
57
  frame_rate=noise_sr,
58
+ sample_width=noise_data.dtype.itemsize,
59
  channels=1
60
  )
61
 
 
66
  if len(noise_audio) / 1000.0 <= speech_duration:
67
  trimmed_noise = noise_audio
68
  else:
69
+ start_time = random.uniform(0, len(noise_audio) / 1000.0 - speech_duration) * 1000 # noqa
70
+ trimmed_noise = noise_audio[start_time:start_time + (speech_duration * 1000)] # noqa
71
 
72
  trimmed_noise = trimmed_noise.set_frame_rate(8000)
73