Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,7 +3,6 @@ from scipy.io.wavfile import write
|
|
3 |
from scipy.signal import find_peaks
|
4 |
from scipy.fft import fft
|
5 |
from tqdm import tqdm
|
6 |
-
import time
|
7 |
import matplotlib.pyplot as plt
|
8 |
from scipy.io.wavfile import read
|
9 |
from scipy import signal
|
@@ -15,7 +14,7 @@ from scipy.signal import butter, lfilter
|
|
15 |
# ---------------Parameters--------------- #
|
16 |
|
17 |
input_file = 'input_text.wav'
|
18 |
-
output_file = '
|
19 |
|
20 |
low_frequency = 18000
|
21 |
high_frequency = 19000
|
@@ -130,96 +129,201 @@ def record(audio):
|
|
130 |
except Exception as e:
|
131 |
# If an error occurs, return an error message
|
132 |
return f"Error: {str(e)}"
|
133 |
-
|
134 |
|
135 |
# -----------------Frame----------------- #
|
136 |
|
137 |
def calculate_snr(data, start, end, target_frequency):
|
138 |
-
|
139 |
-
|
140 |
-
frequencies = np.fft.fftfreq(len(spectrum), 1 / sample_rate)
|
141 |
-
target_index = np.abs(frequencies - target_frequency).argmin()
|
142 |
-
amplitude = np.abs(spectrum[target_index])
|
143 |
-
|
144 |
-
noise_segment = data[100:1000 + len(segment)]
|
145 |
-
noise_spectrum = np.fft.fft(noise_segment)
|
146 |
-
noise_amplitude = np.abs(noise_spectrum[target_index])
|
147 |
-
|
148 |
-
snr = 10 * np.log10(amplitude / noise_amplitude)
|
149 |
-
return snr
|
150 |
|
|
|
|
|
|
|
|
|
|
|
151 |
|
152 |
-
|
153 |
-
|
|
|
|
|
|
|
|
|
154 |
|
155 |
-
|
156 |
-
|
157 |
|
158 |
-
|
159 |
-
|
160 |
|
161 |
-
|
162 |
-
|
163 |
|
164 |
-
|
|
|
165 |
|
166 |
-
|
167 |
-
|
168 |
-
plt.xlabel("Time [s]")
|
169 |
-
plt.ylabel("Frequency [Hz]")
|
170 |
-
plt.title("Spectrogram of the signal")
|
171 |
-
plt.show()
|
172 |
|
173 |
-
|
|
|
174 |
|
175 |
-
|
|
|
176 |
|
177 |
-
|
178 |
-
|
179 |
|
180 |
-
|
|
|
|
|
|
|
181 |
|
182 |
-
t_start = t[t_idx_start]
|
183 |
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
|
188 |
-
|
|
|
189 |
|
190 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
191 |
|
192 |
|
193 |
# -----------------Receiver----------------- #
|
194 |
|
195 |
def dominant_frequency(signal_value, rate=None):
    """
    This function calculates the dominant frequency in a given signal.

    Parameters:
    signal_value (array): The signal data (at least two samples).
    rate (float, optional): Sampling rate in Hz. Defaults to the module-level
        ``sample_rate`` when omitted.

    Returns:
    float: The frequency (Hz) of the strongest local peak in the lower half of
        the FFT magnitude spectrum. If the spectrum has no local peak (for
        example an all-zero signal), the frequency of its largest bin.

    Raises:
    ValueError: If the signal has fewer than two samples.
    """
    if rate is None:
        rate = sample_rate

    n_samples = len(signal_value)
    half = n_samples // 2
    if half == 0:
        raise ValueError("signal_value must contain at least two samples")

    # Magnitudes of the non-negative-frequency half of the spectrum
    magnitudes = np.abs(fft(signal_value)[:half])

    # Bin k of an n-point FFT sits at exactly k * rate / n Hz
    xf = np.arange(half) * rate / n_samples

    # Local maxima only, so the first and last bins (e.g. a DC offset) are never picked
    peaks, _ = find_peaks(magnitudes)
    if peaks.size == 0:
        # Flat spectrum: no local maximum exists, use the largest bin instead
        return xf[np.argmax(magnitudes)]

    return xf[peaks[np.argmax(magnitudes[peaks])]]
|
200 |
|
201 |
|
202 |
def binary_to_text(binary):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
203 |
try:
|
|
|
204 |
return ''.join(chr(int(binary[i:i + 8], 2)) for i in range(0, len(binary), 8))
|
205 |
except Exception as e:
|
206 |
-
return
|
|
|
207 |
|
208 |
|
209 |
def decode_rs(binary_string, ecc_bytes):
    """
    This function decodes a Reed-Solomon encoded binary string.

    Parameters:
    binary_string (str): The encoded data as a string of '0'/'1' characters,
        read in groups of 8 bits.
    ecc_bytes (int): The number of error correction bytes used in the encoding.

    Returns:
    str: The decoded message as a binary string (8 bits per byte), with
        trailing zero bytes removed.

    Note:
    Errors raised by ``reedsolo`` (presumably when the data cannot be
    corrected - confirm against the library documentation) are not caught here.
    """
    # Convert the binary string to a bytearray, 8 bits per byte
    byte_data = bytearray(int(binary_string[i:i + 8], 2) for i in range(0, len(binary_string), 8))

    # Initialize a Reed-Solomon codec
    rs = reedsolo.RSCodec(ecc_bytes)

    # Newer reedsolo releases return a tuple whose first item is the message,
    # older ones return the message directly; accept both.
    decoded = rs.decode(byte_data)
    corrected_data = decoded[0] if isinstance(decoded, tuple) else decoded

    # Remove trailing null bytes (presumably padding added by the sender - confirm)
    corrected_data = bytes(corrected_data).rstrip(b'\x00')

    # Convert the bytes back to a binary string
    return ''.join(format(byte, '08b') for byte in corrected_data)
|
220 |
|
221 |
|
222 |
def manchester_decoding(binary_string):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
223 |
decoded_string = ''
|
224 |
for i in tqdm(range(0, len(binary_string), 2), desc="Decoding"):
|
225 |
if i + 1 < len(binary_string):
|
@@ -234,16 +338,27 @@ def manchester_decoding(binary_string):
|
|
234 |
|
235 |
|
236 |
def signal_to_binary_between_times(filename):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
237 |
start_time, end_time = frame_analyse(filename)
|
238 |
|
|
|
239 |
sr, data = read(filename)
|
240 |
|
|
|
241 |
start_sample = int((start_time - 0.007) * sr)
|
242 |
end_sample = int((end_time - 0.007) * sr)
|
243 |
binary_string = ''
|
244 |
|
245 |
-
|
246 |
-
|
247 |
for i in tqdm(range(start_sample, end_sample, int(sr * bit_duration))):
|
248 |
signal_value = data[i:i + int(sr * bit_duration)]
|
249 |
frequency = dominant_frequency(signal_value)
|
@@ -252,10 +367,10 @@ def signal_to_binary_between_times(filename):
|
|
252 |
else:
|
253 |
binary_string += '1'
|
254 |
|
|
|
255 |
index_start = binary_string.find("1000001")
|
256 |
substrings = ["0111110", "011110"]
|
257 |
index_end = -1
|
258 |
-
|
259 |
for substring in substrings:
|
260 |
index = binary_string.find(substring)
|
261 |
if index != -1:
|
@@ -265,21 +380,33 @@ def signal_to_binary_between_times(filename):
|
|
265 |
print("Binary String:", binary_string)
|
266 |
binary_string_decoded = manchester_decoding(binary_string[index_start + 7:index_end])
|
267 |
|
|
|
268 |
decoded_binary_string = decode_rs(binary_string_decoded, 20)
|
269 |
|
270 |
return decoded_binary_string
|
271 |
|
272 |
|
273 |
def receive():
    """
    Decode the filtered receiver recording and return its text content.

    Returns:
    str: The decoded text, or "Error: <message>" if any step raises.
    """
    try:
        # Demodulate the recording to bits, then turn the bits into characters
        return binary_to_text(signal_to_binary_between_times('output_filtered_receiver.wav'))
    except Exception as err:
        # Report the failure as text instead of raising
        return f"Error: {err}"
|
279 |
|
280 |
|
281 |
# -----------------Interface----------------- #
|
282 |
|
|
|
283 |
with gr.Blocks() as demo:
|
284 |
input_audio = gr.Audio(sources=["upload"])
|
285 |
output_text = gr.Textbox(label="Record Sound")
|
|
|
3 |
from scipy.signal import find_peaks
|
4 |
from scipy.fft import fft
|
5 |
from tqdm import tqdm
|
|
|
6 |
import matplotlib.pyplot as plt
|
7 |
from scipy.io.wavfile import read
|
8 |
from scipy import signal
|
|
|
14 |
# ---------------Parameters--------------- #
|
15 |
|
16 |
input_file = 'input_text.wav'
|
17 |
+
output_file = 'output_filtered_receiver.wav'
|
18 |
|
19 |
low_frequency = 18000
|
20 |
high_frequency = 19000
|
|
|
129 |
except Exception as e:
|
130 |
# If an error occurs, return an error message
|
131 |
return f"Error: {str(e)}"
|
132 |
+
|
133 |
|
134 |
# -----------------Frame----------------- #
|
135 |
|
136 |
def calculate_snr(data, start, end, target_frequency, rate=None):
    """
    This function calculates the Signal-to-Noise Ratio (SNR) for a given frequency within a segment of data.

    The signal level is the FFT magnitude of ``data[start:end]`` at the bin
    closest to ``target_frequency``. The noise level is the FFT magnitude at
    that same frequency in the reference window
    ``data[100:1000 + len(segment)]``.

    Parameters:
    data (array): The audio data.
    start (int): The start index of the segment.
    end (int): The end index of the segment.
    target_frequency (float): The frequency for which the SNR is to be calculated.
    rate (float, optional): Sampling rate in Hz. Defaults to the module-level
        ``sample_rate`` when omitted.

    Returns:
    float: ``10 * log10(signal magnitude / noise magnitude)``. If any step
        raises, the string "Error: <message>" is returned instead.
    """
    try:
        if rate is None:
            rate = sample_rate

        # Extract the segment from the data
        segment = data[start:end]

        # Magnitude of the segment's spectrum at the bin nearest the target frequency
        spectrum = np.fft.fft(segment)
        frequencies = np.fft.fftfreq(len(spectrum), 1 / rate)
        target_index = np.abs(frequencies - target_frequency).argmin()
        amplitude = np.abs(spectrum[target_index])

        # Define a noise segment
        noise_segment = data[100:1000 + len(segment)]
        noise_spectrum = np.fft.fft(noise_segment)

        # The noise window is not the same length as the segment, so its FFT
        # bins sit at different frequencies: look the target up on its own grid.
        noise_frequencies = np.fft.fftfreq(len(noise_spectrum), 1 / rate)
        noise_index = np.abs(noise_frequencies - target_frequency).argmin()
        noise_amplitude = np.abs(noise_spectrum[noise_index])

        # Calculate the SNR
        snr = 10 * np.log10(amplitude / noise_amplitude)

        return snr
    except Exception as e:
        # If an error occurs, return an error message
        return f"Error: {e}"
|
181 |
|
|
|
182 |
|
183 |
+
def frame_analyse(filename):
    """
    This function analyses an audio file and returns the start and end times of the signal of interest.

    The spectrogram row closest to 18000 Hz is compared against two thresholds
    obtained from ``calculate_snr``: one from the first half of the recording
    (at ``low_frequency``) and one from the second half (at ``high_frequency``).
    The spectrogram is also plotted and shown.

    Parameters:
    filename (str): The path to the audio file.

    Returns:
    tuple: The start and end times (in seconds) of the signal of interest.
        If any step raises, the string "Error: <message>" is returned instead.
    """
    try:
        # Read the audio file
        sr, y = read(filename)

        # Define the start and end indices of the first and second parts of the audio data
        first_part_start = 0
        first_part_end = len(y) // 2
        second_part_start = len(y) // 2
        second_part_end = len(y)

        # Define the segment length and overlap size for the spectrogram
        segment_length = 256
        overlap_size = 128

        # Calculate the spectrogram of the audio data
        f, t, sxx = signal.spectrogram(y, sr, nperseg=segment_length, noverlap=overlap_size)

        # Plot the spectrogram, then release the figure so repeated calls do not accumulate figures
        figure = plt.figure()
        plt.pcolormesh(t, f, sxx, shading="gouraud")
        plt.xlabel("Time [s]")
        plt.ylabel("Frequency [Hz]")
        plt.title("Spectrogram of the signal")
        plt.show()
        plt.close(figure)

        # Define the target frequency
        f0 = 18000

        # Find the index of the target frequency
        f_idx = np.argmin(np.abs(f - f0))

        # Calculate the SNR thresholds for the start and end of the signal
        thresholds_start = calculate_snr(y, first_part_start, first_part_end, low_frequency)
        thresholds_end = calculate_snr(y, second_part_start, second_part_end, high_frequency)

        # Start: first time bin whose power at the target frequency exceeds the start threshold
        band = sxx[f_idx]
        t_idx_start = np.argmax(band > thresholds_start)

        # End: one bin past the last bin (from the start onwards) that still exceeds the end threshold
        still_active = band[t_idx_start:] > thresholds_end
        if still_active.any():
            last_active = len(still_active) - 1 - np.argmax(still_active[::-1])
            t_idx_end = t_idx_start + last_active + 1
        else:
            t_idx_end = t_idx_start

        # A signal that lasts to the final bin would otherwise index one past the end of t
        t_idx_end = min(t_idx_end, len(t) - 1)

        # Convert the start and end indices to times
        t_start = t[t_idx_start]
        t_end = t[t_idx_end]

        return t_start, t_end
    except Exception as e:
        # If an error occurs, return an error message
        return f"Error: {e}"
|
242 |
|
243 |
|
244 |
# -----------------Receiver----------------- #
|
245 |
|
246 |
def dominant_frequency(signal_value, rate=None):
    """
    This function calculates the dominant frequency in a given signal.

    Parameters:
    signal_value (array): The signal data (at least two samples).
    rate (float, optional): Sampling rate in Hz. Defaults to the module-level
        ``sample_rate`` when omitted.

    Returns:
    float: The frequency (Hz) of the strongest local peak in the lower half of
        the FFT magnitude spectrum. If the spectrum has no local peak (for
        example an all-zero signal), the frequency of its largest bin.

    Raises:
    ValueError: If the signal has fewer than two samples.
    """
    if rate is None:
        rate = sample_rate

    n_samples = len(signal_value)
    half = n_samples // 2
    if half == 0:
        raise ValueError("signal_value must contain at least two samples")

    # Magnitudes of the non-negative-frequency half of the spectrum
    magnitudes = np.abs(fft(signal_value)[:half])

    # Bin k of an n-point FFT sits at exactly k * rate / n Hz
    xf = np.arange(half) * rate / n_samples

    # Local maxima only, so the first and last bins (e.g. a DC offset) are never picked
    peaks, _ = find_peaks(magnitudes)
    if peaks.size == 0:
        # Flat spectrum: no local maximum exists, use the largest bin instead
        return xf[np.argmax(magnitudes)]

    return xf[peaks[np.argmax(magnitudes[peaks])]]
|
267 |
|
268 |
|
269 |
def binary_to_text(binary):
    """
    This function converts a binary string to text.

    Parameters:
    binary (str): A string of '0'/'1' characters, read in groups of 8 bits.
        Trailing bits that do not fill a whole byte are ignored.

    Returns:
    str: One character per complete 8-bit group. If the input cannot be
        parsed, the string "Error: <message>" is returned instead.
    """
    try:
        # Only whole bytes are decoded; a short trailing group would otherwise
        # be read as a small number and yield a spurious control character
        usable_length = len(binary) - len(binary) % 8

        # Convert each 8-bit binary number to a character and join them together
        return ''.join(chr(int(binary[i:i + 8], 2)) for i in range(0, usable_length, 8))
    except Exception as e:
        # If an error occurs, return an error message
        return f"Error: {e}"
|
285 |
|
286 |
|
287 |
def decode_rs(binary_string, ecc_bytes):
    """
    This function decodes a Reed-Solomon encoded binary string.

    Parameters:
    binary_string (str): The encoded data as a string of '0'/'1' characters,
        read in groups of 8 bits.
    ecc_bytes (int): The number of error correction bytes used in the encoding.

    Returns:
    str: The decoded message as a binary string (8 bits per byte), with
        trailing zero bytes removed.

    Note:
    Errors raised by ``reedsolo`` (presumably when the data cannot be
    corrected - confirm against the library documentation) are not caught here.
    """
    # Convert the binary string to a bytearray, 8 bits per byte
    byte_data = bytearray(int(binary_string[i:i + 8], 2) for i in range(0, len(binary_string), 8))

    # Initialize a Reed-Solomon codec
    rs = reedsolo.RSCodec(ecc_bytes)

    # Newer reedsolo releases return a tuple whose first item is the message,
    # older ones return the message directly; accept both.
    decoded = rs.decode(byte_data)
    corrected_data = decoded[0] if isinstance(decoded, tuple) else decoded

    # Remove trailing null bytes (presumably padding added by the sender - confirm)
    corrected_data = bytes(corrected_data).rstrip(b'\x00')

    # Convert the bytes back to a binary string
    return ''.join(format(byte, '08b') for byte in corrected_data)
|
315 |
|
316 |
|
317 |
def manchester_decoding(binary_string):
|
318 |
+
"""
|
319 |
+
This function decodes a Manchester encoded binary string.
|
320 |
+
|
321 |
+
Parameters:
|
322 |
+
binary_string (str): The binary string.
|
323 |
+
|
324 |
+
Returns:
|
325 |
+
str: The decoded binary string.
|
326 |
+
"""
|
327 |
decoded_string = ''
|
328 |
for i in tqdm(range(0, len(binary_string), 2), desc="Decoding"):
|
329 |
if i + 1 < len(binary_string):
|
|
|
338 |
|
339 |
|
340 |
def signal_to_binary_between_times(filename):
|
341 |
+
"""
|
342 |
+
This function converts a signal to a binary string between specified times.
|
343 |
+
|
344 |
+
Parameters:
|
345 |
+
filename (str): The path to the audio file.
|
346 |
+
|
347 |
+
Returns:
|
348 |
+
str: The binary string.
|
349 |
+
"""
|
350 |
+
# Get the start and end times of the signal of interest
|
351 |
start_time, end_time = frame_analyse(filename)
|
352 |
|
353 |
+
# Read the audio file
|
354 |
sr, data = read(filename)
|
355 |
|
356 |
+
# Calculate the start and end samples of the signal of interest
|
357 |
start_sample = int((start_time - 0.007) * sr)
|
358 |
end_sample = int((end_time - 0.007) * sr)
|
359 |
binary_string = ''
|
360 |
|
361 |
+
# Decode one binary digit per window of int(sr * bit_duration) samples
|
|
|
362 |
for i in tqdm(range(start_sample, end_sample, int(sr * bit_duration))):
|
363 |
signal_value = data[i:i + int(sr * bit_duration)]
|
364 |
frequency = dominant_frequency(signal_value)
|
|
|
367 |
else:
|
368 |
binary_string += '1'
|
369 |
|
370 |
+
# Find the start and end indices of the binary string
|
371 |
index_start = binary_string.find("1000001")
|
372 |
substrings = ["0111110", "011110"]
|
373 |
index_end = -1
|
|
|
374 |
for substring in substrings:
|
375 |
index = binary_string.find(substring)
|
376 |
if index != -1:
|
|
|
380 |
print("Binary String:", binary_string)
|
381 |
binary_string_decoded = manchester_decoding(binary_string[index_start + 7:index_end])
|
382 |
|
383 |
+
# Decode the binary string
|
384 |
decoded_binary_string = decode_rs(binary_string_decoded, 20)
|
385 |
|
386 |
return decoded_binary_string
|
387 |
|
388 |
|
389 |
def receive():
    """
    Decode the filtered receiver recording and return its text content.

    Returns:
    str: The decoded text, or "Error: <message>" if any step raises.
    """
    try:
        # Demodulate the recording to bits, then turn the bits into characters
        return binary_to_text(signal_to_binary_between_times('output_filtered_receiver.wav'))
    except Exception as err:
        # Report the failure as text instead of raising
        return f"Error: {err}"
|
405 |
|
406 |
|
407 |
# -----------------Interface----------------- #
|
408 |
|
409 |
+
# Start a Gradio Blocks interface
|
410 |
with gr.Blocks() as demo:
|
411 |
input_audio = gr.Audio(sources=["upload"])
|
412 |
output_text = gr.Textbox(label="Record Sound")
|