thugCodeNinja committed (verified)
Commit 817f64d · Parent: dd50305

Update app.py

Files changed (1): app.py (+23 -8)
app.py CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
 import numpy as np
 import librosa
 from tensorflow.keras.models import load_model
-
+import matplotlib.pyplot as plt
 # Constants
 MAX_TIME_STEPS = 109
 SAMPLE_RATE = 16000
@@ -14,6 +14,18 @@ MODEL_PATH = "audio_classifier.h5" # Replace with the actual path to your saved
 model = load_model(MODEL_PATH, compile=False)
 model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
 
+def visualize(mel_spectrogram):
+    median_decibels = np.median(mel_spectrogram)
+    median_human_voice_range = -65
+    diff_decibels = abs(median_decibels - median_human_voice_range)
+    plt.figure(figsize=(12, 6))
+    plt.subplot(1, 1, 1)
+    librosa.display.specshow(mel_spectrogram, sr=SAMPLE_RATE, x_axis='time', y_axis='mel')
+    plt.colorbar(format='%+2.0f dB')
+    plt.title(f'Difference from Median Human Voice Range: {diff_decibels:.2f} dB')
+    plt.savefig("mel_spectrogram.png")  # Save the image for the Gradio output
+    plt.close()
+
 def classify_audio(audio):
     # Convert the audio data to NumPy array
     rate, ar = audio
@@ -37,13 +49,16 @@ def classify_audio(audio):
     # Convert probabilities to predicted classes
    y_pred_classes = np.argmax(y_pred, axis=1)
 
-    if(y_pred_classes[0]==1):
-        return f"Prediction: {'Not spoof'}"
+    if y_pred_classes[0] == 1:
+        prediction = "Not Spoof : High chances of original voice"
     else:
-        return f"Prediction: {'Spoof'}"
+        prediction = "Spoof : Possible voice cloning"
+
+    visualize(mel_spectrogram)
+    return prediction, "mel_spectrogram.png"
 
-title="Audios Spoof detection using CNN"
-description="The model was trained on the ASVspoof 2015 dataset with an aim to detect spoof audios through deep learning.To use it please upload an audio file of suitable length."
+title="Group-2 Audio Spoof detection using CNN"
+description="The model was trained on the ASVspoof 2019 dataset with the aim of detecting spoofed audio through deep learning. To use it, please upload an audio file of suitable length. The mel spectrogram used for inference is also shown, so the user can understand the classification and compare it with the median human decibel range."
 
-iface = gr.Interface(classify_audio, inputs=["audio"], outputs=["text"],title=title,description=description)
-iface.launch()
+iface = gr.Interface(classify_audio, inputs=["audio"], outputs=["text", "image"], title=title, description=description)
+iface.launch()
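
Two editorial notes on the new visualize helper, separate from the committed change. First, librosa.display is a submodule that a plain import of librosa does not expose on older librosa releases, so the specshow call can fail with an AttributeError there. Second, saving figures on a headless server (such as a Hugging Face Space) is safest with matplotlib's non-interactive Agg backend. A minimal sketch of the defensive imports, assuming such an environment:

import matplotlib
matplotlib.use("Agg")   # non-interactive backend; avoids display errors on a headless server
import matplotlib.pyplot as plt
import librosa.display  # explicit import; needed on older librosa versions, harmless on newer ones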
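
For context, the diff elides most of classify_audio's body (between the second and third hunks), where mel_spectrogram is computed. A minimal sketch of what that hidden preprocessing plausibly looks like, inferred from SAMPLE_RATE, MAX_TIME_STEPS, and the dB colorbar in visualize; the helper name preprocess and the N_MELS value are assumptions, not taken from this commit:

import numpy as np
import librosa

SAMPLE_RATE = 16000
MAX_TIME_STEPS = 109
N_MELS = 128  # assumption: the mel-band count is not visible in the diff

def preprocess(audio):  # hypothetical helper mirroring the hidden classify_audio body
    rate, ar = audio                              # Gradio passes (sample_rate, np.ndarray)
    ar = ar.astype(np.float32)
    if ar.ndim > 1:
        ar = ar.mean(axis=1)                      # downmix stereo to mono
    ar /= np.max(np.abs(ar)) + 1e-9               # scale int16 PCM into [-1, 1]
    if rate != SAMPLE_RATE:
        ar = librosa.resample(ar, orig_sr=rate, target_sr=SAMPLE_RATE)
    mel = librosa.feature.melspectrogram(y=ar, sr=SAMPLE_RATE, n_mels=N_MELS)
    mel = librosa.power_to_db(mel, ref=np.max)    # dB scale, matching the '%+2.0f dB' colorbar
    if mel.shape[1] < MAX_TIME_STEPS:             # pad or truncate to the fixed CNN input width
        mel = np.pad(mel, ((0, 0), (0, MAX_TIME_STEPS - mel.shape[1])))
    else:
        mel = mel[:, :MAX_TIME_STEPS]
    return mel                                    # model input would be mel[np.newaxis, ..., np.newaxis]

Under these assumptions the new return value (prediction, "mel_spectrogram.png") lines up with outputs=["text", "image"], since Gradio's image output accepts a file path.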