thugCodeNinja committed (verified)
Commit 817f64d · Parent: dd50305

Update app.py

Files changed (1): app.py (+23 -8)
app.py CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
 import numpy as np
 import librosa
 from tensorflow.keras.models import load_model
-
+import matplotlib.pyplot as plt
 # Constants
 MAX_TIME_STEPS = 109
 SAMPLE_RATE = 16000
@@ -14,6 +14,18 @@ MODEL_PATH = "audio_classifier.h5" # Replace with the actual path to your saved
 model = load_model(MODEL_PATH, compile=False)
 model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
 
+def visualize(mel_spectrogram):
+    median_decibels = np.median(mel_spectrogram)
+    median_human_voice_range = -65
+    diff_decibels = abs(median_decibels - median_human_voice_range)
+    plt.figure(figsize=(12, 6))
+    plt.subplot(1, 1, 1)
+    librosa.display.specshow(mel_spectrogram, sr=SAMPLE_RATE, x_axis='time', y_axis='mel')
+    plt.colorbar(format='%+2.0f dB')
+    plt.title(f'Difference from Median Human Voice Range: {diff_decibels:.2f} dB')
+    plt.savefig("mel_spectrogram.png")  # Save the image for the Gradio output
+    plt.close()
+
 def classify_audio(audio):
     # Convert the audio data to NumPy array
     rate, ar = audio
@@ -37,13 +49,16 @@ def classify_audio(audio):
     # Convert probabilities to predicted classes
    y_pred_classes = np.argmax(y_pred, axis=1)
 
-    if(y_pred_classes[0]==1):
-        return f"Prediction: {'Not spoof'}"
+    if y_pred_classes[0] == 1:
+        prediction = "Not Spoof : High chances of original voice"
     else:
-        return f"Prediction: {'Spoof'}"
+        prediction = "Spoof : Possible voice cloning"
+
+    visualize(mel_spectrogram)
+    return prediction, "mel_spectrogram.png"
 
-title="Audios Spoof detection using CNN"
-description="The model was trained on the ASVspoof 2015 dataset with an aim to detect spoof audios through deep learning.To use it please upload an audio file of suitable length."
+title="Group-2 Audio Spoof detection using CNN"
+description="The model was trained on the ASVspoof 2019 dataset with the aim of detecting spoofed audio through deep learning. To use it, please upload an audio file of suitable length. The mel spectrogram used for inference is also shown, so the user can understand the classification and compare it with the median human decibel range."
 
-iface = gr.Interface(classify_audio, inputs=["audio"], outputs=["text"],title=title,description=description)
-iface.launch()
+iface = gr.Interface(classify_audio, inputs=["audio"], outputs=["text", "image"], title=title, description=description)
+iface.launch()
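
Two editorial notes on the new visualize helper, separate from the committed change. First, librosa.display is a submodule that a plain import of librosa does not expose on older librosa releases, so the specshow call can fail with an AttributeError there. Second, saving figures on a headless server (such as a Hugging Face Space) is safest with matplotlib's non-interactive Agg backend. A minimal sketch of the defensive imports, assuming such an environment:

import matplotlib
matplotlib.use("Agg")   # non-interactive backend; avoids display errors on a headless server
import matplotlib.pyplot as plt
import librosa.display  # explicit import; needed on older librosa versions, harmless on newer ones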
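
For context, the diff elides most of classify_audio's body (between the second and third hunks), where mel_spectrogram is computed. A minimal sketch of what that hidden preprocessing plausibly looks like, inferred from SAMPLE_RATE, MAX_TIME_STEPS, and the dB colorbar in visualize; the helper name preprocess and the N_MELS value are assumptions, not taken from this commit:

import numpy as np
import librosa

SAMPLE_RATE = 16000
MAX_TIME_STEPS = 109
N_MELS = 128  # assumption: the mel-band count is not visible in the diff

def preprocess(audio):  # hypothetical helper mirroring the hidden classify_audio body
    rate, ar = audio                              # Gradio passes (sample_rate, np.ndarray)
    ar = ar.astype(np.float32)
    if ar.ndim > 1:
        ar = ar.mean(axis=1)                      # downmix stereo to mono
    ar /= np.max(np.abs(ar)) + 1e-9               # scale int16 PCM into [-1, 1]
    if rate != SAMPLE_RATE:
        ar = librosa.resample(ar, orig_sr=rate, target_sr=SAMPLE_RATE)
    mel = librosa.feature.melspectrogram(y=ar, sr=SAMPLE_RATE, n_mels=N_MELS)
    mel = librosa.power_to_db(mel, ref=np.max)    # dB scale, matching the '%+2.0f dB' colorbar
    if mel.shape[1] < MAX_TIME_STEPS:             # pad or truncate to the fixed CNN input width
        mel = np.pad(mel, ((0, 0), (0, MAX_TIME_STEPS - mel.shape[1])))
    else:
        mel = mel[:, :MAX_TIME_STEPS]
    return mel                                    # model input would be mel[np.newaxis, ..., np.newaxis]

Under these assumptions the new return value (prediction, "mel_spectrogram.png") lines up with outputs=["text", "image"], since Gradio's image output accepts a file path.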