import os
import sys
import io

import pandas as pd
import altair as alt
import streamlit as st

# Make the parent directory importable before pulling in the detection package.
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(parent_dir)

from fake_audio_detection.model import predict_audio_blocks

st.title("🔎 DeepVoice Detection")

APP_DIR = os.path.dirname(os.path.abspath(__file__))

DATASET_DIR = os.path.join(APP_DIR, "dataset/")  # optional: only needed if you add a training step
MODEL_PATH = os.path.join(APP_DIR, "model/noma-1")
REAL_DIR = os.path.join(APP_DIR, "audios/real")
FAKE_DIR = os.path.join(APP_DIR, "audios/fake")

# Build the list of bundled sample clips (real and fake) for the selector.
real_audio = {
    f"Real - {f}": os.path.join(REAL_DIR, f)
    for f in os.listdir(REAL_DIR)
    if f.endswith((".wav", ".mp3"))
}
fake_audio = {
    f"Fake - {f}": os.path.join(FAKE_DIR, f)
    for f in os.listdir(FAKE_DIR)
    if f.endswith((".wav", ".mp3"))
}
all_audio = {**real_audio, **fake_audio}

selected_label = st.radio("Select an audio file to play:", list(all_audio.keys()))
selected_path = all_audio[selected_label]

st.write("#### Try with your own audio")
uploaded_file = st.file_uploader("Choose an audio file", type=["wav", "mp3", "ogg"])

if uploaded_file is not None:
    # Play the uploaded file.
    st.markdown(f"**Now Playing:** `{uploaded_file.name}`")
    audio_bytes = uploaded_file.read()
    file_extension = uploaded_file.name.split(".")[-1].lower()
    st.audio(audio_bytes, format=f"audio/{file_extension}")
else:
    # Fall back to the bundled sample chosen in the radio selector.
    st.markdown(f"**Now Playing:** `{selected_label}`")
    with open(selected_path, "rb") as audio_file:
        audio_bytes = audio_file.read()
    st.audio(audio_bytes, format="audio/wav")

if st.button("Run Prediction") and os.path.exists(MODEL_PATH):
    audio_bytes = None
    if uploaded_file:
        # Wrap the uploaded bytes so the model can read them like a file.
        bytes_data = uploaded_file.getvalue()
        audio_bytes = io.BytesIO(bytes_data)

    with st.spinner("Analyzing audio..."):
        times, probas = predict_audio_blocks(MODEL_PATH, selected_path, audio_bytes)

    preds = probas.argmax(axis=1)
    confidences = probas.max(axis=1)
    preds_as_string = ["Fake" if i == 0 else "Real" for i in preds]
    df = pd.DataFrame(
        {"Seconds": times, "Prediction": preds_as_string, "Confidence": confidences}
    )

    def get_confidence_level(row):
        # Flag low-confidence blocks as "Uncertain" so they get their own colour.
        if row["Confidence"] < 0.3:
            return "Uncertain"
        return row["Prediction"]

    df["Confidence Level"] = df.apply(get_confidence_level, axis=1)

    # Plot one bar per 1-second block, coloured by prediction / uncertainty.
    st.markdown("### Prediction by 1s Blocks")
    st.markdown("Hover over each bar to see the confidence of its prediction.")
    chart = (
        alt.Chart(df)
        .mark_bar()
        .encode(
            x=alt.X("Seconds:O", title="Seconds"),
            y=alt.value(30),
            color=alt.Color(
                "Confidence Level:N",
                scale=alt.Scale(
                    domain=["Fake", "Real", "Uncertain"],
                    range=["steelblue", "green", "gray"],
                ),
            ),
            tooltip=["Seconds", "Prediction", "Confidence"],
        )
        .properties(width=700, height=150)
    )
    # Overlay the predicted label as text on top of each bar.
    text = (
        alt.Chart(df)
        .mark_text(
            align="right",
            baseline="top",
            dy=10,
            color="white",
            xOffset=10,
            yOffset=-20,
            fontSize=14,
        )
        .encode(x=alt.X("Seconds:O"), y=alt.value(15), text="Prediction:N")
    )
    st.altair_chart(chart + text, use_container_width=True)

    st.markdown("### Overall prediction")
    if all(element == "Real" for element in preds_as_string):
        st.markdown("The audio is **Real**")
    elif all(element == "Fake" for element in preds_as_string):
        st.markdown("The audio is **Fake**")
    else:
        st.markdown("Some parts of the audio have been detected as **Fake**")
elif not os.path.exists(MODEL_PATH):
    st.warning(f"Missing model: {MODEL_PATH}")
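
# ---------------------------------------------------------------------------
# Usage note (assumption: this file is the Streamlit entry point, e.g. app.py):
#
#   streamlit run app.py
#
# The app expects the directory layout referenced above, relative to this file:
#   audios/real/, audios/fake/  -> sample clips listed in the radio selector
#   model/noma-1                -> trained model consumed by predict_audio_blocks
#
# Assumed contract of predict_audio_blocks, inferred from how it is used here
# (the path "audios/real/sample.wav" below is hypothetical):
#
#   times, probas = predict_audio_blocks(MODEL_PATH, "audios/real/sample.wav", None)
#   # times:  one timestamp per 1-second block
#   # probas: array of shape (n_blocks, 2); column 0 = P(Fake), column 1 = P(Real)
# ---------------------------------------------------------------------------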