import re
import string

import joblib
import nltk
import streamlit as st
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Download the tokenizer and stopword data NLTK needs at runtime.
nltk.download('punkt')
nltk.download('punkt_tab')  # required by word_tokenize on newer NLTK releases
nltk.download('stopwords')


def preprocess_text(text):
    """Lowercase, strip punctuation and digits, collapse whitespace, and drop stopwords."""
    text = text.lower()
    text = ''.join([char for char in text if char not in string.punctuation])
    text = re.sub(r'\d+', '', text)
    text = ' '.join(text.split())
    tokens = word_tokenize(text)
    stop_words = set(stopwords.words('english'))
    tokens = [token for token in tokens if token not in stop_words]
    return ' '.join(tokens)


# Load the trained classifier and the TF-IDF vectorizer saved at training time.
# st.cache_resource keeps them in memory across Streamlit reruns.
@st.cache_resource
def load_artifacts():
    model = joblib.load('spam_detector_model.joblib')
    vectorizer = joblib.load('tfidf_vectorizer.joblib')
    return model, vectorizer


model, vectorizer = load_artifacts()

st.title("📧 Spam Message Detector")
st.write("""
This app detects whether a message is spam or not.
Enter your message below and click 'Analyze' to check!
""")

message = st.text_area("Enter your message:", height=100)

if st.button("Analyze"):
    if message:
        # Apply the same preprocessing used at training time, then vectorize.
        processed_text = preprocess_text(message)
        text_vectorized = vectorizer.transform([processed_text])

        prediction = model.predict(text_vectorized)[0]
        probability = model.predict_proba(text_vectorized)[0]

        st.markdown("### Analysis Result")
        if prediction == 1:
            st.error("🚨 This message is likely SPAM!")
            st.write(f"Confidence: {probability[1]:.2%}")
        else:
            st.success("✅ This message appears to be legitimate.")
            st.write(f"Confidence: {probability[0]:.2%}")

        with st.expander("See preprocessing steps"):
            st.write("Original message:", message)
            st.write("Processed message:", processed_text)
    else:
        st.warning("Please enter a message to analyze.")

with st.sidebar:
    st.header("About the Model")
    st.write("""
    This spam detector uses an XGBoost classifier trained on a dataset
    of spam and legitimate messages.

    Model Performance:
    - Training Accuracy: 99.7%
    - Testing Accuracy: 98.9%
    """)
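

# ---------------------------------------------------------------------------
# Hedged sketch: one way the two artifacts loaded above could be produced.
# The sidebar only states that the model is an XGBoost classifier paired with
# a TF-IDF vectorizer; the dataset path, column names, split, and parameters
# below are illustrative assumptions, not the actual training script. The app
# never calls this function.
# ---------------------------------------------------------------------------
def train_and_save_model(csv_path="spam_data.csv"):
    import pandas as pd
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.model_selection import train_test_split
    from xgboost import XGBClassifier

    # Assumed columns: 'text' (message body) and 'label' (1 = spam, 0 = ham).
    df = pd.read_csv(csv_path)
    texts = df["text"].apply(preprocess_text)
    labels = df["label"]

    X_train, X_test, y_train, y_test = train_test_split(
        texts, labels, test_size=0.2, random_state=42, stratify=labels
    )

    # Fit the vectorizer on training text only, then reuse it for the test set.
    tfidf = TfidfVectorizer()
    X_train_vec = tfidf.fit_transform(X_train)
    X_test_vec = tfidf.transform(X_test)

    clf = XGBClassifier(eval_metric="logloss")
    clf.fit(X_train_vec, y_train)
    print(f"Test accuracy: {clf.score(X_test_vec, y_test):.3f}")

    # Save under the filenames the app expects.
    joblib.dump(clf, "spam_detector_model.joblib")
    joblib.dump(tfidf, "tfidf_vectorizer.joblib")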