File size: 2,100 Bytes
3c1717d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
import streamlit as st
import joblib
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import string
import re
# Fetch the NLTK data that preprocess_text depends on: the tokenizer models
# used by word_tokenize and the stop-word corpus.
# NLTK 3.9+ loads the tokenizer from the 'punkt_tab' package instead of the
# pickled 'punkt' one, so both are requested; this keeps word_tokenize working
# on either side of that change. nltk.download skips packages that are already
# up to date, so repeating the calls on each run is cheap.
for _resource in ('punkt', 'punkt_tab', 'stopwords'):
    nltk.download(_resource)
def preprocess_text(text):
    """Normalize a raw message into a space-separated string of kept tokens.

    The steps run in this order: lowercase the text, delete every character
    found in ``string.punctuation``, delete runs of digits, collapse
    whitespace, tokenize with NLTK's ``word_tokenize``, and discard tokens
    that appear in NLTK's English stop-word list.

    Args:
        text: The raw message string.

    Returns:
        The surviving tokens joined by single spaces (an empty string when
        nothing survives).
    """
    lowered = text.lower()
    # A translation table with only a "delete" set strips the same characters
    # as filtering the string one character at a time.
    without_punct = lowered.translate(str.maketrans('', '', string.punctuation))
    without_digits = re.sub(r'\d+', '', without_punct)
    # split() with no argument drops leading/trailing whitespace and collapses
    # interior runs, so re-joining yields single-space separation.
    squeezed = ' '.join(without_digits.split())
    words = word_tokenize(squeezed)
    english_stops = set(stopwords.words('english'))
    return ' '.join(word for word in words if word not in english_stops)
@st.cache_resource
def _load_artifacts():
    """Load the saved classifier and vectorizer from disk.

    Streamlit re-executes this script on every widget interaction; caching the
    loader means the two joblib files are deserialized once per server process
    and the same objects are reused on later reruns.

    NOTE: joblib files are pickle-based, so only load artifacts from a
    trusted source.

    Returns:
        A ``(model, vectorizer)`` tuple.
    """
    return (
        joblib.load('spam_detector_model.joblib'),
        joblib.load('tfidf_vectorizer.joblib'),
    )


model, vectorizer = _load_artifacts()
# Main page: title, short instructions, the message input, and the analysis
# that runs when the user clicks "Analyze".
st.title("📧 Spam Message Detector")
st.write("""
This app detects whether a message is spam or not.
Enter your message below and click 'Analyze' to check!
""")

message = st.text_area("Enter your message:", height=100)

if st.button("Analyze"):
    # Whitespace-only input is treated as empty: it would otherwise be
    # classified as an empty feature vector and reported with a confidence
    # figure that means nothing.
    if message.strip():
        processed_text = preprocess_text(message)
        # The vectorizer expects an iterable of documents, hence the
        # one-element list; [0] then picks out the single row of results.
        text_vectorized = vectorizer.transform([processed_text])
        prediction = model.predict(text_vectorized)[0]
        probability = model.predict_proba(text_vectorized)[0]

        st.markdown("### Analysis Result")
        # Label 1 is treated as spam. Indexing probability by 0/1 assumes the
        # model's class order is [legitimate, spam] — TODO confirm against
        # model.classes_.
        if prediction == 1:
            st.error("🚨 This message is likely SPAM!")
            st.write(f"Confidence: {probability[1]:.2%}")
        else:
            st.success("✅ This message appears to be legitimate.")
            st.write(f"Confidence: {probability[0]:.2%}")

        # Show the raw and cleaned text so the user can see what the model
        # actually received.
        with st.expander("See preprocessing steps"):
            st.write("Original message:", message)
            st.write("Processed message:", processed_text)
    else:
        st.warning("Please enter a message to analyze.")
# Sidebar panel with static background information about the model.
# The accuracy figures below are fixed text, not values computed at runtime.
_sidebar = st.sidebar
_sidebar.header("About the Model")
_sidebar.write("""
This spam detector uses an XGBoost classifier trained on a dataset of spam and legitimate messages.
Model Performance:
- Training Accuracy: 99.7%
- Testing Accuracy: 98.9%
""")
|