Shiva7706's picture
Update app.py
3c1717d verified
raw
history blame contribute delete
2.1 kB
import streamlit as st
import joblib
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import string
import re
def _ensure_nltk_resources():
    """Make sure the NLTK data used by this app is installed locally.

    Streamlit re-executes the script on every widget interaction, so each
    resource is looked up on disk first and only downloaded when missing,
    instead of contacting the NLTK index on every rerun.
    """
    # Downloader package id -> path that ``nltk.data.find`` resolves once the
    # package is installed.
    required = {
        'punkt': 'tokenizers/punkt',
        # Recent NLTK releases load the Punkt model from ``punkt_tab``;
        # without it ``word_tokenize`` raises LookupError on those versions.
        'punkt_tab': 'tokenizers/punkt_tab',
        'stopwords': 'corpora/stopwords',
    }
    for package, resource_path in required.items():
        try:
            nltk.data.find(resource_path)
        except LookupError:
            # quiet=True keeps downloader progress output out of the app logs.
            nltk.download(package, quiet=True)


_ensure_nltk_resources()
def preprocess_text(text: str) -> str:
    """Normalize a raw message into a space-separated string of tokens.

    The steps run in this order: lowercase, delete every character found in
    ``string.punctuation``, delete runs of digits, collapse whitespace,
    tokenize with ``word_tokenize``, and drop English stop words.

    Args:
        text: The raw message.

    Returns:
        The surviving tokens joined by single spaces (may be empty).
    """
    lowered = text.lower()
    # One translate pass deletes all ASCII punctuation characters.
    without_punctuation = lowered.translate(
        str.maketrans('', '', string.punctuation)
    )
    without_digits = re.sub(r'\d+', '', without_punctuation)
    normalized = ' '.join(without_digits.split())

    words = word_tokenize(normalized)
    # NOTE(review): punctuation (including apostrophes) is stripped before this
    # lookup, so any stop-word entries that contain an apostrophe can no longer
    # match. Left as-is, presumably to mirror the preprocessing the saved
    # model was trained with - confirm before changing.
    english_stop_words = set(stopwords.words('english'))
    return ' '.join(
        word for word in words if word not in english_stop_words
    )
@st.cache_resource
def _load_artifacts():
    """Load the saved classifier and vectorizer once per server process.

    Streamlit re-runs the script on every interaction; caching the loaded
    objects avoids re-reading both files from disk on each rerun.

    Returns:
        A ``(model, vectorizer)`` tuple.
    """
    # NOTE: joblib.load unpickles the files, so only trusted artifact files
    # should ever be placed at these paths.
    classifier = joblib.load('spam_detector_model.joblib')
    text_vectorizer = joblib.load('tfidf_vectorizer.joblib')
    return classifier, text_vectorizer


model, vectorizer = _load_artifacts()
# Page header: title plus a short usage blurb.
# The title emoji was stored as mis-decoded UTF-8 bytes; it is restored here.
st.title("📧 Spam Message Detector")
st.write("""
This app detects whether a message is spam or not.
Enter your message below and click 'Analyze' to check!
""")
def _render_analysis(raw_message):
    """Classify *raw_message* and render the verdict, confidence and details.

    Args:
        raw_message: The non-blank text entered by the user.
    """
    processed_text = preprocess_text(raw_message)
    if not processed_text:
        # Nothing survived preprocessing, so the vectorizer would receive an
        # empty document and the reported confidence would not depend on the
        # message at all.
        st.warning(
            "Nothing left to analyze after preprocessing - the message "
            "contains only punctuation, digits, or common stop words."
        )
        return

    text_vectorized = vectorizer.transform([processed_text])
    prediction = model.predict(text_vectorized)[0]
    probability = model.predict_proba(text_vectorized)[0]

    st.markdown("### Analysis Result")
    # Label 1 is treated as spam; the confidence shown is the probability at
    # the index of the predicted label.
    if prediction == 1:
        st.error("🚨 This message is likely SPAM!")
        st.write(f"Confidence: {probability[1]:.2%}")
    else:
        st.success("✅ This message appears to be legitimate.")
        st.write(f"Confidence: {probability[0]:.2%}")

    with st.expander("See preprocessing steps"):
        st.write("Original message:", raw_message)
        st.write("Processed message:", processed_text)


message = st.text_area("Enter your message:", height=100)
if st.button("Analyze"):
    # Whitespace-only input counts as empty rather than being classified.
    if message and message.strip():
        _render_analysis(message)
    else:
        st.warning("Please enter a message to analyze.")
# Sidebar: static background text about the classifier, written with the
# ``st.sidebar.<element>`` object notation.
st.sidebar.header("About the Model")
st.sidebar.write("""
This spam detector uses an XGBoost classifier trained on a dataset of spam and legitimate messages.
Model Performance:
- Training Accuracy: 99.7%
- Testing Accuracy: 98.9%
""")