Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -3,67 +3,99 @@ from transformers import pipeline, AutoModelForSequenceClassification, AutoToken
|
|
3 |
import torch
|
4 |
import numpy as np
|
5 |
|
6 |
-
def
|
7 |
-
# Load
|
8 |
spam_pipeline = pipeline("text-classification", model="cybersectony/phishing-email-detection-distilbert_v2.4.1")
|
9 |
-
|
10 |
-
# Load the sentiment model and tokenizer
|
11 |
sentiment_model = AutoModelForSequenceClassification.from_pretrained("ISOM5240GP4/email_sentiment", num_labels=2)
|
12 |
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
|
13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
st.title("Email Analysis Tool")
|
15 |
-
st.write("Enter an email body below or
|
16 |
|
|
|
17 |
if "email_body" not in st.session_state:
|
18 |
st.session_state.email_body = ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
|
|
|
|
|
|
26 |
|
|
|
27 |
col1, col2, col3 = st.columns(3)
|
28 |
with col1:
|
29 |
-
if st.button("Spam
|
30 |
st.session_state.email_body = sample_spam
|
|
|
31 |
st.rerun()
|
32 |
with col2:
|
33 |
-
if st.button("
|
34 |
st.session_state.email_body = sample_not_spam_positive
|
|
|
35 |
st.rerun()
|
36 |
with col3:
|
37 |
-
if st.button("
|
38 |
st.session_state.email_body = sample_not_spam_negative
|
|
|
39 |
st.rerun()
|
40 |
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
spam_label = spam_result[0]["label"]
|
45 |
-
spam_confidence = spam_result[0]["score"]
|
46 |
-
|
47 |
-
if spam_label == "LABEL_1":
|
48 |
-
st.write(f"This is a spam email (Confidence: {spam_confidence:.2f}). No follow-up needed.")
|
49 |
-
else:
|
50 |
-
inputs = tokenizer(email_body, padding=True, truncation=True, return_tensors='pt')
|
51 |
-
outputs = sentiment_model(**inputs)
|
52 |
-
predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
|
53 |
-
predictions = predictions.cpu().detach().numpy()
|
54 |
-
sentiment_index = np.argmax(predictions)
|
55 |
-
sentiment_confidence = predictions[0][sentiment_index]
|
56 |
-
|
57 |
-
sentiment = "Positive" if sentiment_index == 1 else "Negative"
|
58 |
-
if sentiment == "Positive":
|
59 |
-
st.write(f"This email is not spam (Confidence: {spam_confidence:.2f}).")
|
60 |
-
st.write(f"Sentiment: {sentiment} (Confidence: {sentiment_confidence:.2f}). No follow-up needed.")
|
61 |
-
else:
|
62 |
-
st.write(f"This email is not spam (Confidence: {spam_confidence:.2f}).")
|
63 |
-
st.write(f"Sentiment: {sentiment} (Confidence: {sentiment_confidence:.2f}).")
|
64 |
-
st.write("**This email needs follow-up as it is not spam and has negative sentiment.**")
|
65 |
-
else:
|
66 |
-
st.write("Please enter an email body or select a sample to analyze.")
|
67 |
|
68 |
if __name__ == "__main__":
|
69 |
main()
|
|
|
3 |
import torch
|
4 |
import numpy as np
|
5 |
|
6 |
+
def _build_models():
    """Instantiate the spam pipeline, the sentiment model and its tokenizer.

    Returns:
        A ``(spam_pipeline, sentiment_model, tokenizer)`` tuple.
    """
    spam_pipeline = pipeline(
        "text-classification",
        model="cybersectony/phishing-email-detection-distilbert_v2.4.1",
    )
    sentiment_model = AutoModelForSequenceClassification.from_pretrained(
        "ISOM5240GP4/email_sentiment", num_labels=2
    )
    tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
    return spam_pipeline, sentiment_model, tokenizer


def _load_models():
    """Return the models, reusing previously built instances when possible.

    Building the models is by far the most expensive step, so it is routed
    through Streamlit's resource cache instead of being repeated per call.
    """
    # Function-scope import: only this helper needs Streamlit, and importing
    # here keeps the early-return path of analyze_email() free of it.
    import streamlit as st

    # The resource cache is keyed on the wrapped function (module, qualified
    # name and source), so re-wrapping on each call hits the same cache entry.
    return st.cache_resource(_build_models)()


def analyze_email(email_body):
    """Classify an email as spam or not and, if not spam, score its sentiment.

    Args:
        email_body: Raw text of the email to analyze.

    Returns:
        A human-readable, newline-separated report. Blank input yields a
        prompt asking for text. An email labelled ``LABEL_1`` by the spam
        model is reported as spam. Any other email is reported as not spam
        together with its sentiment; a negative sentiment adds a follow-up
        notice.
    """
    # Nothing to classify: answer before paying for any model work.
    if not email_body or not email_body.strip():
        return "Please enter an email body or select a sample to analyze."

    spam_pipeline, sentiment_model, tokenizer = _load_models()

    # Step 1: Check if the email is spam.
    # truncation=True keeps long emails within the model's maximum length
    # instead of failing inside the pipeline.
    spam_result = spam_pipeline(email_body, truncation=True)
    spam_label = spam_result[0]["label"]
    spam_confidence = spam_result[0]["score"]

    if spam_label == "LABEL_1":
        return f"This is a spam email (Confidence: {spam_confidence:.2f}). No follow-up needed."

    # Step 2: Analyze sentiment for non-spam emails.
    inputs = tokenizer(email_body, padding=True, truncation=True, return_tensors='pt')
    with torch.no_grad():  # inference only, so skip gradient bookkeeping
        outputs = sentiment_model(**inputs)

    # A single text was tokenized, so row 0 holds its class probabilities.
    probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)[0]
    sentiment_index = int(probabilities.argmax())
    sentiment_confidence = float(probabilities[sentiment_index])
    sentiment = "Positive" if sentiment_index == 1 else "Negative"

    if sentiment == "Positive":
        return (f"This email is not spam (Confidence: {spam_confidence:.2f}).\n"
                f"Sentiment: {sentiment} (Confidence: {sentiment_confidence:.2f}). No follow-up needed.")

    return (f"This email is not spam (Confidence: {spam_confidence:.2f}).\n"
            f"Sentiment: {sentiment} (Confidence: {sentiment_confidence:.2f}).\n"
            "**This email needs follow-up as it is not spam and has negative sentiment.**")
|
36 |
+
|
37 |
+
def main():
    """Render the email analysis page.

    Shows a text area for the email body and three buttons that load a
    sample email. Editing the text or clicking a sample runs
    ``analyze_email`` and stores the report in ``st.session_state.result``,
    which is then displayed below the buttons.
    """
    st.title("Email Analysis Tool")
    st.write("Enter an email body below or click a sample to analyze its spam status and sentiment.")

    # Initialize session state
    if "email_body" not in st.session_state:
        st.session_state.email_body = ""
    if "result" not in st.session_state:
        st.session_state.result = ""

    def _analyze_typed_text():
        # Widget callback: analyze whatever the text area currently holds.
        st.session_state.result = analyze_email(st.session_state.email_input)

    # Text area for email input
    st.text_area(
        "Email Body",
        value=st.session_state.email_body,
        height=200,
        key="email_input",
        on_change=_analyze_typed_text,
    )

    # Sample emails (stored as variables, not button labels)
    sample_spam = """
Subject: Urgent: Verify Your Account Now!
Dear Customer,
We have detected unusual activity on your account. To prevent suspension, please verify your login details immediately by clicking the link below:
[Click Here to Verify](http://totally-legit-site.com/verify)
Failure to verify within 24 hours will result in your account being locked. This is for your security.
Best regards,
The Security Team
"""

    sample_not_spam_positive = """
Subject: Great News About Your Project!
Hi Team,
I just wanted to let you know that the project is progressing wonderfully! Everyone’s efforts are paying off, and we’re ahead of schedule. Keep up the fantastic work!
Best,
Alex
"""

    sample_not_spam_negative = """
Subject: Issue with Recent Delivery
Dear Support,
I received my package today, but it was damaged, and two items were missing. This is really frustrating—please let me know how we can resolve this as soon as possible.
Thanks,
Sarah
"""

    # One button per sample, laid out side by side in columns.
    samples = (
        ("Spam Sample", sample_spam),
        ("Positive Sample", sample_not_spam_positive),
        ("Negative Sample", sample_not_spam_negative),
    )
    for column, (label, sample) in zip(st.columns(len(samples)), samples):
        with column:
            if st.button(label):
                st.session_state.email_body = sample
                st.session_state.result = analyze_email(sample)
                # Restart the script so the text area picks up the sample.
                st.rerun()

    # Display result. The report separates its parts with single newlines,
    # which Markdown rendering would fold into one paragraph, so each line is
    # written as its own element.
    if st.session_state.result:
        for line in st.session_state.result.splitlines():
            st.write(line)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
|
100 |
if __name__ == "__main__":
|
101 |
main()
|