Uploading spam not spam email classifier demo app.py
Browse files- README.md +12 -6
- app.py +50 -0
- requirements.txt +3 -0
README.md
CHANGED
@@ -1,12 +1,18 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 5.13.1
|
8 |
app_file: app.py
|
9 |
pinned: false
|
|
|
10 |
---
|
11 |
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
title: Spam Not Spam Email Classifier
|
3 |
+
emoji: 👩‍💻⌨️📧
|
4 |
+
colorFrom: purple
|
5 |
+
colorTo: gray
|
6 |
sdk: gradio
|
|
|
7 |
app_file: app.py
|
8 |
pinned: false
|
9 |
+
license: apache-2.0
|
10 |
---
|
11 |
|
12 |
+
# 👩‍💻⌨️📧 Spam Not Spam Email Classifier
|
13 |
+
|
14 |
+
Small demo showcasing a text classifier that determines whether an email is spam or not spam.
|
15 |
+
|
16 |
+
DistilBERT model fine-tuned on a relatively small dataset of ~11k email samples - [Deysi's spam detection dataset](https://huggingface.co/datasets/Deysi/spam-detection-dataset).
|
17 |
+
|
18 |
+
|
app.py
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# 1. Import the required packages
|
3 |
+
import torch
|
4 |
+
import gradio as gr
|
5 |
+
|
6 |
+
from typing import Dict
|
7 |
+
from transformers import pipeline
|
8 |
+
|
9 |
+
# 2. Define function to use our model on given text
|
10 |
+
# Shared text-classification pipeline, created on first use so the model is
# loaded once per process instead of once per request.
_CLASSIFIER_PIPELINE = None


def _get_classifier_pipeline():
    """Return the shared Hugging Face pipeline, building it on the first call."""
    global _CLASSIFIER_PIPELINE
    if _CLASSIFIER_PIPELINE is None:
        _CLASSIFIER_PIPELINE = pipeline(
            task="text-classification",
            model="drvpokhilko/huggingface_spam_not_spam_classifier-distilbert-base-uncased",
            batch_size=32,
            # Set the device so the app works in any environment.
            device="cuda" if torch.cuda.is_available() else "cpu",
            # Return all possible scores (not just top-1).
            top_k=None,
        )
    return _CLASSIFIER_PIPELINE


def spam_not_spam_classifier(text: str) -> Dict[str, float]:
    """Classify a piece of email text as spam / not spam.

    Args:
        text: The email text to classify.

    Returns:
        A dictionary mapping each label reported by the model to its score,
        in the ``{"label_1": probability_1, "label_2": probability_2}`` form
        that a Gradio label output accepts.
    """
    # Index 0 selects the list of per-label dicts for our single input text.
    outputs = _get_classifier_pipeline()(text)[0]

    # Format output for Gradio: one entry per label.
    return {item["label"]: item["score"] for item in outputs}
|
32 |
+
|
33 |
+
# 3. Create a Gradio interface with details about our app
|
34 |
+
# Markdown blurb passed to the interface as its description.
description = """
A text classifier to determine if an email text is spam or not spam.

Fine-tuned from [DistilBERT](https://huggingface.co/distilbert/distilbert-base-uncased) on a relatively small [dataset (~11k samples) of spam or not spam emails](https://huggingface.co/datasets/Deysi/spam-detection-dataset).
"""

# Wire the classifier function to a text input and a label output.
# The title's emoji were restored from mis-decoded bytes in the original text.
demo = gr.Interface(
    fn=spam_not_spam_classifier,
    inputs="text",
    outputs=gr.Label(num_top_classes=2),
    title="📧⌨️👩‍💻Spam or Not Spam Email Classifier",
    description=description,
    examples=[
        ["Hi John, here's the project report you requested. Let me know if you need any changes."],
        ["Get access to unlimited movies and TV shows for free. Sign up today!"],
    ],
)

# 4. Launch the interface
if __name__ == "__main__":
    demo.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
gradio==5.12.0
|
2 |
+
torch==2.5.1
|
3 |
+
transformers==4.47.1
|