drvpokhilko committed on
Commit
6ebf166
·
verified ·
1 Parent(s): 9188b40

Uploading spam not spam email classifier demo app.py

Browse files
Files changed (3) hide show
  1. README.md +12 -6
  2. app.py +50 -0
  3. requirements.txt +3 -0
README.md CHANGED
@@ -1,12 +1,18 @@
1
  ---
2
- title: Huggingface Spam Not Spam Email Classifier
3
- emoji: 🌍
4
- colorFrom: indigo
5
- colorTo: red
6
  sdk: gradio
7
- sdk_version: 5.13.1
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
1
  ---
2
+ title: Spam Not Spam Email Classifier
3
+ emoji: 👩‍💻⌨️📧
4
+ colorFrom: purple
5
+ colorTo: gray
6
  sdk: gradio
 
7
  app_file: app.py
8
  pinned: false
9
+ license: apache-2.0
10
  ---
11
 
12
+ # 👩‍💻⌨️📧 Spam Not Spam Email Classifier
13
+
14
+ Small demo showcasing a text classifier that determines whether an email is spam or not spam.
15
+
16
+ DistilBERT model fine-tuned on a relatively small dataset of ~11k email samples - [Deysi's spam detection dataset](https://huggingface.co/datasets/Deysi/spam-detection-dataset).
17
+
18
+
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # 1. Import the required packages
3
+ import torch
4
+ import gradio as gr
5
+
6
+ from typing import Dict
7
+ from transformers import pipeline
8
+
9
# 2. Define function to use our model on given text

# Shared pipeline instance, created on the first request and reused afterwards.
# Building the pipeline loads the model, so doing it on every call (as the
# previous version did) makes each classification needlessly slow.
_classifier_pipeline = None


def _get_classifier_pipeline():
    """Return the shared text-classification pipeline, creating it on first use."""
    global _classifier_pipeline
    if _classifier_pipeline is None:
        # Setup the pipeline to use the local model (or Hugging Face model path)
        _classifier_pipeline = pipeline(
            task="text-classification",
            model="drvpokhilko/huggingface_spam_not_spam_classifier-distilbert-base-uncased",
            batch_size=32,
            # Set the device to work in any environment
            device="cuda" if torch.cuda.is_available() else "cpu",
            # Return all possible scores (not just top-1)
            top_k=None,
        )
    return _classifier_pipeline


def spam_not_spam_classifier(text: str) -> Dict[str, float]:
    """
    Takes an input string of text and classifies it into spam/not_spam in the form of a dictionary.

    Args:
        text: The email text to classify.

    Returns:
        A dictionary mapping each label reported by the model to its score,
        in the shape Gradio's Label output expects
        (e.g., {"label_1": probability_1, "label_2": probability_2}).
    """
    # Get outputs from the pipeline; the first element is the list of
    # {"label": ..., "score": ...} dicts for our single input text.
    outputs = _get_classifier_pipeline()(text)[0]

    # Format output for Gradio
    return {item["label"]: item["score"] for item in outputs}
32
+
33
# 3. Create a Gradio interface with details about our app
# The description is rendered under the title; it links to the base model and
# the dataset used for fine-tuning.
description = """
A text classifier to determine if an email text is spam or not spam.

Fine-tuned from [DistilBERT](https://huggingface.co/distilbert/distilbert-base-uncased) on a relatively small [dataset (~11k samples) of spam or not spam emails](https://huggingface.co/datasets/Deysi/spam-detection-dataset).
"""

demo = gr.Interface(
    fn=spam_not_spam_classifier,
    inputs="text",
    # Show the scores for up to two classes
    outputs=gr.Label(num_top_classes=2),
    # Emoji restored from their mis-encoded form
    title="📧⌨️👩‍💻Spam or Not Spam Email Classifier",
    description=description,
    # One example input per row
    examples=[
        ["Hi John, here's the project report you requested. Let me know if you need any changes."],
        ["Get access to unlimited movies and TV shows for free. Sign up today!"],
    ],
)

# 4. Launch the interface
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio==5.12.0
2
+ torch==2.5.1
3
+ transformers==4.47.1