Spaces:

drvpokhilko
/

huggingface_spam_not_spam_email_classifier

Sleeping

App Files Files Community

drvpokhilko committed on Jan 24

Commit

6ebf166

verified ·

1 Parent(s): 9188b40

Uploading spam not spam email classifier demo app.py

Browse files

Files changed (3) hide show

README.md +12 -6
app.py +50 -0
requirements.txt +3 -0

README.md CHANGED Viewed

@@ -1,12 +1,18 @@
 ---
-title: Huggingface Spam Not Spam Email Classifier
-emoji: 🌍
-colorFrom: indigo
-colorTo: red
 sdk: gradio
-sdk_version: 5.13.1
 app_file: app.py
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Spam Not Spam Email Classifier
+emoji: 👩‍💻⌨️📧
+colorFrom: purple
+colorTo: gray
 sdk: gradio
 app_file: app.py
 pinned: false
+license: apache-2.0
 ---
+# 👩‍💻⌨️📧 Spam Not Spam Email Classifier
+A small demo showcasing a text classifier that predicts whether an email is spam or not spam.
+It uses a DistilBERT model fine-tuned on a relatively small dataset of ~11k email samples - [Deysi's spam detection dataset](https://huggingface.co/datasets/Deysi/spam-detection-dataset).

app.py ADDED Viewed

	@@ -0,0 +1,50 @@

+# 1. Import the required packages
+import torch
+import gradio as gr
+from typing import Dict
+from transformers import pipeline
+# 2. Define function to use our model on given text
+def spam_not_spam_classifier(text: str) -> Dict[str, float]:
+  """
+  Takes an input string of text and classifies it into spam/not_spam in the form of a dictionary.
+  """
+  # 2. Setup the pipeline to use the local model (or Hugging Face model path)
+  spam_not_spam_classifier = pipeline(task="text-classification",
+                                      model="drvpokhilko/huggingface_spam_not_spam_classifier-distilbert-base-uncased",
+                                      batch_size=32,
+                                      device="cuda" if torch.cuda.is_available() else "cpu", # set the device to work in any environment
+                                      top_k=None) # return all possible scores (not just top-1)
+  # 3. Get outputs from pipeline (as a list of dicts)
+  outputs = spam_not_spam_classifier(text)[0]
+  # 4. Format output for Gradio (e.g., {"label_1": probability_1, "label_2": probability_2})
+  output_dict = {}
+  for item in outputs:
+    output_dict[item["label"]] = item["score"]
+  return output_dict
+# 3. Create a Gradio interface with details about our app
+description = """
+A text classifier to determine if an email text is spam or not spam.
+Fine-tuned from [DistilBERT](https://huggingface.co/distilbert/distilbert-base-uncased) on a relatively small [dataset (~11k samples) of spam or not spam emails](https://huggingface.co/datasets/Deysi/spam-detection-dataset).
+"""
+demo = gr.Interface(fn=spam_not_spam_classifier,
+                    inputs="text",
+                    outputs=gr.Label(num_top_classes=2),
+                    title="📧⌨️👩‍💻Spam or Not Spam Email Classifier",
+                    description=description,
+                    examples=[["Hi John, here's the project report you requested. Let me know if you need any changes."],
+                              ["Get access to unlimited movies and TV shows for free. Sign up today!"]])
+# 4. Launch the interface
+if __name__ == "__main__":
+  demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+gradio==5.12.0
+torch==2.5.1
+transformers==4.47.1