web-phishing-detection

Sleeping

rmdhirr committed on Jun 16, 2024

Commit

bccb3f8

verified ·

1 Parent(s): 102a386

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -77,9 +77,8 @@ def preprocess_input(input_text, tokenizer, max_length):
     padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post', truncating='post')
     return padded_sequences
-def get_prediction(input_text, input_type):
-    is_url = input_type == "URL"
-    if is_url:
         cleaned_text = preprocess_url(input_text)
         input_data = preprocess_input(cleaned_text, url_tokenizer, max_url_length)
         input_data = [input_data, np.zeros((1, max_html_length))]  # dummy HTML input
@@ -91,8 +90,17 @@ def get_prediction(input_text, input_type):
     prediction = model.predict(input_data)[0][0]
     return prediction
-def phishing_detection(input_text, input_type):
-    prediction = get_prediction(input_text, input_type)
     if prediction > 0.5:
         return f"Warning: This site is likely a phishing site! ({prediction:.2f})"
     else:
@@ -100,10 +108,7 @@ def phishing_detection(input_text, input_type):
 iface = gr.Interface(
     fn=phishing_detection,
-    inputs=[
-        gr.components.Textbox(lines=5, placeholder="Enter URL or HTML code"),
-        gr.components.Radio(["URL", "HTML"], type="value", label="Input Type")
-    ],
     outputs=gr.components.Textbox(label="Phishing Detection Result"),
     title="Phishing Detection Model",
     description="Check if a URL or HTML is Phishing.",

     padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post', truncating='post')
     return padded_sequences
+def get_prediction(input_text, is_html):
+    if not is_html:
         cleaned_text = preprocess_url(input_text)
         input_data = preprocess_input(cleaned_text, url_tokenizer, max_url_length)
         input_data = [input_data, np.zeros((1, max_html_length))]  # dummy HTML input
     prediction = model.predict(input_data)[0][0]
     return prediction
+# List of known safe domains to help prevent false positives
+safe_domains = ['perplexity.ai', 'google.com', 'wikipedia.org']
+def phishing_detection(input_text):
+    domain = extract_domain(input_text)
+    if domain in safe_domains:
+        return f"Safe: This site is a known safe domain. (Domain: {domain})"
+    is_html = bool(re.search(r'<[^>]+>', input_text))
+    prediction = get_prediction(input_text, is_html)
     if prediction > 0.5:
         return f"Warning: This site is likely a phishing site! ({prediction:.2f})"
     else:
 iface = gr.Interface(
     fn=phishing_detection,
+    inputs=gr.components.Textbox(lines=5, placeholder="Enter URL or HTML code"),
     outputs=gr.components.Textbox(label="Phishing Detection Result"),
     title="Phishing Detection Model",
     description="Check if a URL or HTML is Phishing.",