Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -77,9 +77,8 @@ def preprocess_input(input_text, tokenizer, max_length):
|
|
77 |
padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post', truncating='post')
|
78 |
return padded_sequences
|
79 |
|
80 |
-
def get_prediction(input_text,
|
81 |
-
|
82 |
-
if is_url:
|
83 |
cleaned_text = preprocess_url(input_text)
|
84 |
input_data = preprocess_input(cleaned_text, url_tokenizer, max_url_length)
|
85 |
input_data = [input_data, np.zeros((1, max_html_length))] # dummy HTML input
|
@@ -91,8 +90,17 @@ def get_prediction(input_text, input_type):
|
|
91 |
prediction = model.predict(input_data)[0][0]
|
92 |
return prediction
|
93 |
|
94 |
-
|
95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
if prediction > 0.5:
|
97 |
return f"Warning: This site is likely a phishing site! ({prediction:.2f})"
|
98 |
else:
|
@@ -100,10 +108,7 @@ def phishing_detection(input_text, input_type):
|
|
100 |
|
101 |
iface = gr.Interface(
|
102 |
fn=phishing_detection,
|
103 |
-
inputs=
|
104 |
-
gr.components.Textbox(lines=5, placeholder="Enter URL or HTML code"),
|
105 |
-
gr.components.Radio(["URL", "HTML"], type="value", label="Input Type")
|
106 |
-
],
|
107 |
outputs=gr.components.Textbox(label="Phishing Detection Result"),
|
108 |
title="Phishing Detection Model",
|
109 |
description="Check if a URL or HTML is Phishing.",
|
|
|
77 |
padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post', truncating='post')
|
78 |
return padded_sequences
|
79 |
|
80 |
+
def get_prediction(input_text, is_html):
|
81 |
+
if not is_html:
|
|
|
82 |
cleaned_text = preprocess_url(input_text)
|
83 |
input_data = preprocess_input(cleaned_text, url_tokenizer, max_url_length)
|
84 |
input_data = [input_data, np.zeros((1, max_html_length))] # dummy HTML input
|
|
|
90 |
prediction = model.predict(input_data)[0][0]
|
91 |
return prediction
|
92 |
|
93 |
+
# List of known safe domains to help prevent false positives
|
94 |
+
safe_domains = ['perplexity.ai', 'google.com', 'wikipedia.org']
|
95 |
+
|
96 |
+
def phishing_detection(input_text):
|
97 |
+
domain = extract_domain(input_text)
|
98 |
+
if domain in safe_domains:
|
99 |
+
return f"Safe: This site is a known safe domain. (Domain: {domain})"
|
100 |
+
|
101 |
+
is_html = bool(re.search(r'<[^>]+>', input_text))
|
102 |
+
prediction = get_prediction(input_text, is_html)
|
103 |
+
|
104 |
if prediction > 0.5:
|
105 |
return f"Warning: This site is likely a phishing site! ({prediction:.2f})"
|
106 |
else:
|
|
|
108 |
|
109 |
iface = gr.Interface(
|
110 |
fn=phishing_detection,
|
111 |
+
inputs=gr.components.Textbox(lines=5, placeholder="Enter URL or HTML code"),
|
|
|
|
|
|
|
112 |
outputs=gr.components.Textbox(label="Phishing Detection Result"),
|
113 |
title="Phishing Detection Model",
|
114 |
description="Check if a URL or HTML is Phishing.",
|