Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -34,7 +34,7 @@ def normalize_length(text, target_length=50):
|
|
34 |
if len(text) < target_length:
|
35 |
text = text + " " * (target_length - len(text))
|
36 |
else:
|
37 |
-
text = text[:target_length]
|
38 |
return text
|
39 |
|
40 |
def preprocess_url(url):
|
@@ -77,30 +77,19 @@ def preprocess_input(input_text, tokenizer, max_length):
|
|
77 |
padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post', truncating='post')
|
78 |
return padded_sequences
|
79 |
|
80 |
-
def get_prediction(input_text
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
cleaned_text = preprocess_html(input_text)
|
87 |
-
input_data = preprocess_input(cleaned_text, html_tokenizer, max_html_length)
|
88 |
-
input_data = [np.zeros((1, max_url_length)), input_data] # dummy URL input
|
89 |
|
|
|
90 |
prediction = model.predict(input_data)[0][0]
|
91 |
return prediction
|
92 |
|
93 |
-
# List of known safe domains to help prevent false positives
|
94 |
-
safe_domains = ['perplexity.ai', 'google.com', 'wikipedia.org']
|
95 |
-
|
96 |
def phishing_detection(input_text):
|
97 |
-
|
98 |
-
if domain in safe_domains:
|
99 |
-
return f"Safe: This site is a known safe domain. (Domain: {domain})"
|
100 |
-
|
101 |
-
is_html = bool(re.search(r'<[^>]+>', input_text))
|
102 |
-
prediction = get_prediction(input_text, is_html)
|
103 |
-
|
104 |
if prediction > 0.5:
|
105 |
return f"Warning: This site is likely a phishing site! ({prediction:.2f})"
|
106 |
else:
|
|
|
34 |
if len(text) < target_length:
|
35 |
text = text + " " * (target_length - len(text))
|
36 |
else:
|
37 |
+
text = text[: target_length]
|
38 |
return text
|
39 |
|
40 |
def preprocess_url(url):
|
|
|
77 |
padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post', truncating='post')
|
78 |
return padded_sequences
|
79 |
|
80 |
+
def get_prediction(input_text):
|
81 |
+
cleaned_url = preprocess_url(input_text)
|
82 |
+
cleaned_html = preprocess_html(input_text)
|
83 |
+
|
84 |
+
url_data = preprocess_input(cleaned_url, url_tokenizer, max_url_length)
|
85 |
+
html_data = preprocess_input(cleaned_html, html_tokenizer, max_html_length)
|
|
|
|
|
|
|
86 |
|
87 |
+
input_data = [url_data, html_data]
|
88 |
prediction = model.predict(input_data)[0][0]
|
89 |
return prediction
|
90 |
|
|
|
|
|
|
|
91 |
def phishing_detection(input_text):
|
92 |
+
prediction = get_prediction(input_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
if prediction > 0.5:
|
94 |
return f"Warning: This site is likely a phishing site! ({prediction:.2f})"
|
95 |
else:
|