rmdhirr committed on
Commit
65ec533
·
verified ·
1 Parent(s): bccb3f8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -20
app.py CHANGED
@@ -34,7 +34,7 @@ def normalize_length(text, target_length=50):
34
  if len(text) < target_length:
35
  text = text + " " * (target_length - len(text))
36
  else:
37
- text = text[:target_length]
38
  return text
39
 
40
  def preprocess_url(url):
@@ -77,30 +77,19 @@ def preprocess_input(input_text, tokenizer, max_length):
77
  padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post', truncating='post')
78
  return padded_sequences
79
 
80
- def get_prediction(input_text, is_html):
81
- if not is_html:
82
- cleaned_text = preprocess_url(input_text)
83
- input_data = preprocess_input(cleaned_text, url_tokenizer, max_url_length)
84
- input_data = [input_data, np.zeros((1, max_html_length))] # dummy HTML input
85
- else:
86
- cleaned_text = preprocess_html(input_text)
87
- input_data = preprocess_input(cleaned_text, html_tokenizer, max_html_length)
88
- input_data = [np.zeros((1, max_url_length)), input_data] # dummy URL input
89
 
 
90
  prediction = model.predict(input_data)[0][0]
91
  return prediction
92
 
93
- # List of known safe domains to help prevent false positives
94
- safe_domains = ['perplexity.ai', 'google.com', 'wikipedia.org']
95
-
96
  def phishing_detection(input_text):
97
- domain = extract_domain(input_text)
98
- if domain in safe_domains:
99
- return f"Safe: This site is a known safe domain. (Domain: {domain})"
100
-
101
- is_html = bool(re.search(r'<[^>]+>', input_text))
102
- prediction = get_prediction(input_text, is_html)
103
-
104
  if prediction > 0.5:
105
  return f"Warning: This site is likely a phishing site! ({prediction:.2f})"
106
  else:
 
34
  if len(text) < target_length:
35
  text = text + " " * (target_length - len(text))
36
  else:
37
+ text = text[: target_length]
38
  return text
39
 
40
  def preprocess_url(url):
 
77
  padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post', truncating='post')
78
  return padded_sequences
79
 
80
+ def get_prediction(input_text):
81
+ cleaned_url = preprocess_url(input_text)
82
+ cleaned_html = preprocess_html(input_text)
83
+
84
+ url_data = preprocess_input(cleaned_url, url_tokenizer, max_url_length)
85
+ html_data = preprocess_input(cleaned_html, html_tokenizer, max_html_length)
 
 
 
86
 
87
+ input_data = [url_data, html_data]
88
  prediction = model.predict(input_data)[0][0]
89
  return prediction
90
 
 
 
 
91
  def phishing_detection(input_text):
92
+ prediction = get_prediction(input_text)
 
 
 
 
 
 
93
  if prediction > 0.5:
94
  return f"Warning: This site is likely a phishing site! ({prediction:.2f})"
95
  else: