rmdhirr committed on
Commit
bccb3f8
·
verified ·
1 Parent(s): 102a386

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -9
app.py CHANGED
@@ -77,9 +77,8 @@ def preprocess_input(input_text, tokenizer, max_length):
77
  padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post', truncating='post')
78
  return padded_sequences
79
 
80
- def get_prediction(input_text, input_type):
81
- is_url = input_type == "URL"
82
- if is_url:
83
  cleaned_text = preprocess_url(input_text)
84
  input_data = preprocess_input(cleaned_text, url_tokenizer, max_url_length)
85
  input_data = [input_data, np.zeros((1, max_html_length))] # dummy HTML input
@@ -91,8 +90,17 @@ def get_prediction(input_text, input_type):
91
  prediction = model.predict(input_data)[0][0]
92
  return prediction
93
 
94
- def phishing_detection(input_text, input_type):
95
- prediction = get_prediction(input_text, input_type)
 
 
 
 
 
 
 
 
 
96
  if prediction > 0.5:
97
  return f"Warning: This site is likely a phishing site! ({prediction:.2f})"
98
  else:
@@ -100,10 +108,7 @@ def phishing_detection(input_text, input_type):
100
 
101
  iface = gr.Interface(
102
  fn=phishing_detection,
103
- inputs=[
104
- gr.components.Textbox(lines=5, placeholder="Enter URL or HTML code"),
105
- gr.components.Radio(["URL", "HTML"], type="value", label="Input Type")
106
- ],
107
  outputs=gr.components.Textbox(label="Phishing Detection Result"),
108
  title="Phishing Detection Model",
109
  description="Check if a URL or HTML is Phishing.",
 
77
  padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post', truncating='post')
78
  return padded_sequences
79
 
80
+ def get_prediction(input_text, is_html):
81
+ if not is_html:
 
82
  cleaned_text = preprocess_url(input_text)
83
  input_data = preprocess_input(cleaned_text, url_tokenizer, max_url_length)
84
  input_data = [input_data, np.zeros((1, max_html_length))] # dummy HTML input
 
90
  prediction = model.predict(input_data)[0][0]
91
  return prediction
92
 
93
+ # List of known safe domains to help prevent false positives
94
+ safe_domains = ['perplexity.ai', 'google.com', 'wikipedia.org']
95
+
96
+ def phishing_detection(input_text):
97
+ domain = extract_domain(input_text)
98
+ if domain in safe_domains:
99
+ return f"Safe: This site is a known safe domain. (Domain: {domain})"
100
+
101
+ is_html = bool(re.search(r'<[^>]+>', input_text))
102
+ prediction = get_prediction(input_text, is_html)
103
+
104
  if prediction > 0.5:
105
  return f"Warning: This site is likely a phishing site! ({prediction:.2f})"
106
  else:
 
108
 
109
  iface = gr.Interface(
110
  fn=phishing_detection,
111
+ inputs=gr.components.Textbox(lines=5, placeholder="Enter URL or HTML code"),
 
 
 
112
  outputs=gr.components.Textbox(label="Phishing Detection Result"),
113
  title="Phishing Detection Model",
114
  description="Check if a URL or HTML is Phishing.",