rmdhirr committed on
Commit
2e97462
·
verified ·
1 Parent(s): a04dd4c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -11
app.py CHANGED
@@ -62,13 +62,6 @@ def preprocess_html(html):
62
  tokens = [lemmatizer.lemmatize(word) for word in tokens]
63
  return ' '.join(tokens)
64
 
65
- def extract_features(url):
66
- features = {}
67
- features['length'] = len(url)
68
- features['num_special_chars'] = len(re.findall(r'[^a-zA-Z0-9]', url))
69
- features['num_digits'] = len(re.findall(r'\d', url))
70
- return features
71
-
72
  max_url_length = 180
73
  max_html_length = 2000
74
  max_words = 10000
@@ -91,11 +84,10 @@ def get_prediction(input_text):
91
  url_data = preprocess_input(cleaned_url, url_tokenizer, max_url_length)
92
  html_data = preprocess_input(cleaned_html, html_tokenizer, max_html_length)
93
 
94
- features = extract_features(input_text)
95
- features_vector = np.array([[features['length'], features['num_special_chars'], features['num_digits']]])
96
 
97
- input_data = [url_data, html_data, features_vector]
98
- prediction = model.predict(input_data)[0][0]
99
  return prediction
100
 
101
  def phishing_detection(input_text):
 
62
  tokens = [lemmatizer.lemmatize(word) for word in tokens]
63
  return ' '.join(tokens)
64
 
 
 
 
 
 
 
 
65
  max_url_length = 180
66
  max_html_length = 2000
67
  max_words = 10000
 
84
  url_data = preprocess_input(cleaned_url, url_tokenizer, max_url_length)
85
  html_data = preprocess_input(cleaned_html, html_tokenizer, max_html_length)
86
 
87
+ # Combine URL and HTML data by concatenation
88
+ combined_data = np.concatenate((url_data, html_data), axis=1)
89
 
90
+ prediction = model.predict(combined_data)[0][0]
 
91
  return prediction
92
 
93
  def phishing_detection(input_text):