Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -34,7 +34,7 @@ def normalize_length(text, target_length=50):
|
|
34 |
if len(text) < target_length:
|
35 |
text = text + " " * (target_length - len(text))
|
36 |
else:
|
37 |
-
text = text[:
|
38 |
return text
|
39 |
|
40 |
def preprocess_url(url):
|
@@ -62,6 +62,13 @@ def preprocess_html(html):
|
|
62 |
tokens = [lemmatizer.lemmatize(word) for word in tokens]
|
63 |
return ' '.join(tokens)
|
64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
max_url_length = 180
|
66 |
max_html_length = 2000
|
67 |
max_words = 10000
|
@@ -84,7 +91,10 @@ def get_prediction(input_text):
|
|
84 |
url_data = preprocess_input(cleaned_url, url_tokenizer, max_url_length)
|
85 |
html_data = preprocess_input(cleaned_html, html_tokenizer, max_html_length)
|
86 |
|
87 |
-
|
|
|
|
|
|
|
88 |
prediction = model.predict(input_data)[0][0]
|
89 |
return prediction
|
90 |
|
|
|
34 |
if len(text) < target_length:
|
35 |
text = text + " " * (target_length - len(text))
|
36 |
else:
|
37 |
+
text = text[:target_length]
|
38 |
return text
|
39 |
|
40 |
def preprocess_url(url):
|
|
|
62 |
tokens = [lemmatizer.lemmatize(word) for word in tokens]
|
63 |
return ' '.join(tokens)
|
64 |
|
65 |
+
def extract_features(url):
    """Compute three simple numeric descriptors of a URL string.

    Args:
        url: The text to measure.

    Returns:
        A dict with, in this order:
          'length'            - total number of characters in ``url``.
          'num_special_chars' - number of characters that are neither an
                                ASCII letter nor an ASCII digit.
          'num_digits'        - number of characters matched by the regex
                                digit class.
    """
    # Measure the length first so a non-sized argument fails here, exactly
    # as it would before any regex scan runs.
    total_chars = len(url)
    special_matches = re.findall(r'[^a-zA-Z0-9]', url)
    digit_matches = re.findall(r'\d', url)
    return {
        'length': total_chars,
        'num_special_chars': len(special_matches),
        'num_digits': len(digit_matches),
    }
|
71 |
+
|
72 |
max_url_length = 180
|
73 |
max_html_length = 2000
|
74 |
max_words = 10000
|
|
|
91 |
url_data = preprocess_input(cleaned_url, url_tokenizer, max_url_length)
|
92 |
html_data = preprocess_input(cleaned_html, html_tokenizer, max_html_length)
|
93 |
|
94 |
+
features = extract_features(input_text)
|
95 |
+
features_vector = np.array([[features['length'], features['num_special_chars'], features['num_digits']]])
|
96 |
+
|
97 |
+
input_data = [url_data, html_data, features_vector]
|
98 |
prediction = model.predict(input_data)[0][0]
|
99 |
return prediction
|
100 |
|