Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -162,20 +162,20 @@ lexical_all = results[1]
|
|
162 |
|
163 |
# 2) Filter out content < 20 chars (as intermediate fix to problem that e.g. super short paragraphs with few chars get high similarity score)
|
164 |
semantic_all = [
|
165 |
-
r for r in semantic_all if len(r.payload["page_content"]) >=
|
166 |
]
|
167 |
lexical_all = [
|
168 |
-
r for r in lexical_all if len(r.payload["page_content"]) >=
|
169 |
]
|
170 |
|
171 |
# 2) Apply a threshold to SEMANTIC results (score >= 0.4)
|
172 |
-
semantic_thresholded = [r for r in semantic_all if r.score >= 0.
|
173 |
|
174 |
# 2) Filter the entire sets
|
175 |
filtered_semantic = filter_results(semantic_thresholded, country_filter, region_filter) ## , end_year_range ToDo add end_year filter again
|
176 |
filtered_lexical = filter_results(lexical_all, country_filter, region_filter)## , end_year_range ToDo add end_year filter again
|
177 |
|
178 |
-
filtered_semantic_no_dupe = remove_duplicates(filtered_semantic)
|
179 |
filtered_lexical_no_dupe = remove_duplicates(filtered_lexical)
|
180 |
|
181 |
|
|
|
162 |
|
163 |
# 2) Filter out content < 5 chars (interim fix: very short paragraphs with only a few characters tend to get a high similarity score)
|
164 |
semantic_all = [
|
165 |
+
r for r in semantic_all if len(r.payload["page_content"]) >= 5
|
166 |
]
|
167 |
lexical_all = [
|
168 |
+
r for r in lexical_all if len(r.payload["page_content"]) >= 5
|
169 |
]
|
170 |
|
171 |
# 2) Apply a threshold to SEMANTIC results (score >= 0.0)
|
172 |
+
semantic_thresholded = [r for r in semantic_all if r.score >= 0.0]
|
173 |
|
174 |
# 2) Filter the entire sets
|
175 |
filtered_semantic = filter_results(semantic_thresholded, country_filter, region_filter) ## , end_year_range ToDo add end_year filter again
|
176 |
filtered_lexical = filter_results(lexical_all, country_filter, region_filter)## , end_year_range ToDo add end_year filter again
|
177 |
|
178 |
+
filtered_semantic_no_dupe = remove_duplicates(filtered_semantic) # ToDo remove duplicates again?
|
179 |
filtered_lexical_no_dupe = remove_duplicates(filtered_lexical)
|
180 |
|
181 |
|