annikwag committed
Commit 221e09e · verified · 1 Parent(s): 0eaa45d

Update app.py

Files changed (1)
  1. app.py +4 -4
app.py CHANGED
@@ -162,20 +162,20 @@ lexical_all = results[1]

# 2) Filter out content < 20 chars (as intermediate fix to problem that e.g. super short paragraphs with few chars get high similarity score)
semantic_all = [
-    r for r in semantic_all if len(r.payload["page_content"]) >= 20
+    r for r in semantic_all if len(r.payload["page_content"]) >= 5
]
lexical_all = [
-    r for r in lexical_all if len(r.payload["page_content"]) >= 20
+    r for r in lexical_all if len(r.payload["page_content"]) >= 5
]

# 2) Apply a threshold to SEMANTIC results (score >= 0.4)
-semantic_thresholded = [r for r in semantic_all if r.score >= 0.4]
+semantic_thresholded = [r for r in semantic_all if r.score >= 0.0]

# 2) Filter the entire sets
filtered_semantic = filter_results(semantic_thresholded, country_filter, region_filter) ## , end_year_range ToDo add end_year filter again
filtered_lexical = filter_results(lexical_all, country_filter, region_filter)## , end_year_range ToDo add end_year filter again

-filtered_semantic_no_dupe = remove_duplicates(filtered_semantic)
+filtered_semantic_no_dupe = remove_duplicates(filtered_semantic) # ToDo remove duplicates again?
filtered_lexical_no_dupe = remove_duplicates(filtered_lexical)
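The commit relaxes two intermediate filters: the minimum paragraph length drops from 20 to 5 characters and the semantic score cutoff drops from 0.4 to 0.0. The helpers `filter_results` and `remove_duplicates` are defined elsewhere in app.py and not shown in this diff, so the following is a minimal, self-contained sketch of the post-commit filter chain, assuming Qdrant-style scored points (objects exposing `.score` and `.payload`); the helper bodies and the sample data below are hypothetical stand-ins inferred from the call sites, not the actual implementations.

```python
from dataclasses import dataclass, field

@dataclass
class Result:
    """Stand-in for a Qdrant-style scored point (.score plus a payload dict)."""
    score: float
    payload: dict = field(default_factory=dict)

def filter_results(results, country_filter, region_filter):
    # Hypothetical: keep results whose payload matches the selected filters;
    # an empty filter list is treated as "match everything".
    def keep(r):
        ok_country = not country_filter or r.payload.get("country") in country_filter
        ok_region = not region_filter or r.payload.get("region") in region_filter
        return ok_country and ok_region
    return [r for r in results if keep(r)]

def remove_duplicates(results):
    # Hypothetical: dedupe on the paragraph text itself, keeping the first occurrence.
    seen, unique = set(), []
    for r in results:
        key = r.payload["page_content"]
        if key not in seen:
            seen.add(key)
            unique.append(r)
    return unique

semantic_all = [
    Result(0.9, {"page_content": "long enough paragraph", "country": "KE"}),
    Result(0.8, {"page_content": "tiny"}),  # 4 chars -> dropped by the >= 5 filter
]
lexical_all = list(semantic_all)

# The thresholds this commit relaxed: min length 20 -> 5, min score 0.4 -> 0.0.
MIN_LEN, MIN_SCORE = 5, 0.0

semantic_all = [r for r in semantic_all if len(r.payload["page_content"]) >= MIN_LEN]
lexical_all = [r for r in lexical_all if len(r.payload["page_content"]) >= MIN_LEN]
semantic_thresholded = [r for r in semantic_all if r.score >= MIN_SCORE]

filtered_semantic = filter_results(semantic_thresholded, country_filter=[], region_filter=[])
filtered_lexical = filter_results(lexical_all, country_filter=[], region_filter=[])

filtered_semantic_no_dupe = remove_duplicates(filtered_semantic)
filtered_lexical_no_dupe = remove_duplicates(filtered_lexical)
print(len(filtered_semantic_no_dupe), len(filtered_lexical_no_dupe))  # -> 1 1
```

Note that with the cutoff at 0.0 the semantic score threshold is effectively a no-op for the non-negative similarity scores Qdrant typically returns, so after this commit the length filter (now >= 5 characters) does most of the pruning; the old comment `score >= 0.4` in the diff no longer matches the code.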