annikwag committed on
Commit
77a1d81
verified
1 Parent(s): 7da963b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -38
app.py CHANGED
@@ -95,23 +95,23 @@ else:
95
  with col2:
96
  country_filter = st.selectbox("Country", ["All/Not allocated"] + filtered_country_names) # Display filtered country names
97
 
98
- # # Year range slider # ToDo add end_year filter again
99
- # with col3:
100
- # current_year = datetime.now().year
101
- # default_start_year = current_year - 5
102
-
103
- # # 3) The max_value is now the actual max end_year from collection
104
- # end_year_range = st.slider(
105
- # "Project End Year",
106
- # min_value=2010,
107
- # max_value=max_end_year,
108
- # value=(default_start_year, max_end_year),
109
- # )
110
 
111
  # Checkbox to control whether to show only exact matches
112
  show_exact_matches = st.checkbox("Show only exact matches", value=False)
113
 
114
- def filter_results(results, country_filter, region_filter): ## , end_year_range ToDo add end_year filter again
115
  filtered = []
116
  for r in results:
117
  metadata = r.payload.get('metadata', {})
@@ -146,7 +146,7 @@ def filter_results(results, country_filter, region_filter): ## , end_year_range
146
  if (
147
  (country_filter == "All/Not allocated" or selected_iso_code in c_list)
148
  and (region_filter == "All/Not allocated" or countries_in_region)
149
- # and (end_year_range[0] <= end_year_val <= end_year_range[1]) # ToDo add end_year filter again
150
  ):
151
  filtered.append(r)
152
  return filtered
@@ -172,8 +172,8 @@ lexical_all = [
172
  semantic_thresholded = [r for r in semantic_all if r.score >= 0.0]
173
 
174
  # 2) Filter the entire sets
175
- filtered_semantic = filter_results(semantic_thresholded, country_filter, region_filter) ## , end_year_range ToDo add end_year filter again
176
- filtered_lexical = filter_results(lexical_all, country_filter, region_filter)## , end_year_range ToDo add end_year filter again
177
 
178
  filtered_semantic_no_dupe = remove_duplicates(filtered_semantic) # ToDo remove duplicates again?
179
  filtered_lexical_no_dupe = remove_duplicates(filtered_lexical)
@@ -205,8 +205,8 @@ if show_exact_matches:
205
 
206
  # 3) Now apply your region/country/year filter on that new list
207
  filtered_lexical = filter_results(
208
- lexical_substring_filtered, country_filter, region_filter
209
- ) ## , end_year_range ToDo add end_year filter again
210
 
211
  # 4) Remove duplicates
212
  filtered_lexical_no_dupe = remove_duplicates(filtered_lexical)
@@ -277,8 +277,10 @@ if show_exact_matches:
277
 
278
  additional_text = (
279
  f"Commissioned by **{client_name}**\n"
 
280
  f"Projekt duration **{start_year_str}-{end_year_str}**\n"
281
- f"Budget: Project: **{formatted_project_budget}**, total volume: **{formatted_total_volume}**"
 
282
  )
283
  st.markdown(additional_text)
284
  st.divider()
@@ -291,32 +293,37 @@ else:
291
  else:
292
  # Show the top 15 from filtered_semantic
293
  for res in filtered_semantic_no_dupe[:15]:
 
 
 
 
 
 
 
 
 
294
  project_name = res.payload['metadata'].get('project_name', 'Project Link')
295
- url = res.payload['metadata'].get('url', '#')
296
- st.markdown(f"#### [{project_name}]({url})")
297
 
298
- # Snippet logic
299
- full_text = res.payload['page_content']
300
- words = full_text.split()
301
- preview_word_count = 10
 
 
 
 
 
302
  preview_text = " ".join(words[:preview_word_count])
303
  remainder_text = " ".join(words[preview_word_count:])
304
  st.write(preview_text + ("..." if remainder_text else ""))
305
 
306
  # Keywords
 
307
  top_keywords = extract_top_keywords(full_text, top_n=5)
308
  if top_keywords:
309
  st.markdown(f"_{' · '.join(top_keywords)}_")
310
-
311
- # Metadata
312
- metadata = res.payload.get('metadata', {})
313
- countries = metadata.get('countries', "[]")
314
- client_name = metadata.get('client', 'Unknown Client')
315
- start_year = metadata.get('start_year', None)
316
- end_year = metadata.get('end_year', None)
317
- total_volume = metadata.get('total_volume', "Unknown")
318
- total_project = metadata.get('total_project', "Unknown")
319
- id = metadata.get('id', "Unknown")
320
 
321
  try:
322
  c_list = json.loads(countries.replace("'", '"'))
@@ -339,16 +346,27 @@ else:
339
  # Format the year range
340
  start_year_str = extract_year(start_year) if start_year else "Unknown"
341
  end_year_str = extract_year(end_year) if end_year else "Unknown"
 
 
 
 
342
 
343
  # Build the final string
344
  if matched_countries:
345
  additional_text = (
346
- f"**{', '.join(matched_countries)}**, commissioned by **{client_name}**, "
347
- f"**{start_year_str}-{end_year_str}**, project ID: {id}, project budget: {total_project}, total volumne: {total_volume}"
 
 
 
348
  )
349
  else:
350
  additional_text = (
351
- f"Commissioned by **{client_name}**, **{start_year_str}-{end_year_str}**, project ID: {id}, project budget: {total_project}, total volumne: {total_volume}"
 
 
 
 
352
  )
353
 
354
 
 
95
  with col2:
96
  country_filter = st.selectbox("Country", ["All/Not allocated"] + filtered_country_names) # Display filtered country names
97
 
98
+ # Year range slider (filters results by project end year)
99
+ with col3:
100
+ current_year = datetime.now().year
101
+ default_start_year = current_year - 5
102
+
103
+ # 3) The max_value is now the actual max end_year from collection
104
+ end_year_range = st.slider(
105
+ "Project End Year",
106
+ min_value=2010,
107
+ max_value=max_end_year,
108
+ value=(default_start_year, max_end_year),
109
+ )
110
 
111
  # Checkbox to control whether to show only exact matches
112
  show_exact_matches = st.checkbox("Show only exact matches", value=False)
113
 
114
+ def filter_results(results, country_filter, region_filter, end_year_range): ## ToDo add end_year filter again
115
  filtered = []
116
  for r in results:
117
  metadata = r.payload.get('metadata', {})
 
146
  if (
147
  (country_filter == "All/Not allocated" or selected_iso_code in c_list)
148
  and (region_filter == "All/Not allocated" or countries_in_region)
149
+ and (end_year_range[0] <= end_year_val <= end_year_range[1]) # ToDo add end_year filter again
150
  ):
151
  filtered.append(r)
152
  return filtered
 
172
  semantic_thresholded = [r for r in semantic_all if r.score >= 0.0]
173
 
174
  # 2) Filter the entire sets
175
+ filtered_semantic = filter_results(semantic_thresholded, country_filter, region_filter, end_year_range) ## ToDo add end_year filter again
176
+ filtered_lexical = filter_results(lexical_all, country_filter, region_filter, end_year_range)## ToDo add end_year filter again
177
 
178
  filtered_semantic_no_dupe = remove_duplicates(filtered_semantic) # ToDo remove duplicates again?
179
  filtered_lexical_no_dupe = remove_duplicates(filtered_lexical)
 
205
 
206
  # 3) Now apply your region/country/year filter on that new list
207
  filtered_lexical = filter_results(
208
+ lexical_substring_filtered, country_filter, region_filter, end_year_range
209
+ ) ## ToDo add end_year filter again
210
 
211
  # 4) Remove duplicates
212
  filtered_lexical_no_dupe = remove_duplicates(filtered_lexical)
 
277
 
278
  additional_text = (
279
  f"Commissioned by **{client_name}**\n"
280
+
281
  f"Projekt duration **{start_year_str}-{end_year_str}**\n"
282
+
283
+ f"Budget: Project: **{formatted_project_budget}**, Total volume: **{formatted_total_volume}**"
284
  )
285
  st.markdown(additional_text)
286
  st.divider()
 
293
  else:
294
  # Show the top 15 from filtered_semantic
295
  for res in filtered_semantic_no_dupe[:15]:
296
+ # Metadata
297
+ metadata = res.payload.get('metadata', {})
298
+ countries = metadata.get('countries', "[]")
299
+ client_name = metadata.get('client', 'Unknown Client')
300
+ start_year = metadata.get('start_year', None)
301
+ end_year = metadata.get('end_year', None)
302
+ total_volume = metadata.get('total_volume', "Unknown")
303
+ total_project = metadata.get('total_project', "Unknown")
304
+ id = metadata.get('id', "Unknown")
305
  project_name = res.payload['metadata'].get('project_name', 'Project Link')
306
+ proj_id = metadata.get('id', 'Unknown')
307
+ st.markdown(f"#### {project_name} [{proj_id}]")
308
 
309
+ # Snippet logic (preview limited to the first 200 words)
310
+ # Build snippet from objectives and descriptions.
311
+ objectives = metadata.get("objectives", "")
312
+ desc_de = metadata.get("description.de", "")
313
+ desc_en = metadata.get("description.en", "")
314
+ description = desc_de if desc_de else desc_en
315
+ full_snippet = f"Objective: {objectives} Description: {description}"
316
+ words = full_snippet.split()
317
+ preview_word_count = 200
318
  preview_text = " ".join(words[:preview_word_count])
319
  remainder_text = " ".join(words[preview_word_count:])
320
  st.write(preview_text + ("..." if remainder_text else ""))
321
 
322
  # Keywords
323
+ full_text = res.payload['page_content']
324
  top_keywords = extract_top_keywords(full_text, top_n=5)
325
  if top_keywords:
326
  st.markdown(f"_{' · '.join(top_keywords)}_")
 
 
 
 
 
 
 
 
 
 
327
 
328
  try:
329
  c_list = json.loads(countries.replace("'", '"'))
 
346
  # Format the year range
347
  start_year_str = extract_year(start_year) if start_year else "Unknown"
348
  end_year_str = extract_year(end_year) if end_year else "Unknown"
349
+
350
+ formatted_project_budget = format_currency(total_project)
351
+ formatted_total_volume = format_currency(total_volume)
352
+
353
 
354
  # Build the final string
355
  if matched_countries:
356
  additional_text = (
357
+ f"**{', '.join(matched_countries)}**, commissioned by **{client_name}**\n"
358
+
359
+ f"Projekt duration **{start_year_str}-{end_year_str}**\n"
360
+
361
+ f"Budget: Project: **{formatted_project_budget}**, Total volume: **{formatted_total_volume}**"
362
  )
363
  else:
364
  additional_text = (
365
+ f"Commissioned by **{client_name}**\n"
366
+
367
+ f"Projekt duration **{start_year_str}-{end_year_str}**\n"
368
+
369
+ f"Budget: Project: **{formatted_project_budget}**, Total volume: **{formatted_total_volume}**"
370
  )
371
 
372