annikwag committed on
Commit
90526f3
·
verified ·
1 Parent(s): 6a13cb6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -63
app.py CHANGED
@@ -22,12 +22,12 @@ from appStore.region_utils import (
22
  # TF-IDF part (excluded from the app for now)
23
  # from appStore.tfidf_extraction import extract_top_keywords
24
 
25
- # Import helper modules
26
  from appStore.rag_utils import (
27
  highlight_query,
28
  get_rag_answer,
29
  compute_title,
30
- format_project_id # <-- Imported formatting function
31
  )
32
  from appStore.filter_utils import (
33
  parse_budget,
@@ -127,14 +127,14 @@ unique_country_names = sorted(country_name_mapping.keys())
127
  # Define reset_filters function using session_state
128
  ###########################################
129
  def reset_filters():
130
- st.session_state["region_filter"] = ["All/Not allocated"]
131
- st.session_state["country_filter"] = ["All/Not allocated"]
132
  current_year = datetime.now().year
133
  default_start_year = current_year - 4
134
  st.session_state["end_year_range"] = (default_start_year, max_end_year)
135
- st.session_state["crs_filter"] = ["All/Not allocated"]
136
  st.session_state["min_budget"] = min_budget_val
137
- st.session_state["client_filter"] = ["All/Not allocated"]
138
  st.session_state["query"] = ""
139
  st.session_state["show_exact_matches"] = False
140
  st.session_state["page"] = 1
@@ -145,19 +145,17 @@ def reset_filters():
145
  col1, col2, col3, col4, col5 = st.columns([1, 1, 1, 1, 1])
146
 
147
  with col1:
148
- region_filter = st.multiselect("Region", options=["All/Not allocated"] + sorted(unique_sub_regions),
149
- default=["All/Not allocated"], key="region_filter")
150
- if "All/Not allocated" in region_filter or not region_filter:
151
  filtered_country_names = unique_country_names
152
  else:
153
  filtered_country_names = [
154
  name for name, code in country_name_mapping.items()
155
- if iso_code_to_sub_region.get(code) in region_filter
156
  ]
157
 
158
  with col2:
159
- country_filter = st.multiselect("Country", options=["All/Not allocated"] + filtered_country_names,
160
- default=["All/Not allocated"], key="country_filter")
161
 
162
  with col3:
163
  current_year = datetime.now().year
@@ -172,8 +170,7 @@ with col3:
172
 
173
  with col4:
174
  crs_options = ["All/Not allocated"] + get_crs_options(client, collection_name)
175
- crs_filter = st.multiselect("CRS", options=crs_options,
176
- default=["All/Not allocated"], key="crs_filter")
177
 
178
  with col5:
179
  min_budget = st.slider(
@@ -191,8 +188,7 @@ col1_2, col2_2, col3_2, col4_2, col5_2 = st.columns(5)
191
 
192
  with col1_2:
193
  client_options = sorted(project_data["client"].dropna().unique().tolist())
194
- client_filter = st.multiselect("Client", options=["All/Not allocated"] + client_options,
195
- default=["All/Not allocated"], key="client_filter")
196
  with col2_2:
197
  st.empty()
198
  with col3_2:
@@ -200,25 +196,40 @@ with col3_2:
200
  with col4_2:
201
  st.empty()
202
  with col5_2:
203
- st.empty()
204
-
205
 
206
  ###########################################
207
  # Filter Controls - Row 3 (Remaining Filter)
208
  ###########################################
209
- col1_3, col2_3, col3_3 = st.columns([7, 23, 5])
210
  with col1_3:
211
  # Place the "Show only exact matches" checkbox here
212
  show_exact_matches = st.checkbox("Show only exact matches", key="show_exact_matches")
213
  with col2_3:
214
  st.empty()
215
  with col3_3:
 
 
 
 
216
  # Right-align a more prominent reset button
217
  with st.container():
218
  st.markdown("<div style='text-align: right;'>", unsafe_allow_html=True)
219
  if st.button("**Reset Filters**", key="reset_button_row3"):
220
  reset_filters()
221
  st.markdown("</div>", unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
222
  ###########################################
223
  # Main Search / Results
224
  ###########################################
@@ -237,7 +248,6 @@ else:
237
  semantic_thresholded = [r for r in semantic_all if r.score >= 0.0]
238
 
239
  # 2) Filter results based on the user’s selections
240
- # (Assuming filter_results can handle a string "All/Not allocated" as meaning "no filter")
241
  filtered_semantic = filter_results(
242
  semantic_thresholded,
243
  country_filter,
@@ -263,10 +273,10 @@ else:
263
  get_country_name
264
  )
265
 
266
- # Additional filter by client (if "All/Not allocated" is not selected)
267
- if "All/Not allocated" not in client_filter:
268
- filtered_semantic = [r for r in filtered_semantic if r.payload.get("metadata", {}).get("client", "Unknown Client") in client_filter]
269
- filtered_lexical = [r for r in filtered_lexical if r.payload.get("metadata", {}).get("client", "Unknown Client") in client_filter]
270
 
271
  # Remove duplicates
272
  filtered_semantic_no_dupe = remove_duplicates(filtered_semantic)
@@ -278,8 +288,11 @@ else:
278
  except (ValueError, TypeError):
279
  return value
280
 
281
- # --- Reprint Query (Left aligned with "Query:") ---
282
- st.markdown(f"<div style='text-align: left; font-size:2.1em; font-style: italic; font-weight: bold;'>Query: {var}</div>", unsafe_allow_html=True)
 
 
 
283
 
284
  # 3) Display results
285
  # Lexical Search Results Branch
@@ -303,7 +316,7 @@ else:
303
  if "page" not in st.session_state:
304
  st.session_state.page = 1
305
  current_page = st.session_state.page
306
- # Top pagination widget (right aligned, occupying 1/7 of page width)
307
  col_pag_top = st.columns([6, 1])[1]
308
  new_page_top = col_pag_top.selectbox("Select Page", list(range(1, total_pages + 1)), index=current_page - 1, key="page_top")
309
  st.session_state.page = new_page_top
@@ -318,7 +331,7 @@ else:
318
  metadata["title"] = compute_title(metadata)
319
  title_html = highlight_query(metadata["title"], var) if var.strip() else metadata["title"]
320
  title_clean = re.sub(r'<a.*?>|</a>', '', title_html)
321
- # Prepend the result number and make title bold
322
  st.markdown(f"#### {i}. **{title_clean}**", unsafe_allow_html=True)
323
 
324
  objective = metadata.get("objective", "None")
@@ -352,27 +365,28 @@ else:
352
  new_crs_value_clean = re.sub(r'\.0$', '', str(new_crs_value))
353
  crs_combined = f"{crs_key_clean}: {new_crs_value_clean}" if crs_key_clean else "Unknown"
354
 
355
- # Extract and format predecessor and successor project IDs
356
- predecessor_raw = metadata.get("predecessor_id", "")
357
- successor_raw = metadata.get("successor_id", "")
358
- predecessor = safe_format_project_id(predecessor_raw)
359
- successor = safe_format_project_id(successor_raw)
360
-
361
- pred_success_ids = ""
362
- if predecessor or successor:
363
- parts = []
364
- if predecessor:
365
- parts.append(f"**Predecessor Project:** {predecessor}")
366
- if successor:
367
- parts.append(f"**Successor Project:** {successor}")
368
- pred_success_ids = "<br>" + " ".join(parts)
369
-
 
370
 
371
  additional_text = (
372
  f"**Objective:** {highlight_query(objective, var)}<br>"
373
  f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
374
  f"**Projekt duration:** {start_year_str}-{end_year_str}<br>"
375
- f"**Budget:** Project: <b>{formatted_project_budget}</b>, Total volume: <b>{formatted_total_volume}</b>"
376
  + extra_line +
377
  f"<br>**Country:** {country_raw}<br>"
378
  f"**Sector:** {crs_combined}"
@@ -401,7 +415,7 @@ else:
401
  st.session_state.page = 1
402
  current_page = st.session_state.page
403
 
404
- # Top pagination widget (right aligned, occupying 1/7 of page width)
405
  col_pag_top = st.columns([6, 1])[1]
406
  new_page_top = col_pag_top.selectbox("Select Page", list(range(1, total_pages + 1)), index=current_page - 1, key="page_top_sem")
407
  st.session_state.page = new_page_top
@@ -410,12 +424,12 @@ else:
410
  end_index = start_index + page_size
411
  top_results = filtered_semantic_no_dupe[start_index:end_index]
412
 
413
- # Prominent page info with bold numbers (green if current page != 1)
414
  page_num = f"<b style='color: green;'>{st.session_state.page}</b>" if st.session_state.page != 1 else f"<b>{st.session_state.page}</b>"
415
  total_pages_str = f"<b>{total_pages}</b>"
416
  st.markdown(f"Showing **{len(top_results)}** Semantic Search results (Page {page_num} of {total_pages_str})", unsafe_allow_html=True)
417
 
418
- # --- RAG Answer (Left aligned, bullet points, with bold numbers) ---
419
  rag_answer = get_rag_answer(var, top_results, DEDICATED_ENDPOINT, WRITE_ACCESS_TOKEN)
420
  bullet_lines = []
421
  for line in rag_answer.splitlines():
@@ -463,27 +477,29 @@ else:
463
  new_crs_value = lookup_crs_value(crs_key_clean)
464
  new_crs_value_clean = re.sub(r'\.0$', '', str(new_crs_value))
465
  crs_combined = f"{crs_key_clean}: {new_crs_value_clean}" if crs_key_clean else "Unknown"
466
-
467
- # Extract and format predecessor and successor project IDs
468
- predecessor_raw = metadata.get("predecessor_id", "")
469
- successor_raw = metadata.get("successor_id", "")
470
- predecessor = safe_format_project_id(predecessor_raw)
471
- successor = safe_format_project_id(successor_raw)
472
 
473
- pred_success_ids = ""
474
- if predecessor or successor:
475
- parts = []
476
- if predecessor:
477
- parts.append(f"**Predecessor Project:** {predecessor}")
478
- if successor:
479
- parts.append(f"**Successor Project:** {successor}")
480
- pred_success_ids = "<br>" + " ".join(parts)
 
 
 
 
 
 
 
 
481
 
482
  additional_text = (
483
  f"**Objective:** {metadata.get('objective', '')}<br>"
484
  f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
485
  f"**Projekt duration:** {start_year_str}-{end_year_str}<br>"
486
- f"**Budget:** Project: <b>{formatted_project_budget}</b>, Total volume: <b>{formatted_total_volume}</b>"
487
  + extra_line +
488
  f"<br>**Country:** {country_raw}<br>"
489
  f"**Sector:** {crs_combined}"
@@ -494,7 +510,7 @@ else:
494
  st.markdown(additional_text, unsafe_allow_html=True)
495
  st.divider()
496
 
497
- # Bottom pagination widget (right aligned, occupying 1/7 width)
498
  col_pag_bot = st.columns([6, 1])[1]
499
  new_page_bot = col_pag_bot.selectbox("Select Page", list(range(1, total_pages + 1)), index=st.session_state.page - 1, key="page_bot_sem")
500
  st.session_state.page = new_page_bot
 
22
  # TF-IDF part (excluded from the app for now)
23
  # from appStore.tfidf_extraction import extract_top_keywords
24
 
25
+ # Import helper modules, including format_project_id for formatting IDs
26
  from appStore.rag_utils import (
27
  highlight_query,
28
  get_rag_answer,
29
  compute_title,
30
+ format_project_id
31
  )
32
  from appStore.filter_utils import (
33
  parse_budget,
 
127
  # Define reset_filters function using session_state
128
  ###########################################
129
  def reset_filters():
130
+ st.session_state["region_filter"] = "All/Not allocated"
131
+ st.session_state["country_filter"] = "All/Not allocated"
132
  current_year = datetime.now().year
133
  default_start_year = current_year - 4
134
  st.session_state["end_year_range"] = (default_start_year, max_end_year)
135
+ st.session_state["crs_filter"] = "All/Not allocated"
136
  st.session_state["min_budget"] = min_budget_val
137
+ st.session_state["client_filter"] = "All/Not allocated"
138
  st.session_state["query"] = ""
139
  st.session_state["show_exact_matches"] = False
140
  st.session_state["page"] = 1
 
145
  col1, col2, col3, col4, col5 = st.columns([1, 1, 1, 1, 1])
146
 
147
  with col1:
148
+ region_filter = st.selectbox("Region", ["All/Not allocated"] + sorted(unique_sub_regions), key="region_filter")
149
+ if region_filter == "All/Not allocated":
 
150
  filtered_country_names = unique_country_names
151
  else:
152
  filtered_country_names = [
153
  name for name, code in country_name_mapping.items()
154
+ if iso_code_to_sub_region.get(code) == region_filter
155
  ]
156
 
157
  with col2:
158
+ country_filter = st.selectbox("Country", ["All/Not allocated"] + filtered_country_names, key="country_filter")
 
159
 
160
  with col3:
161
  current_year = datetime.now().year
 
170
 
171
  with col4:
172
  crs_options = ["All/Not allocated"] + get_crs_options(client, collection_name)
173
+ crs_filter = st.selectbox("CRS", crs_options, key="crs_filter")
 
174
 
175
  with col5:
176
  min_budget = st.slider(
 
188
 
189
  with col1_2:
190
  client_options = sorted(project_data["client"].dropna().unique().tolist())
191
+ client_filter = st.selectbox("Client", ["All/Not allocated"] + client_options, key="client_filter")
 
192
  with col2_2:
193
  st.empty()
194
  with col3_2:
 
196
  with col4_2:
197
  st.empty()
198
  with col5_2:
199
+ # Plain reset button (will be moved to row 3 as well)
200
+ st.button("Reset Filters", on_click=reset_filters, key="reset_button_row2")
201
 
202
  ###########################################
203
  # Filter Controls - Row 3 (Remaining Filter)
204
  ###########################################
205
+ col1_3, col2_3, col3_3, col4_3, col5_3 = st.columns(5)
206
  with col1_3:
207
  # Place the "Show only exact matches" checkbox here
208
  show_exact_matches = st.checkbox("Show only exact matches", key="show_exact_matches")
209
  with col2_3:
210
  st.empty()
211
  with col3_3:
212
+ st.empty()
213
+ with col4_3:
214
+ st.empty()
215
+ with col5_3:
216
  # Right-align a more prominent reset button
217
  with st.container():
218
  st.markdown("<div style='text-align: right;'>", unsafe_allow_html=True)
219
  if st.button("**Reset Filters**", key="reset_button_row3"):
220
  reset_filters()
221
  st.markdown("</div>", unsafe_allow_html=True)
222
+
223
+ ###########################################
224
+ # Helper function for valid project id
225
+ ###########################################
226
+ def valid_project_id(pid_str):
227
+ if not pid_str:
228
+ return False
229
+ if pid_str.lower() in ["nan", "none"]:
230
+ return False
231
+ return True
232
+
233
  ###########################################
234
  # Main Search / Results
235
  ###########################################
 
248
  semantic_thresholded = [r for r in semantic_all if r.score >= 0.0]
249
 
250
  # 2) Filter results based on the user’s selections
 
251
  filtered_semantic = filter_results(
252
  semantic_thresholded,
253
  country_filter,
 
273
  get_country_name
274
  )
275
 
276
+ # Additional filter by client
277
+ if client_filter != "All/Not allocated":
278
+ filtered_semantic = [r for r in filtered_semantic if r.payload.get("metadata", {}).get("client", "Unknown Client") == client_filter]
279
+ filtered_lexical = [r for r in filtered_lexical if r.payload.get("metadata", {}).get("client", "Unknown Client") == client_filter]
280
 
281
  # Remove duplicates
282
  filtered_semantic_no_dupe = remove_duplicates(filtered_semantic)
 
288
  except (ValueError, TypeError):
289
  return value
290
 
291
+ # --- Reprint Query (Left Aligned with "Query:") ---
292
+ st.markdown(
293
+ f"<div style='text-align: left; font-size:2.1em; font-style: italic; font-weight: bold;'>Query: {var}</div>",
294
+ unsafe_allow_html=True
295
+ )
296
 
297
  # 3) Display results
298
  # Lexical Search Results Branch
 
316
  if "page" not in st.session_state:
317
  st.session_state.page = 1
318
  current_page = st.session_state.page
319
+ # Top pagination widget (right aligned, 1/7 width)
320
  col_pag_top = st.columns([6, 1])[1]
321
  new_page_top = col_pag_top.selectbox("Select Page", list(range(1, total_pages + 1)), index=current_page - 1, key="page_top")
322
  st.session_state.page = new_page_top
 
331
  metadata["title"] = compute_title(metadata)
332
  title_html = highlight_query(metadata["title"], var) if var.strip() else metadata["title"]
333
  title_clean = re.sub(r'<a.*?>|</a>', '', title_html)
334
+ # Prepend the result number
335
  st.markdown(f"#### {i}. **{title_clean}**", unsafe_allow_html=True)
336
 
337
  objective = metadata.get("objective", "None")
 
365
  new_crs_value_clean = re.sub(r'\.0$', '', str(new_crs_value))
366
  crs_combined = f"{crs_key_clean}: {new_crs_value_clean}" if crs_key_clean else "Unknown"
367
 
368
+ predecessor = metadata.get("predecessor_id", "").strip()
369
+ successor = metadata.get("successor_id", "").strip()
370
+ parts = []
371
+ if valid_project_id(predecessor):
372
+ try:
373
+ formatted_pred = format_project_id(int(float(predecessor)))
374
+ except Exception:
375
+ formatted_pred = predecessor
376
+ parts.append(f"**Predecessor Project:** {formatted_pred}")
377
+ if valid_project_id(successor):
378
+ try:
379
+ formatted_succ = format_project_id(int(float(successor)))
380
+ except Exception:
381
+ formatted_succ = successor
382
+ parts.append(f"**Successor Project:** {formatted_succ}")
383
+ extra_line = "<br>" + " | ".join(parts) if parts else ""
384
 
385
  additional_text = (
386
  f"**Objective:** {highlight_query(objective, var)}<br>"
387
  f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
388
  f"**Projekt duration:** {start_year_str}-{end_year_str}<br>"
389
+ f"**Budget:** Project: <b>{formatted_project_budget}</b>, Total volume: <b>{formatted_total_volume}</b><br>"
390
  + extra_line +
391
  f"<br>**Country:** {country_raw}<br>"
392
  f"**Sector:** {crs_combined}"
 
415
  st.session_state.page = 1
416
  current_page = st.session_state.page
417
 
418
+ # Top pagination widget (right aligned, 1/7 width)
419
  col_pag_top = st.columns([6, 1])[1]
420
  new_page_top = col_pag_top.selectbox("Select Page", list(range(1, total_pages + 1)), index=current_page - 1, key="page_top_sem")
421
  st.session_state.page = new_page_top
 
424
  end_index = start_index + page_size
425
  top_results = filtered_semantic_no_dupe[start_index:end_index]
426
 
427
+ # Prominent page info with bold numbers and green highlight if current page is not 1
428
  page_num = f"<b style='color: green;'>{st.session_state.page}</b>" if st.session_state.page != 1 else f"<b>{st.session_state.page}</b>"
429
  total_pages_str = f"<b>{total_pages}</b>"
430
  st.markdown(f"Showing **{len(top_results)}** Semantic Search results (Page {page_num} of {total_pages_str})", unsafe_allow_html=True)
431
 
432
+ # --- RAG Answer (Left aligned, bullet points, bold numbers) ---
433
  rag_answer = get_rag_answer(var, top_results, DEDICATED_ENDPOINT, WRITE_ACCESS_TOKEN)
434
  bullet_lines = []
435
  for line in rag_answer.splitlines():
 
477
  new_crs_value = lookup_crs_value(crs_key_clean)
478
  new_crs_value_clean = re.sub(r'\.0$', '', str(new_crs_value))
479
  crs_combined = f"{crs_key_clean}: {new_crs_value_clean}" if crs_key_clean else "Unknown"
 
 
 
 
 
 
480
 
481
+ predecessor = metadata.get("predecessor_id", "").strip()
482
+ successor = metadata.get("successor_id", "").strip()
483
+ parts = []
484
+ if valid_project_id(predecessor):
485
+ try:
486
+ formatted_pred = format_project_id(int(float(predecessor)))
487
+ except Exception:
488
+ formatted_pred = predecessor
489
+ parts.append(f"**Predecessor Project:** {formatted_pred}")
490
+ if valid_project_id(successor):
491
+ try:
492
+ formatted_succ = format_project_id(int(float(successor)))
493
+ except Exception:
494
+ formatted_succ = successor
495
+ parts.append(f"**Successor Project:** {formatted_succ}")
496
+ extra_line = "<br>" + " | ".join(parts) if parts else ""
497
 
498
  additional_text = (
499
  f"**Objective:** {metadata.get('objective', '')}<br>"
500
  f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
501
  f"**Projekt duration:** {start_year_str}-{end_year_str}<br>"
502
+ f"**Budget:** Project: <b>{formatted_project_budget}</b>, Total volume: <b>{formatted_total_volume}</b><br>"
503
  + extra_line +
504
  f"<br>**Country:** {country_raw}<br>"
505
  f"**Sector:** {crs_combined}"
 
510
  st.markdown(additional_text, unsafe_allow_html=True)
511
  st.divider()
512
 
513
+ # Bottom pagination widget (right aligned, 1/7 width)
514
  col_pag_bot = st.columns([6, 1])[1]
515
  new_page_bot = col_pag_bot.selectbox("Select Page", list(range(1, total_pages + 1)), index=st.session_state.page - 1, key="page_bot_sem")
516
  st.session_state.page = new_page_bot