annikwag committed on
Commit
6a13cb6
·
verified ·
1 Parent(s): 4345f0f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -60
app.py CHANGED
@@ -23,9 +23,17 @@ from appStore.region_utils import (
23
  # from appStore.tfidf_extraction import extract_top_keywords
24
 
25
  # Import helper modules
26
- from appStore.rag_utils import highlight_query, get_rag_answer, compute_title, format_project_id
27
-
28
- from appStore.filter_utils import parse_budget, filter_results, get_crs_options
 
 
 
 
 
 
 
 
29
 
30
  from appStore.crs_utils import lookup_crs_value
31
 
@@ -119,14 +127,14 @@ unique_country_names = sorted(country_name_mapping.keys())
119
  # Define reset_filters function using session_state
120
  ###########################################
121
  def reset_filters():
122
- st.session_state["region_filter"] = "All/Not allocated"
123
- st.session_state["country_filter"] = "All/Not allocated"
124
  current_year = datetime.now().year
125
  default_start_year = current_year - 4
126
  st.session_state["end_year_range"] = (default_start_year, max_end_year)
127
- st.session_state["crs_filter"] = "All/Not allocated"
128
  st.session_state["min_budget"] = min_budget_val
129
- st.session_state["client_filter"] = "All/Not allocated"
130
  st.session_state["query"] = ""
131
  st.session_state["show_exact_matches"] = False
132
  st.session_state["page"] = 1
@@ -137,17 +145,19 @@ def reset_filters():
137
  col1, col2, col3, col4, col5 = st.columns([1, 1, 1, 1, 1])
138
 
139
  with col1:
140
- region_filter = st.selectbox("Region", ["All/Not allocated"] + sorted(unique_sub_regions), key="region_filter")
141
- if region_filter == "All/Not allocated":
 
142
  filtered_country_names = unique_country_names
143
  else:
144
  filtered_country_names = [
145
  name for name, code in country_name_mapping.items()
146
- if iso_code_to_sub_region.get(code) == region_filter
147
  ]
148
 
149
  with col2:
150
- country_filter = st.selectbox("Country", ["All/Not allocated"] + filtered_country_names, key="country_filter")
 
151
 
152
  with col3:
153
  current_year = datetime.now().year
@@ -162,7 +172,8 @@ with col3:
162
 
163
  with col4:
164
  crs_options = ["All/Not allocated"] + get_crs_options(client, collection_name)
165
- crs_filter = st.selectbox("CRS", crs_options, key="crs_filter")
 
166
 
167
  with col5:
168
  min_budget = st.slider(
@@ -180,7 +191,8 @@ col1_2, col2_2, col3_2, col4_2, col5_2 = st.columns(5)
180
 
181
  with col1_2:
182
  client_options = sorted(project_data["client"].dropna().unique().tolist())
183
- client_filter = st.selectbox("Client", ["All/Not allocated"] + client_options, key="client_filter")
 
184
  with col2_2:
185
  st.empty()
186
  with col3_2:
@@ -188,30 +200,25 @@ with col3_2:
188
  with col4_2:
189
  st.empty()
190
  with col5_2:
191
- # Plain reset button (will be moved to row 3 as well)
192
- st.button("Reset Filters", on_click=reset_filters, key="reset_button_row2")
193
 
194
  ###########################################
195
  # Filter Controls - Row 3 (Remaining Filter)
196
  ###########################################
197
- col1_3, col2_3, col3_3, col4_3, col5_3 = st.columns(5)
198
  with col1_3:
199
  # Place the "Show only exact matches" checkbox here
200
  show_exact_matches = st.checkbox("Show only exact matches", key="show_exact_matches")
201
  with col2_3:
202
  st.empty()
203
  with col3_3:
204
- st.empty()
205
- with col4_3:
206
- st.empty()
207
- with col5_3:
208
  # Right-align a more prominent reset button
209
  with st.container():
210
  st.markdown("<div style='text-align: right;'>", unsafe_allow_html=True)
211
  if st.button("**Reset Filters**", key="reset_button_row3"):
212
  reset_filters()
213
  st.markdown("</div>", unsafe_allow_html=True)
214
-
215
  ###########################################
216
  # Main Search / Results
217
  ###########################################
@@ -230,6 +237,7 @@ else:
230
  semantic_thresholded = [r for r in semantic_all if r.score >= 0.0]
231
 
232
  # 2) Filter results based on the user’s selections
 
233
  filtered_semantic = filter_results(
234
  semantic_thresholded,
235
  country_filter,
@@ -255,10 +263,10 @@ else:
255
  get_country_name
256
  )
257
 
258
- # Additional filter by client
259
- if client_filter != "All/Not allocated":
260
- filtered_semantic = [r for r in filtered_semantic if r.payload.get("metadata", {}).get("client", "Unknown Client") == client_filter]
261
- filtered_lexical = [r for r in filtered_lexical if r.payload.get("metadata", {}).get("client", "Unknown Client") == client_filter]
262
 
263
  # Remove duplicates
264
  filtered_semantic_no_dupe = remove_duplicates(filtered_semantic)
@@ -270,11 +278,9 @@ else:
270
  except (ValueError, TypeError):
271
  return value
272
 
273
- # --- Reprint Query (Right Aligned with "Query:") ---
274
- st.markdown(
275
- f"<div style='text-align: left; font-size:2.1em; font-style: italic; font-weight: bold;'>Query: {var}</div>",
276
- unsafe_allow_html=True
277
- )
278
  # 3) Display results
279
  # Lexical Search Results Branch
280
  if show_exact_matches:
@@ -297,7 +303,7 @@ else:
297
  if "page" not in st.session_state:
298
  st.session_state.page = 1
299
  current_page = st.session_state.page
300
- # Top pagination widget (right aligned, 1/7 width)
301
  col_pag_top = st.columns([6, 1])[1]
302
  new_page_top = col_pag_top.selectbox("Select Page", list(range(1, total_pages + 1)), index=current_page - 1, key="page_top")
303
  st.session_state.page = new_page_top
@@ -312,7 +318,7 @@ else:
312
  metadata["title"] = compute_title(metadata)
313
  title_html = highlight_query(metadata["title"], var) if var.strip() else metadata["title"]
314
  title_clean = re.sub(r'<a.*?>|</a>', '', title_html)
315
- # Prepend the result number
316
  st.markdown(f"#### {i}. **{title_clean}**", unsafe_allow_html=True)
317
 
318
  objective = metadata.get("objective", "None")
@@ -346,25 +352,28 @@ else:
346
  new_crs_value_clean = re.sub(r'\.0$', '', str(new_crs_value))
347
  crs_combined = f"{crs_key_clean}: {new_crs_value_clean}" if crs_key_clean else "Unknown"
348
 
349
- # Insert Predecessor/Successor line if available
350
- predecessor = metadata.get("predecessor_id", "")
351
- successor = metadata.get("successor_id", "")
 
 
 
352
  pred_success_ids = ""
353
  if predecessor or successor:
354
- extra_line = "<br>"
355
  parts = []
356
  if predecessor:
357
- parts.append(f"**Predecessor Project:** {format_project_id(predecessor)}")
358
  if successor:
359
- parts.append(f"**Successor Project:** {format_project_id(successor)}")
360
- pred_success_ids += " ".join(parts)
 
361
 
362
  additional_text = (
363
  f"**Objective:** {highlight_query(objective, var)}<br>"
364
  f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
365
  f"**Projekt duration:** {start_year_str}-{end_year_str}<br>"
366
- f"**Budget:** Project: <b>{formatted_project_budget}</b>, Total volume: <b>{formatted_total_volume}</b><br>"
367
- + pred_success_ids +
368
  f"<br>**Country:** {country_raw}<br>"
369
  f"**Sector:** {crs_combined}"
370
  )
@@ -392,7 +401,7 @@ else:
392
  st.session_state.page = 1
393
  current_page = st.session_state.page
394
 
395
- # Top pagination widget (right aligned, 1/7 width)
396
  col_pag_top = st.columns([6, 1])[1]
397
  new_page_top = col_pag_top.selectbox("Select Page", list(range(1, total_pages + 1)), index=current_page - 1, key="page_top_sem")
398
  st.session_state.page = new_page_top
@@ -401,24 +410,19 @@ else:
401
  end_index = start_index + page_size
402
  top_results = filtered_semantic_no_dupe[start_index:end_index]
403
 
404
- # Prominent page info with bold numbers and green highlight if current page is not 1
405
  page_num = f"<b style='color: green;'>{st.session_state.page}</b>" if st.session_state.page != 1 else f"<b>{st.session_state.page}</b>"
406
  total_pages_str = f"<b>{total_pages}</b>"
407
  st.markdown(f"Showing **{len(top_results)}** Semantic Search results (Page {page_num} of {total_pages_str})", unsafe_allow_html=True)
408
 
409
- # --- RAG Answer (Right aligned, bullet points, bold numbers) ---
410
  rag_answer = get_rag_answer(var, top_results, DEDICATED_ENDPOINT, WRITE_ACCESS_TOKEN)
411
  bullet_lines = []
412
  for line in rag_answer.splitlines():
413
  if line.strip():
414
- # Bold any numbers in the line
415
  line_bold = re.sub(r'(\d+)', r'<b>\1</b>', line)
416
  bullet_lines.append(f"<li>{line_bold}</li>")
417
- formatted_rag_answer = (
418
- "<ul style='text-align: left; list-style-position: inside;'>"
419
- + "".join(bullet_lines)
420
- + "</ul>"
421
- )
422
  st.markdown(formatted_rag_answer, unsafe_allow_html=True)
423
  st.divider()
424
 
@@ -427,7 +431,6 @@ else:
427
  if "title" not in metadata:
428
  metadata["title"] = compute_title(metadata)
429
  title_clean = re.sub(r'<a.*?>|</a>', '', metadata["title"])
430
- # Prepend result number and make title bold
431
  st.markdown(f"#### {i}. **{title_clean}**", unsafe_allow_html=True)
432
 
433
  desc_en = metadata.get("description.en", "").strip()
@@ -460,25 +463,28 @@ else:
460
  new_crs_value = lookup_crs_value(crs_key_clean)
461
  new_crs_value_clean = re.sub(r'\.0$', '', str(new_crs_value))
462
  crs_combined = f"{crs_key_clean}: {new_crs_value_clean}" if crs_key_clean else "Unknown"
 
 
 
 
 
 
463
 
464
- predecessor = metadata.get("predecessor_id", "").strip()
465
- successor = metadata.get("successor_id", "").strip()
466
  pred_success_ids = ""
467
  if predecessor or successor:
468
- extra_line = "<br>"
469
  parts = []
470
  if predecessor:
471
- parts.append(f"**Predecessor Project:** {format_project_id(predecessor)}")
472
  if successor:
473
- parts.append(f"**Successor Project:** {format_project_id(successor)}")
474
- pred_success_ids += " ".join(parts)
475
 
476
  additional_text = (
477
  f"**Objective:** {metadata.get('objective', '')}<br>"
478
  f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
479
  f"**Projekt duration:** {start_year_str}-{end_year_str}<br>"
480
- f"**Budget:** Project: <b>{formatted_project_budget}</b>, Total volume: <b>{formatted_total_volume}</b><br>"
481
- + pred_success_ids +
482
  f"<br>**Country:** {country_raw}<br>"
483
  f"**Sector:** {crs_combined}"
484
  )
@@ -488,7 +494,7 @@ else:
488
  st.markdown(additional_text, unsafe_allow_html=True)
489
  st.divider()
490
 
491
- # Bottom pagination widget (right aligned, 1/7 width)
492
  col_pag_bot = st.columns([6, 1])[1]
493
  new_page_bot = col_pag_bot.selectbox("Select Page", list(range(1, total_pages + 1)), index=st.session_state.page - 1, key="page_bot_sem")
494
  st.session_state.page = new_page_bot
 
23
  # from appStore.tfidf_extraction import extract_top_keywords
24
 
25
  # Import helper modules
26
+ from appStore.rag_utils import (
27
+ highlight_query,
28
+ get_rag_answer,
29
+ compute_title,
30
+ format_project_id # <-- Imported formatting function
31
+ )
32
+ from appStore.filter_utils import (
33
+ parse_budget,
34
+ filter_results,
35
+ get_crs_options
36
+ )
37
 
38
  from appStore.crs_utils import lookup_crs_value
39
 
 
127
  # Define reset_filters function using session_state
128
  ###########################################
129
  def reset_filters():
130
+ st.session_state["region_filter"] = ["All/Not allocated"]
131
+ st.session_state["country_filter"] = ["All/Not allocated"]
132
  current_year = datetime.now().year
133
  default_start_year = current_year - 4
134
  st.session_state["end_year_range"] = (default_start_year, max_end_year)
135
+ st.session_state["crs_filter"] = ["All/Not allocated"]
136
  st.session_state["min_budget"] = min_budget_val
137
+ st.session_state["client_filter"] = ["All/Not allocated"]
138
  st.session_state["query"] = ""
139
  st.session_state["show_exact_matches"] = False
140
  st.session_state["page"] = 1
 
145
  col1, col2, col3, col4, col5 = st.columns([1, 1, 1, 1, 1])
146
 
147
  with col1:
148
+ region_filter = st.multiselect("Region", options=["All/Not allocated"] + sorted(unique_sub_regions),
149
+ default=["All/Not allocated"], key="region_filter")
150
+ if "All/Not allocated" in region_filter or not region_filter:
151
  filtered_country_names = unique_country_names
152
  else:
153
  filtered_country_names = [
154
  name for name, code in country_name_mapping.items()
155
+ if iso_code_to_sub_region.get(code) in region_filter
156
  ]
157
 
158
  with col2:
159
+ country_filter = st.multiselect("Country", options=["All/Not allocated"] + filtered_country_names,
160
+ default=["All/Not allocated"], key="country_filter")
161
 
162
  with col3:
163
  current_year = datetime.now().year
 
172
 
173
  with col4:
174
  crs_options = ["All/Not allocated"] + get_crs_options(client, collection_name)
175
+ crs_filter = st.multiselect("CRS", options=crs_options,
176
+ default=["All/Not allocated"], key="crs_filter")
177
 
178
  with col5:
179
  min_budget = st.slider(
 
191
 
192
  with col1_2:
193
  client_options = sorted(project_data["client"].dropna().unique().tolist())
194
+ client_filter = st.multiselect("Client", options=["All/Not allocated"] + client_options,
195
+ default=["All/Not allocated"], key="client_filter")
196
  with col2_2:
197
  st.empty()
198
  with col3_2:
 
200
  with col4_2:
201
  st.empty()
202
  with col5_2:
203
+ st.empty()
204
+
205
 
206
  ###########################################
207
  # Filter Controls - Row 3 (Remaining Filter)
208
  ###########################################
209
+ col1_3, col2_3, col3_3 = st.columns([7, 23, 5])
210
  with col1_3:
211
  # Place the "Show only exact matches" checkbox here
212
  show_exact_matches = st.checkbox("Show only exact matches", key="show_exact_matches")
213
  with col2_3:
214
  st.empty()
215
  with col3_3:
 
 
 
 
216
  # Right-align a more prominent reset button
217
  with st.container():
218
  st.markdown("<div style='text-align: right;'>", unsafe_allow_html=True)
219
  if st.button("**Reset Filters**", key="reset_button_row3"):
220
  reset_filters()
221
  st.markdown("</div>", unsafe_allow_html=True)
 
222
  ###########################################
223
  # Main Search / Results
224
  ###########################################
 
237
  semantic_thresholded = [r for r in semantic_all if r.score >= 0.0]
238
 
239
  # 2) Filter results based on the user’s selections
240
+ # (Assuming filter_results treats a selection containing "All/Not allocated" as meaning "no filter"; the filter values are now lists returned by st.multiselect)
241
  filtered_semantic = filter_results(
242
  semantic_thresholded,
243
  country_filter,
 
263
  get_country_name
264
  )
265
 
266
+ # Additional filter by client (if "All/Not allocated" is not selected)
267
+ if "All/Not allocated" not in client_filter:
268
+ filtered_semantic = [r for r in filtered_semantic if r.payload.get("metadata", {}).get("client", "Unknown Client") in client_filter]
269
+ filtered_lexical = [r for r in filtered_lexical if r.payload.get("metadata", {}).get("client", "Unknown Client") in client_filter]
270
 
271
  # Remove duplicates
272
  filtered_semantic_no_dupe = remove_duplicates(filtered_semantic)
 
278
  except (ValueError, TypeError):
279
  return value
280
 
281
+ # --- Reprint Query (Left aligned with "Query:") ---
282
+ st.markdown(f"<div style='text-align: left; font-size:2.1em; font-style: italic; font-weight: bold;'>Query: {var}</div>", unsafe_allow_html=True)
283
+
 
 
284
  # 3) Display results
285
  # Lexical Search Results Branch
286
  if show_exact_matches:
 
303
  if "page" not in st.session_state:
304
  st.session_state.page = 1
305
  current_page = st.session_state.page
306
+ # Top pagination widget (right aligned, occupying 1/7 of page width)
307
  col_pag_top = st.columns([6, 1])[1]
308
  new_page_top = col_pag_top.selectbox("Select Page", list(range(1, total_pages + 1)), index=current_page - 1, key="page_top")
309
  st.session_state.page = new_page_top
 
318
  metadata["title"] = compute_title(metadata)
319
  title_html = highlight_query(metadata["title"], var) if var.strip() else metadata["title"]
320
  title_clean = re.sub(r'<a.*?>|</a>', '', title_html)
321
+ # Prepend the result number and make title bold
322
  st.markdown(f"#### {i}. **{title_clean}**", unsafe_allow_html=True)
323
 
324
  objective = metadata.get("objective", "None")
 
352
  new_crs_value_clean = re.sub(r'\.0$', '', str(new_crs_value))
353
  crs_combined = f"{crs_key_clean}: {new_crs_value_clean}" if crs_key_clean else "Unknown"
354
 
355
+ # Extract and format predecessor and successor project IDs
356
+ predecessor_raw = metadata.get("predecessor_id", "")
357
+ successor_raw = metadata.get("successor_id", "")
358
+ predecessor = safe_format_project_id(predecessor_raw)
359
+ successor = safe_format_project_id(successor_raw)
360
+
361
  pred_success_ids = ""
362
  if predecessor or successor:
 
363
  parts = []
364
  if predecessor:
365
+ parts.append(f"**Predecessor Project:** {predecessor}")
366
  if successor:
367
+ parts.append(f"**Successor Project:** {successor}")
368
+ pred_success_ids = "<br>" + " ".join(parts)
369
+
370
 
371
  additional_text = (
372
  f"**Objective:** {highlight_query(objective, var)}<br>"
373
  f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
374
  f"**Projekt duration:** {start_year_str}-{end_year_str}<br>"
375
+ f"**Budget:** Project: <b>{formatted_project_budget}</b>, Total volume: <b>{formatted_total_volume}</b>"
376
+ + extra_line +
377
  f"<br>**Country:** {country_raw}<br>"
378
  f"**Sector:** {crs_combined}"
379
  )
 
401
  st.session_state.page = 1
402
  current_page = st.session_state.page
403
 
404
+ # Top pagination widget (right aligned, occupying 1/7 of page width)
405
  col_pag_top = st.columns([6, 1])[1]
406
  new_page_top = col_pag_top.selectbox("Select Page", list(range(1, total_pages + 1)), index=current_page - 1, key="page_top_sem")
407
  st.session_state.page = new_page_top
 
410
  end_index = start_index + page_size
411
  top_results = filtered_semantic_no_dupe[start_index:end_index]
412
 
413
+ # Prominent page info with bold numbers (green if current page != 1)
414
  page_num = f"<b style='color: green;'>{st.session_state.page}</b>" if st.session_state.page != 1 else f"<b>{st.session_state.page}</b>"
415
  total_pages_str = f"<b>{total_pages}</b>"
416
  st.markdown(f"Showing **{len(top_results)}** Semantic Search results (Page {page_num} of {total_pages_str})", unsafe_allow_html=True)
417
 
418
+ # --- RAG Answer (Left aligned, bullet points, with bold numbers) ---
419
  rag_answer = get_rag_answer(var, top_results, DEDICATED_ENDPOINT, WRITE_ACCESS_TOKEN)
420
  bullet_lines = []
421
  for line in rag_answer.splitlines():
422
  if line.strip():
 
423
  line_bold = re.sub(r'(\d+)', r'<b>\1</b>', line)
424
  bullet_lines.append(f"<li>{line_bold}</li>")
425
+ formatted_rag_answer = "<ul style='text-align: left; list-style-position: inside;'>" + "".join(bullet_lines) + "</ul>"
 
 
 
 
426
  st.markdown(formatted_rag_answer, unsafe_allow_html=True)
427
  st.divider()
428
 
 
431
  if "title" not in metadata:
432
  metadata["title"] = compute_title(metadata)
433
  title_clean = re.sub(r'<a.*?>|</a>', '', metadata["title"])
 
434
  st.markdown(f"#### {i}. **{title_clean}**", unsafe_allow_html=True)
435
 
436
  desc_en = metadata.get("description.en", "").strip()
 
463
  new_crs_value = lookup_crs_value(crs_key_clean)
464
  new_crs_value_clean = re.sub(r'\.0$', '', str(new_crs_value))
465
  crs_combined = f"{crs_key_clean}: {new_crs_value_clean}" if crs_key_clean else "Unknown"
466
+
467
+ # Extract and format predecessor and successor project IDs
468
+ predecessor_raw = metadata.get("predecessor_id", "")
469
+ successor_raw = metadata.get("successor_id", "")
470
+ predecessor = safe_format_project_id(predecessor_raw)
471
+ successor = safe_format_project_id(successor_raw)
472
 
 
 
473
  pred_success_ids = ""
474
  if predecessor or successor:
 
475
  parts = []
476
  if predecessor:
477
+ parts.append(f"**Predecessor Project:** {predecessor}")
478
  if successor:
479
+ parts.append(f"**Successor Project:** {successor}")
480
+ pred_success_ids = "<br>" + " ".join(parts)
481
 
482
  additional_text = (
483
  f"**Objective:** {metadata.get('objective', '')}<br>"
484
  f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
485
  f"**Projekt duration:** {start_year_str}-{end_year_str}<br>"
486
+ f"**Budget:** Project: <b>{formatted_project_budget}</b>, Total volume: <b>{formatted_total_volume}</b>"
487
+ + extra_line +
488
  f"<br>**Country:** {country_raw}<br>"
489
  f"**Sector:** {crs_combined}"
490
  )
 
494
  st.markdown(additional_text, unsafe_allow_html=True)
495
  st.divider()
496
 
497
+ # Bottom pagination widget (right aligned, occupying 1/7 of page width)
498
  col_pag_bot = st.columns([6, 1])[1]
499
  new_page_bot = col_pag_bot.selectbox("Select Page", list(range(1, total_pages + 1)), index=st.session_state.page - 1, key="page_bot_sem")
500
  st.session_state.page = new_page_bot