Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -22,12 +22,12 @@ from appStore.region_utils import (
|
|
22 |
# TF-IDF part (excluded from the app for now)
|
23 |
# from appStore.tfidf_extraction import extract_top_keywords
|
24 |
|
25 |
-
# Import helper modules
|
26 |
from appStore.rag_utils import (
|
27 |
highlight_query,
|
28 |
get_rag_answer,
|
29 |
compute_title,
|
30 |
-
format_project_id
|
31 |
)
|
32 |
from appStore.filter_utils import (
|
33 |
parse_budget,
|
@@ -127,14 +127,14 @@ unique_country_names = sorted(country_name_mapping.keys())
|
|
127 |
# Define reset_filters function using session_state
|
128 |
###########################################
|
129 |
def reset_filters():
|
130 |
-
st.session_state["region_filter"] =
|
131 |
-
st.session_state["country_filter"] =
|
132 |
current_year = datetime.now().year
|
133 |
default_start_year = current_year - 4
|
134 |
st.session_state["end_year_range"] = (default_start_year, max_end_year)
|
135 |
-
st.session_state["crs_filter"] =
|
136 |
st.session_state["min_budget"] = min_budget_val
|
137 |
-
st.session_state["client_filter"] =
|
138 |
st.session_state["query"] = ""
|
139 |
st.session_state["show_exact_matches"] = False
|
140 |
st.session_state["page"] = 1
|
@@ -145,19 +145,17 @@ def reset_filters():
|
|
145 |
col1, col2, col3, col4, col5 = st.columns([1, 1, 1, 1, 1])
|
146 |
|
147 |
with col1:
|
148 |
-
region_filter = st.
|
149 |
-
|
150 |
-
if "All/Not allocated" in region_filter or not region_filter:
|
151 |
filtered_country_names = unique_country_names
|
152 |
else:
|
153 |
filtered_country_names = [
|
154 |
name for name, code in country_name_mapping.items()
|
155 |
-
if iso_code_to_sub_region.get(code)
|
156 |
]
|
157 |
|
158 |
with col2:
|
159 |
-
country_filter = st.
|
160 |
-
default=["All/Not allocated"], key="country_filter")
|
161 |
|
162 |
with col3:
|
163 |
current_year = datetime.now().year
|
@@ -172,8 +170,7 @@ with col3:
|
|
172 |
|
173 |
with col4:
|
174 |
crs_options = ["All/Not allocated"] + get_crs_options(client, collection_name)
|
175 |
-
crs_filter = st.
|
176 |
-
default=["All/Not allocated"], key="crs_filter")
|
177 |
|
178 |
with col5:
|
179 |
min_budget = st.slider(
|
@@ -191,8 +188,7 @@ col1_2, col2_2, col3_2, col4_2, col5_2 = st.columns(5)
|
|
191 |
|
192 |
with col1_2:
|
193 |
client_options = sorted(project_data["client"].dropna().unique().tolist())
|
194 |
-
client_filter = st.
|
195 |
-
default=["All/Not allocated"], key="client_filter")
|
196 |
with col2_2:
|
197 |
st.empty()
|
198 |
with col3_2:
|
@@ -200,25 +196,40 @@ with col3_2:
|
|
200 |
with col4_2:
|
201 |
st.empty()
|
202 |
with col5_2:
|
203 |
-
|
204 |
-
|
205 |
|
206 |
###########################################
|
207 |
# Filter Controls - Row 3 (Remaining Filter)
|
208 |
###########################################
|
209 |
-
col1_3, col2_3, col3_3 = st.columns(
|
210 |
with col1_3:
|
211 |
# Place the "Show only exact matches" checkbox here
|
212 |
show_exact_matches = st.checkbox("Show only exact matches", key="show_exact_matches")
|
213 |
with col2_3:
|
214 |
st.empty()
|
215 |
with col3_3:
|
|
|
|
|
|
|
|
|
216 |
# Right-align a more prominent reset button
|
217 |
with st.container():
|
218 |
st.markdown("<div style='text-align: right;'>", unsafe_allow_html=True)
|
219 |
if st.button("**Reset Filters**", key="reset_button_row3"):
|
220 |
reset_filters()
|
221 |
st.markdown("</div>", unsafe_allow_html=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
222 |
###########################################
|
223 |
# Main Search / Results
|
224 |
###########################################
|
@@ -237,7 +248,6 @@ else:
|
|
237 |
semantic_thresholded = [r for r in semantic_all if r.score >= 0.0]
|
238 |
|
239 |
# 2) Filter results based on the user’s selections
|
240 |
-
# (Assuming filter_results can handle a string "All/Not allocated" as meaning "no filter")
|
241 |
filtered_semantic = filter_results(
|
242 |
semantic_thresholded,
|
243 |
country_filter,
|
@@ -263,10 +273,10 @@ else:
|
|
263 |
get_country_name
|
264 |
)
|
265 |
|
266 |
-
# Additional filter by client
|
267 |
-
if "All/Not allocated"
|
268 |
-
filtered_semantic = [r for r in filtered_semantic if r.payload.get("metadata", {}).get("client", "Unknown Client")
|
269 |
-
filtered_lexical = [r for r in filtered_lexical if r.payload.get("metadata", {}).get("client", "Unknown Client")
|
270 |
|
271 |
# Remove duplicates
|
272 |
filtered_semantic_no_dupe = remove_duplicates(filtered_semantic)
|
@@ -278,8 +288,11 @@ else:
|
|
278 |
except (ValueError, TypeError):
|
279 |
return value
|
280 |
|
281 |
-
# --- Reprint Query (Left
|
282 |
-
st.markdown(
|
|
|
|
|
|
|
283 |
|
284 |
# 3) Display results
|
285 |
# Lexical Search Results Branch
|
@@ -303,7 +316,7 @@ else:
|
|
303 |
if "page" not in st.session_state:
|
304 |
st.session_state.page = 1
|
305 |
current_page = st.session_state.page
|
306 |
-
# Top pagination widget (right aligned,
|
307 |
col_pag_top = st.columns([6, 1])[1]
|
308 |
new_page_top = col_pag_top.selectbox("Select Page", list(range(1, total_pages + 1)), index=current_page - 1, key="page_top")
|
309 |
st.session_state.page = new_page_top
|
@@ -318,7 +331,7 @@ else:
|
|
318 |
metadata["title"] = compute_title(metadata)
|
319 |
title_html = highlight_query(metadata["title"], var) if var.strip() else metadata["title"]
|
320 |
title_clean = re.sub(r'<a.*?>|</a>', '', title_html)
|
321 |
-
# Prepend the result number
|
322 |
st.markdown(f"#### {i}. **{title_clean}**", unsafe_allow_html=True)
|
323 |
|
324 |
objective = metadata.get("objective", "None")
|
@@ -352,27 +365,28 @@ else:
|
|
352 |
new_crs_value_clean = re.sub(r'\.0$', '', str(new_crs_value))
|
353 |
crs_combined = f"{crs_key_clean}: {new_crs_value_clean}" if crs_key_clean else "Unknown"
|
354 |
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
parts
|
364 |
-
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
|
|
370 |
|
371 |
additional_text = (
|
372 |
f"**Objective:** {highlight_query(objective, var)}<br>"
|
373 |
f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
|
374 |
f"**Projekt duration:** {start_year_str}-{end_year_str}<br>"
|
375 |
-
f"**Budget:** Project: <b>{formatted_project_budget}</b>, Total volume: <b>{formatted_total_volume}</b>"
|
376 |
+ extra_line +
|
377 |
f"<br>**Country:** {country_raw}<br>"
|
378 |
f"**Sector:** {crs_combined}"
|
@@ -401,7 +415,7 @@ else:
|
|
401 |
st.session_state.page = 1
|
402 |
current_page = st.session_state.page
|
403 |
|
404 |
-
# Top pagination widget (right aligned,
|
405 |
col_pag_top = st.columns([6, 1])[1]
|
406 |
new_page_top = col_pag_top.selectbox("Select Page", list(range(1, total_pages + 1)), index=current_page - 1, key="page_top_sem")
|
407 |
st.session_state.page = new_page_top
|
@@ -410,12 +424,12 @@ else:
|
|
410 |
end_index = start_index + page_size
|
411 |
top_results = filtered_semantic_no_dupe[start_index:end_index]
|
412 |
|
413 |
-
# Prominent page info with bold numbers
|
414 |
page_num = f"<b style='color: green;'>{st.session_state.page}</b>" if st.session_state.page != 1 else f"<b>{st.session_state.page}</b>"
|
415 |
total_pages_str = f"<b>{total_pages}</b>"
|
416 |
st.markdown(f"Showing **{len(top_results)}** Semantic Search results (Page {page_num} of {total_pages_str})", unsafe_allow_html=True)
|
417 |
|
418 |
-
# --- RAG Answer (Left aligned, bullet points,
|
419 |
rag_answer = get_rag_answer(var, top_results, DEDICATED_ENDPOINT, WRITE_ACCESS_TOKEN)
|
420 |
bullet_lines = []
|
421 |
for line in rag_answer.splitlines():
|
@@ -463,27 +477,29 @@ else:
|
|
463 |
new_crs_value = lookup_crs_value(crs_key_clean)
|
464 |
new_crs_value_clean = re.sub(r'\.0$', '', str(new_crs_value))
|
465 |
crs_combined = f"{crs_key_clean}: {new_crs_value_clean}" if crs_key_clean else "Unknown"
|
466 |
-
|
467 |
-
# Extract and format predecessor and successor project IDs
|
468 |
-
predecessor_raw = metadata.get("predecessor_id", "")
|
469 |
-
successor_raw = metadata.get("successor_id", "")
|
470 |
-
predecessor = safe_format_project_id(predecessor_raw)
|
471 |
-
successor = safe_format_project_id(successor_raw)
|
472 |
|
473 |
-
|
474 |
-
|
475 |
-
|
476 |
-
|
477 |
-
|
478 |
-
|
479 |
-
|
480 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
481 |
|
482 |
additional_text = (
|
483 |
f"**Objective:** {metadata.get('objective', '')}<br>"
|
484 |
f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
|
485 |
f"**Projekt duration:** {start_year_str}-{end_year_str}<br>"
|
486 |
-
f"**Budget:** Project: <b>{formatted_project_budget}</b>, Total volume: <b>{formatted_total_volume}</b>"
|
487 |
+ extra_line +
|
488 |
f"<br>**Country:** {country_raw}<br>"
|
489 |
f"**Sector:** {crs_combined}"
|
@@ -494,7 +510,7 @@ else:
|
|
494 |
st.markdown(additional_text, unsafe_allow_html=True)
|
495 |
st.divider()
|
496 |
|
497 |
-
# Bottom pagination widget (right aligned,
|
498 |
col_pag_bot = st.columns([6, 1])[1]
|
499 |
new_page_bot = col_pag_bot.selectbox("Select Page", list(range(1, total_pages + 1)), index=st.session_state.page - 1, key="page_bot_sem")
|
500 |
st.session_state.page = new_page_bot
|
|
|
22 |
# TF-IDF part (excluded from the app for now)
|
23 |
# from appStore.tfidf_extraction import extract_top_keywords
|
24 |
|
25 |
+
# Import helper modules, including format_project_id for formatting IDs
|
26 |
from appStore.rag_utils import (
|
27 |
highlight_query,
|
28 |
get_rag_answer,
|
29 |
compute_title,
|
30 |
+
format_project_id
|
31 |
)
|
32 |
from appStore.filter_utils import (
|
33 |
parse_budget,
|
|
|
127 |
# Define reset_filters function using session_state
|
128 |
###########################################
|
129 |
def reset_filters():
|
130 |
+
st.session_state["region_filter"] = "All/Not allocated"
|
131 |
+
st.session_state["country_filter"] = "All/Not allocated"
|
132 |
current_year = datetime.now().year
|
133 |
default_start_year = current_year - 4
|
134 |
st.session_state["end_year_range"] = (default_start_year, max_end_year)
|
135 |
+
st.session_state["crs_filter"] = "All/Not allocated"
|
136 |
st.session_state["min_budget"] = min_budget_val
|
137 |
+
st.session_state["client_filter"] = "All/Not allocated"
|
138 |
st.session_state["query"] = ""
|
139 |
st.session_state["show_exact_matches"] = False
|
140 |
st.session_state["page"] = 1
|
|
|
145 |
col1, col2, col3, col4, col5 = st.columns([1, 1, 1, 1, 1])
|
146 |
|
147 |
with col1:
|
148 |
+
region_filter = st.selectbox("Region", ["All/Not allocated"] + sorted(unique_sub_regions), key="region_filter")
|
149 |
+
if region_filter == "All/Not allocated":
|
|
|
150 |
filtered_country_names = unique_country_names
|
151 |
else:
|
152 |
filtered_country_names = [
|
153 |
name for name, code in country_name_mapping.items()
|
154 |
+
if iso_code_to_sub_region.get(code) == region_filter
|
155 |
]
|
156 |
|
157 |
with col2:
|
158 |
+
country_filter = st.selectbox("Country", ["All/Not allocated"] + filtered_country_names, key="country_filter")
|
|
|
159 |
|
160 |
with col3:
|
161 |
current_year = datetime.now().year
|
|
|
170 |
|
171 |
with col4:
|
172 |
crs_options = ["All/Not allocated"] + get_crs_options(client, collection_name)
|
173 |
+
crs_filter = st.selectbox("CRS", crs_options, key="crs_filter")
|
|
|
174 |
|
175 |
with col5:
|
176 |
min_budget = st.slider(
|
|
|
188 |
|
189 |
with col1_2:
|
190 |
client_options = sorted(project_data["client"].dropna().unique().tolist())
|
191 |
+
client_filter = st.selectbox("Client", ["All/Not allocated"] + client_options, key="client_filter")
|
|
|
192 |
with col2_2:
|
193 |
st.empty()
|
194 |
with col3_2:
|
|
|
196 |
with col4_2:
|
197 |
st.empty()
|
198 |
with col5_2:
|
199 |
+
# Plain reset button (will be moved to row 3 as well)
|
200 |
+
st.button("Reset Filters", on_click=reset_filters, key="reset_button_row2")
|
201 |
|
202 |
###########################################
|
203 |
# Filter Controls - Row 3 (Remaining Filter)
|
204 |
###########################################
|
205 |
+
col1_3, col2_3, col3_3, col4_3, col5_3 = st.columns(5)
|
206 |
with col1_3:
|
207 |
# Place the "Show only exact matches" checkbox here
|
208 |
show_exact_matches = st.checkbox("Show only exact matches", key="show_exact_matches")
|
209 |
with col2_3:
|
210 |
st.empty()
|
211 |
with col3_3:
|
212 |
+
st.empty()
|
213 |
+
with col4_3:
|
214 |
+
st.empty()
|
215 |
+
with col5_3:
|
216 |
# Right-align a more prominent reset button
|
217 |
with st.container():
|
218 |
st.markdown("<div style='text-align: right;'>", unsafe_allow_html=True)
|
219 |
if st.button("**Reset Filters**", key="reset_button_row3"):
|
220 |
reset_filters()
|
221 |
st.markdown("</div>", unsafe_allow_html=True)
|
222 |
+
|
223 |
+
###########################################
|
224 |
+
# Helper function for valid project id
|
225 |
+
###########################################
|
226 |
+
def valid_project_id(pid_str):
    """Return True when *pid_str* looks like a usable project id.

    A value is rejected when it is falsy (``None``, ``""``, ``0`` ...),
    when it is blank after stripping whitespace, or when its text form is
    a "missing value" placeholder (``"nan"`` / ``"none"``, case-insensitive).

    Args:
        pid_str: The raw project id; normally a string, but any value is
            accepted and converted with ``str()`` before checking.

    Returns:
        bool: ``True`` if the value can be shown as a project id,
        ``False`` otherwise.
    """
    if not pid_str:
        return False
    # Go through str() so truthy non-string values (e.g. a float NaN) are
    # classified instead of raising AttributeError on .lower(); strip so
    # padded placeholders such as " nan " or "   " are rejected as well.
    text = str(pid_str).strip()
    if not text:
        return False
    return text.lower() not in ("nan", "none")
|
232 |
+
|
233 |
###########################################
|
234 |
# Main Search / Results
|
235 |
###########################################
|
|
|
248 |
semantic_thresholded = [r for r in semantic_all if r.score >= 0.0]
|
249 |
|
250 |
# 2) Filter results based on the user’s selections
|
|
|
251 |
filtered_semantic = filter_results(
|
252 |
semantic_thresholded,
|
253 |
country_filter,
|
|
|
273 |
get_country_name
|
274 |
)
|
275 |
|
276 |
+
# Additional filter by client
|
277 |
+
if client_filter != "All/Not allocated":
|
278 |
+
filtered_semantic = [r for r in filtered_semantic if r.payload.get("metadata", {}).get("client", "Unknown Client") == client_filter]
|
279 |
+
filtered_lexical = [r for r in filtered_lexical if r.payload.get("metadata", {}).get("client", "Unknown Client") == client_filter]
|
280 |
|
281 |
# Remove duplicates
|
282 |
filtered_semantic_no_dupe = remove_duplicates(filtered_semantic)
|
|
|
288 |
except (ValueError, TypeError):
|
289 |
return value
|
290 |
|
291 |
+
# --- Reprint Query (Left Aligned with "Query:") ---
|
292 |
+
st.markdown(
|
293 |
+
f"<div style='text-align: left; font-size:2.1em; font-style: italic; font-weight: bold;'>Query: {var}</div>",
|
294 |
+
unsafe_allow_html=True
|
295 |
+
)
|
296 |
|
297 |
# 3) Display results
|
298 |
# Lexical Search Results Branch
|
|
|
316 |
if "page" not in st.session_state:
|
317 |
st.session_state.page = 1
|
318 |
current_page = st.session_state.page
|
319 |
+
# Top pagination widget (right aligned, 1/7 width)
|
320 |
col_pag_top = st.columns([6, 1])[1]
|
321 |
new_page_top = col_pag_top.selectbox("Select Page", list(range(1, total_pages + 1)), index=current_page - 1, key="page_top")
|
322 |
st.session_state.page = new_page_top
|
|
|
331 |
metadata["title"] = compute_title(metadata)
|
332 |
title_html = highlight_query(metadata["title"], var) if var.strip() else metadata["title"]
|
333 |
title_clean = re.sub(r'<a.*?>|</a>', '', title_html)
|
334 |
+
# Prepend the result number
|
335 |
st.markdown(f"#### {i}. **{title_clean}**", unsafe_allow_html=True)
|
336 |
|
337 |
objective = metadata.get("objective", "None")
|
|
|
365 |
new_crs_value_clean = re.sub(r'\.0$', '', str(new_crs_value))
|
366 |
crs_combined = f"{crs_key_clean}: {new_crs_value_clean}" if crs_key_clean else "Unknown"
|
367 |
|
368 |
+
predecessor = metadata.get("predecessor_id", "").strip()
|
369 |
+
successor = metadata.get("successor_id", "").strip()
|
370 |
+
parts = []
|
371 |
+
if valid_project_id(predecessor):
|
372 |
+
try:
|
373 |
+
formatted_pred = format_project_id(int(float(predecessor)))
|
374 |
+
except Exception:
|
375 |
+
formatted_pred = predecessor
|
376 |
+
parts.append(f"**Predecessor Project:** {formatted_pred}")
|
377 |
+
if valid_project_id(successor):
|
378 |
+
try:
|
379 |
+
formatted_succ = format_project_id(int(float(successor)))
|
380 |
+
except Exception:
|
381 |
+
formatted_succ = successor
|
382 |
+
parts.append(f"**Successor Project:** {formatted_succ}")
|
383 |
+
extra_line = "<br>" + " | ".join(parts) if parts else ""
|
384 |
|
385 |
additional_text = (
|
386 |
f"**Objective:** {highlight_query(objective, var)}<br>"
|
387 |
f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
|
388 |
f"**Projekt duration:** {start_year_str}-{end_year_str}<br>"
|
389 |
+
f"**Budget:** Project: <b>{formatted_project_budget}</b>, Total volume: <b>{formatted_total_volume}</b><br>"
|
390 |
+ extra_line +
|
391 |
f"<br>**Country:** {country_raw}<br>"
|
392 |
f"**Sector:** {crs_combined}"
|
|
|
415 |
st.session_state.page = 1
|
416 |
current_page = st.session_state.page
|
417 |
|
418 |
+
# Top pagination widget (right aligned, 1/7 width)
|
419 |
col_pag_top = st.columns([6, 1])[1]
|
420 |
new_page_top = col_pag_top.selectbox("Select Page", list(range(1, total_pages + 1)), index=current_page - 1, key="page_top_sem")
|
421 |
st.session_state.page = new_page_top
|
|
|
424 |
end_index = start_index + page_size
|
425 |
top_results = filtered_semantic_no_dupe[start_index:end_index]
|
426 |
|
427 |
+
# Prominent page info with bold numbers and green highlight if current page is not 1
|
428 |
page_num = f"<b style='color: green;'>{st.session_state.page}</b>" if st.session_state.page != 1 else f"<b>{st.session_state.page}</b>"
|
429 |
total_pages_str = f"<b>{total_pages}</b>"
|
430 |
st.markdown(f"Showing **{len(top_results)}** Semantic Search results (Page {page_num} of {total_pages_str})", unsafe_allow_html=True)
|
431 |
|
432 |
+
# --- RAG Answer (Left aligned, bullet points, bold numbers) ---
|
433 |
rag_answer = get_rag_answer(var, top_results, DEDICATED_ENDPOINT, WRITE_ACCESS_TOKEN)
|
434 |
bullet_lines = []
|
435 |
for line in rag_answer.splitlines():
|
|
|
477 |
new_crs_value = lookup_crs_value(crs_key_clean)
|
478 |
new_crs_value_clean = re.sub(r'\.0$', '', str(new_crs_value))
|
479 |
crs_combined = f"{crs_key_clean}: {new_crs_value_clean}" if crs_key_clean else "Unknown"
|
|
|
|
|
|
|
|
|
|
|
|
|
480 |
|
481 |
+
predecessor = metadata.get("predecessor_id", "").strip()
|
482 |
+
successor = metadata.get("successor_id", "").strip()
|
483 |
+
parts = []
|
484 |
+
if valid_project_id(predecessor):
|
485 |
+
try:
|
486 |
+
formatted_pred = format_project_id(int(float(predecessor)))
|
487 |
+
except Exception:
|
488 |
+
formatted_pred = predecessor
|
489 |
+
parts.append(f"**Predecessor Project:** {formatted_pred}")
|
490 |
+
if valid_project_id(successor):
|
491 |
+
try:
|
492 |
+
formatted_succ = format_project_id(int(float(successor)))
|
493 |
+
except Exception:
|
494 |
+
formatted_succ = successor
|
495 |
+
parts.append(f"**Successor Project:** {formatted_succ}")
|
496 |
+
extra_line = "<br>" + " | ".join(parts) if parts else ""
|
497 |
|
498 |
additional_text = (
|
499 |
f"**Objective:** {metadata.get('objective', '')}<br>"
|
500 |
f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
|
501 |
f"**Projekt duration:** {start_year_str}-{end_year_str}<br>"
|
502 |
+
f"**Budget:** Project: <b>{formatted_project_budget}</b>, Total volume: <b>{formatted_total_volume}</b><br>"
|
503 |
+ extra_line +
|
504 |
f"<br>**Country:** {country_raw}<br>"
|
505 |
f"**Sector:** {crs_combined}"
|
|
|
510 |
st.markdown(additional_text, unsafe_allow_html=True)
|
511 |
st.divider()
|
512 |
|
513 |
+
# Bottom pagination widget (right aligned, 1/7 width)
|
514 |
col_pag_bot = st.columns([6, 1])[1]
|
515 |
new_page_bot = col_pag_bot.selectbox("Select Page", list(range(1, total_pages + 1)), index=st.session_state.page - 1, key="page_bot_sem")
|
516 |
st.session_state.page = new_page_bot
|