Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -93,8 +93,8 @@ with col_about:
|
|
93 |
""", unsafe_allow_html=True
|
94 |
)
|
95 |
|
96 |
-
# Main query input
|
97 |
-
var = st.text_input("Enter Question")
|
98 |
|
99 |
###########################################
|
100 |
# Create or load the embeddings collection
|
@@ -134,7 +134,8 @@ def reset_filters():
|
|
134 |
st.session_state["crs_filter"] = "All/Not allocated"
|
135 |
st.session_state["min_budget"] = min_budget_val
|
136 |
st.session_state["client_filter"] = "All/Not allocated"
|
137 |
-
|
|
|
138 |
st.session_state["page"] = 1
|
139 |
|
140 |
###########################################
|
@@ -144,7 +145,6 @@ col1, col2, col3, col4, col5 = st.columns([1, 1, 1, 1, 1])
|
|
144 |
|
145 |
with col1:
|
146 |
region_filter = st.selectbox("Region", ["All/Not allocated"] + sorted(unique_sub_regions), key="region_filter")
|
147 |
-
|
148 |
if region_filter == "All/Not allocated":
|
149 |
filtered_country_names = unique_country_names
|
150 |
else:
|
@@ -180,7 +180,6 @@ with col5:
|
|
180 |
key="min_budget"
|
181 |
)
|
182 |
|
183 |
-
|
184 |
###########################################
|
185 |
# Filter Controls - Row 2 (Additional Filters)
|
186 |
###########################################
|
@@ -196,10 +195,29 @@ with col3_2:
|
|
196 |
with col4_2:
|
197 |
st.empty()
|
198 |
with col5_2:
|
199 |
-
|
|
|
200 |
|
201 |
-
|
202 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
203 |
|
204 |
###########################################
|
205 |
# Main Search / Results
|
@@ -259,7 +277,11 @@ else:
|
|
259 |
except (ValueError, TypeError):
|
260 |
return value
|
261 |
|
|
|
|
|
|
|
262 |
# 3) Display results
|
|
|
263 |
if show_exact_matches:
|
264 |
st.write("Showing **Top Lexical Search results**")
|
265 |
query_substring = var.strip().lower()
|
@@ -272,13 +294,31 @@ else:
|
|
272 |
st.write('No exact matches, consider unchecking "Show only exact matches"')
|
273 |
else:
|
274 |
top_results = filtered_lexical_no_dupe # Show all matching lexical results
|
275 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
276 |
metadata = res.payload.get('metadata', {})
|
277 |
if "title" not in metadata:
|
278 |
metadata["title"] = compute_title(metadata)
|
279 |
title_html = highlight_query(metadata["title"], var) if var.strip() else metadata["title"]
|
280 |
title_clean = re.sub(r'<a.*?>|</a>', '', title_html)
|
281 |
-
|
|
|
282 |
|
283 |
objective = metadata.get("objective", "None")
|
284 |
desc_en = metadata.get("description.en", "").strip()
|
@@ -297,7 +337,6 @@ else:
|
|
297 |
if remainder_text:
|
298 |
with st.expander("Show more"):
|
299 |
st.markdown(highlight_query(remainder_text, var), unsafe_allow_html=True)
|
300 |
-
|
301 |
with col_right:
|
302 |
start_year_str = extract_year(metadata.get('start_year', None)) or "Unknown"
|
303 |
end_year_str = extract_year(metadata.get('end_year', None)) or "Unknown"
|
@@ -311,21 +350,37 @@ else:
|
|
311 |
new_crs_value = lookup_crs_value(crs_key_clean)
|
312 |
new_crs_value_clean = re.sub(r'\.0$', '', str(new_crs_value))
|
313 |
crs_combined = f"{crs_key_clean}: {new_crs_value_clean}" if crs_key_clean else "Unknown"
|
314 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
315 |
additional_text = (
|
316 |
f"**Objective:** {highlight_query(objective, var)}<br>"
|
317 |
f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
|
318 |
f"**Projekt duration:** {start_year_str}-{end_year_str}<br>"
|
319 |
-
f"**Budget:** Project: {formatted_project_budget}
|
320 |
-
|
|
|
321 |
f"**Sector:** {crs_combined}"
|
322 |
)
|
323 |
contact = metadata.get("contact", "").strip()
|
324 |
if contact and contact.lower() != "[email protected]":
|
325 |
additional_text += f"<br>**Contact:** [email protected]"
|
326 |
st.markdown(additional_text, unsafe_allow_html=True)
|
327 |
-
|
328 |
st.divider()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
329 |
else:
|
330 |
if not filtered_semantic_no_dupe:
|
331 |
st.write("No relevant results found.")
|
@@ -334,30 +389,43 @@ else:
|
|
334 |
total_results = len(filtered_semantic_no_dupe)
|
335 |
total_pages = (total_results - 1) // page_size + 1
|
336 |
|
337 |
-
# Use session_state for page selection; default to 1 if not set.
|
338 |
if "page" not in st.session_state:
|
339 |
st.session_state.page = 1
|
340 |
current_page = st.session_state.page
|
341 |
|
342 |
-
|
|
|
|
|
|
|
|
|
|
|
343 |
end_index = start_index + page_size
|
344 |
top_results = filtered_semantic_no_dupe[start_index:end_index]
|
345 |
|
346 |
-
|
|
|
|
|
|
|
347 |
|
|
|
348 |
rag_answer = get_rag_answer(var, top_results, DEDICATED_ENDPOINT, WRITE_ACCESS_TOKEN)
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
|
|
|
|
|
|
|
|
353 |
st.divider()
|
354 |
|
355 |
-
for res in top_results:
|
356 |
metadata = res.payload.get('metadata', {})
|
357 |
if "title" not in metadata:
|
358 |
metadata["title"] = compute_title(metadata)
|
359 |
title_clean = re.sub(r'<a.*?>|</a>', '', metadata["title"])
|
360 |
-
|
|
|
361 |
|
362 |
desc_en = metadata.get("description.en", "").strip()
|
363 |
desc_de = metadata.get("description.de", "").strip()
|
@@ -376,7 +444,6 @@ else:
|
|
376 |
if remainder_text:
|
377 |
with st.expander("Show more"):
|
378 |
st.markdown(highlight_query(remainder_text, var), unsafe_allow_html=True)
|
379 |
-
|
380 |
with col_right:
|
381 |
start_year_str = extract_year(metadata.get('start_year', None)) or "Unknown"
|
382 |
end_year_str = extract_year(metadata.get('end_year', None)) or "Unknown"
|
@@ -390,22 +457,31 @@ else:
|
|
390 |
new_crs_value = lookup_crs_value(crs_key_clean)
|
391 |
new_crs_value_clean = re.sub(r'\.0$', '', str(new_crs_value))
|
392 |
crs_combined = f"{crs_key_clean}: {new_crs_value_clean}" if crs_key_clean else "Unknown"
|
393 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
394 |
additional_text = (
|
395 |
f"**Objective:** {metadata.get('objective', '')}<br>"
|
396 |
f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
|
397 |
f"**Projekt duration:** {start_year_str}-{end_year_str}<br>"
|
398 |
-
f"**Budget:** Project: {formatted_project_budget}
|
399 |
-
|
|
|
400 |
f"**Sector:** {crs_combined}"
|
401 |
)
|
402 |
contact = metadata.get("contact", "").strip()
|
403 |
if contact and contact.lower() != "[email protected]":
|
404 |
additional_text += f"<br>**Contact:** [email protected]"
|
405 |
st.markdown(additional_text, unsafe_allow_html=True)
|
406 |
-
|
407 |
st.divider()
|
408 |
|
409 |
-
#
|
410 |
-
|
411 |
-
|
|
|
|
93 |
""", unsafe_allow_html=True
|
94 |
)
|
95 |
|
96 |
+
# Main query input (with a key so we can reset it)
|
97 |
+
var = st.text_input("Enter Question", key="query")
|
98 |
|
99 |
###########################################
|
100 |
# Create or load the embeddings collection
|
|
|
134 |
st.session_state["crs_filter"] = "All/Not allocated"
|
135 |
st.session_state["min_budget"] = min_budget_val
|
136 |
st.session_state["client_filter"] = "All/Not allocated"
|
137 |
+
st.session_state["query"] = ""
|
138 |
+
st.session_state["show_exact_matches"] = False
|
139 |
st.session_state["page"] = 1
|
140 |
|
141 |
###########################################
|
|
|
145 |
|
146 |
with col1:
|
147 |
region_filter = st.selectbox("Region", ["All/Not allocated"] + sorted(unique_sub_regions), key="region_filter")
|
|
|
148 |
if region_filter == "All/Not allocated":
|
149 |
filtered_country_names = unique_country_names
|
150 |
else:
|
|
|
180 |
key="min_budget"
|
181 |
)
|
182 |
|
|
|
183 |
###########################################
|
184 |
# Filter Controls - Row 2 (Additional Filters)
|
185 |
###########################################
|
|
|
195 |
with col4_2:
|
196 |
st.empty()
|
197 |
with col5_2:
|
198 |
+
# Plain reset button (will be moved to row 3 as well)
|
199 |
+
st.button("Reset Filters", on_click=reset_filters, key="reset_button_row2")
|
200 |
|
201 |
+
###########################################
# Filter Controls - Row 3 (Remaining Filter)
###########################################
# Five equal-width columns: the first holds the exact-match checkbox, the last
# holds the reset button, and the three in between are left empty as spacers.
col1_3, col2_3, col3_3, col4_3, col5_3 = st.columns(5)
with col1_3:
    # Place the "Show only exact matches" checkbox here
    show_exact_matches = st.checkbox("Show only exact matches", key="show_exact_matches")
with col2_3:
    st.empty()
with col3_3:
    st.empty()
with col4_3:
    st.empty()
with col5_3:
    # Right-align a more prominent reset button
    with st.container():
        # NOTE(review): each st.markdown call is rendered as its own element, so
        # this opening/closing <div> pair probably does not wrap the button --
        # confirm that the right alignment actually takes effect.
        st.markdown("<div style='text-align: right;'>", unsafe_allow_html=True)
        # reset_filters writes to session_state keys that belong to widgets
        # already created above ("query", "region_filter", "min_budget",
        # "show_exact_matches", ...). Streamlit rejects such writes once the
        # widget exists in the current run, so the reset must run as an
        # on_click callback (executed before the next rerun), exactly like the
        # row-2 reset button does.
        st.button("**Reset Filters**", key="reset_button_row3", on_click=reset_filters)
        st.markdown("</div>", unsafe_allow_html=True)
|
221 |
|
222 |
###########################################
|
223 |
# Main Search / Results
|
|
|
277 |
except (ValueError, TypeError):
|
278 |
return value
|
279 |
|
280 |
+
# --- Reprint Query (Right Aligned with "Query:") ---
# `var` is free-text user input and this element is rendered with
# unsafe_allow_html=True, so escape it first. Without escaping, a query that
# contains "<", ">" or "&" is parsed as markup instead of being shown verbatim.
import html  # stdlib; imported here so this fix does not depend on the file header

st.markdown(
    f"<div style='text-align: right; font-size:2.1em; font-style: italic; font-weight: bold;'>Query: {html.escape(var)}</div>",
    unsafe_allow_html=True,
)
|
282 |
+
|
283 |
# 3) Display results
|
284 |
+
# Lexical Search Results Branch
|
285 |
if show_exact_matches:
|
286 |
st.write("Showing **Top Lexical Search results**")
|
287 |
query_substring = var.strip().lower()
|
|
|
294 |
st.write('No exact matches, consider unchecking "Show only exact matches"')
|
295 |
else:
|
296 |
top_results = filtered_lexical_no_dupe # Show all matching lexical results
|
297 |
+
|
298 |
+
# --- Pagination (Above Lexical Results) ---
page_size = 15
total_results = len(top_results)
# Always offer at least one page so the selector below has a valid option.
total_pages = max(1, (total_results - 1) // page_size + 1)
if "page" not in st.session_state:
    st.session_state.page = 1
# The stored page may have been chosen for a different (larger) result set,
# e.g. while browsing semantic results. Clamp it to the pages that exist now;
# otherwise `index` below falls outside the option list and Streamlit raises.
current_page = min(max(st.session_state.page, 1), total_pages)
# Top pagination widget (right aligned, 1/7 width)
col_pag_top = st.columns([6, 1])[1]
new_page_top = col_pag_top.selectbox(
    "Select Page",
    list(range(1, total_pages + 1)),
    index=current_page - 1,
    key="page_top",
)
# NOTE(review): the bottom page selector assigns st.session_state.page again
# after the results are drawn, so only this top selector decides which slice is
# displayed in the current run -- confirm that this is the intended behavior.
st.session_state.page = new_page_top

# Slice out the results that belong to the selected page.
start_index = (st.session_state.page - 1) * page_size
end_index = start_index + page_size
paged_results = top_results[start_index:end_index]
|
313 |
+
|
314 |
+
for i, res in enumerate(paged_results, start=start_index+1):
|
315 |
metadata = res.payload.get('metadata', {})
|
316 |
if "title" not in metadata:
|
317 |
metadata["title"] = compute_title(metadata)
|
318 |
title_html = highlight_query(metadata["title"], var) if var.strip() else metadata["title"]
|
319 |
title_clean = re.sub(r'<a.*?>|</a>', '', title_html)
|
320 |
+
# Prepend the result number
|
321 |
+
st.markdown(f"#### {i}. **{title_clean}**", unsafe_allow_html=True)
|
322 |
|
323 |
objective = metadata.get("objective", "None")
|
324 |
desc_en = metadata.get("description.en", "").strip()
|
|
|
337 |
if remainder_text:
|
338 |
with st.expander("Show more"):
|
339 |
st.markdown(highlight_query(remainder_text, var), unsafe_allow_html=True)
|
|
|
340 |
with col_right:
|
341 |
start_year_str = extract_year(metadata.get('start_year', None)) or "Unknown"
|
342 |
end_year_str = extract_year(metadata.get('end_year', None)) or "Unknown"
|
|
|
350 |
new_crs_value = lookup_crs_value(crs_key_clean)
|
351 |
new_crs_value_clean = re.sub(r'\.0$', '', str(new_crs_value))
|
352 |
crs_combined = f"{crs_key_clean}: {new_crs_value_clean}" if crs_key_clean else "Unknown"
|
353 |
+
|
354 |
+
# Insert Predecessor/Successor line if available
|
355 |
+
predecessor = metadata.get("predecessor_id", "").strip()
|
356 |
+
successor = metadata.get("successor_id", "").strip()
|
357 |
+
extra_line = ""
|
358 |
+
if predecessor:
|
359 |
+
extra_line += f"<br>**Predecessor Project:** {predecessor}"
|
360 |
+
if successor:
|
361 |
+
extra_line += f"<br>**Successor Project:** {successor}"
|
362 |
+
|
363 |
additional_text = (
|
364 |
f"**Objective:** {highlight_query(objective, var)}<br>"
|
365 |
f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
|
366 |
f"**Projekt duration:** {start_year_str}-{end_year_str}<br>"
|
367 |
+
f"**Budget:** Project: <b>{formatted_project_budget}</b>, Total volume: <b>{formatted_total_volume}</b><br>"
|
368 |
+
+ extra_line +
|
369 |
+
f"<br>**Country:** {country_raw}<br>"
|
370 |
f"**Sector:** {crs_combined}"
|
371 |
)
|
372 |
contact = metadata.get("contact", "").strip()
|
373 |
if contact and contact.lower() != "[email protected]":
|
374 |
additional_text += f"<br>**Contact:** [email protected]"
|
375 |
st.markdown(additional_text, unsafe_allow_html=True)
|
|
|
376 |
st.divider()
|
377 |
+
|
378 |
+
# Bottom pagination widget
|
379 |
+
col_pag_bot = st.columns([6, 1])[1]
|
380 |
+
new_page_bot = col_pag_bot.selectbox("Select Page", list(range(1, total_pages + 1)), index=st.session_state.page - 1, key="page_bot")
|
381 |
+
st.session_state.page = new_page_bot
|
382 |
+
|
383 |
+
# Semantic Search Results Branch
|
384 |
else:
|
385 |
if not filtered_semantic_no_dupe:
|
386 |
st.write("No relevant results found.")
|
|
|
389 |
total_results = len(filtered_semantic_no_dupe)
|
390 |
total_pages = (total_results - 1) // page_size + 1
|
391 |
|
|
|
392 |
if "page" not in st.session_state:
|
393 |
st.session_state.page = 1
|
394 |
current_page = st.session_state.page
|
395 |
|
396 |
+
# Top pagination widget (right aligned, 1/7 width)
col_pag_top = st.columns([6, 1])[1]
# current_page comes from session state and may refer to a page of an earlier,
# larger result set. Clamp it to [1, total_pages] so the default index always
# points at an existing option instead of raising inside st.selectbox.
safe_page = min(max(current_page, 1), total_pages)
new_page_top = col_pag_top.selectbox(
    "Select Page",
    list(range(1, total_pages + 1)),
    index=safe_page - 1,
    key="page_top_sem",
)
st.session_state.page = new_page_top

# Offset of the first result on the selected page.
start_index = (st.session_state.page - 1) * page_size
|
402 |
end_index = start_index + page_size
|
403 |
top_results = filtered_semantic_no_dupe[start_index:end_index]
|
404 |
|
405 |
+
# Prominent page info with bold numbers and green highlight if current page is not 1
|
406 |
+
page_num = f"<b style='color: green;'>{st.session_state.page}</b>" if st.session_state.page != 1 else f"<b>{st.session_state.page}</b>"
|
407 |
+
total_pages_str = f"<b>{total_pages}</b>"
|
408 |
+
st.markdown(f"Showing **{len(top_results)}** Semantic Search results (Page {page_num} of {total_pages_str})", unsafe_allow_html=True)
|
409 |
|
410 |
+
# --- RAG Answer (Right aligned, bullet points, bold numbers) ---
|
411 |
rag_answer = get_rag_answer(var, top_results, DEDICATED_ENDPOINT, WRITE_ACCESS_TOKEN)
|
412 |
+
bullet_lines = []
|
413 |
+
for line in rag_answer.splitlines():
|
414 |
+
if line.strip():
|
415 |
+
# Bold any numbers in the line
|
416 |
+
line_bold = re.sub(r'(\d+)', r'<b>\1</b>', line)
|
417 |
+
bullet_lines.append(f"<li>{line_bold}</li>")
|
418 |
+
formatted_rag_answer = "<ul style='text-align: right; list-style-position: inside;'>" + "".join(bullet_lines) + "</ul>"
|
419 |
+
st.markdown(formatted_rag_answer, unsafe_allow_html=True)
|
420 |
st.divider()
|
421 |
|
422 |
+
for i, res in enumerate(top_results, start=start_index+1):
|
423 |
metadata = res.payload.get('metadata', {})
|
424 |
if "title" not in metadata:
|
425 |
metadata["title"] = compute_title(metadata)
|
426 |
title_clean = re.sub(r'<a.*?>|</a>', '', metadata["title"])
|
427 |
+
# Prepend result number and make title bold
|
428 |
+
st.markdown(f"#### {i}. **{title_clean}**", unsafe_allow_html=True)
|
429 |
|
430 |
desc_en = metadata.get("description.en", "").strip()
|
431 |
desc_de = metadata.get("description.de", "").strip()
|
|
|
444 |
if remainder_text:
|
445 |
with st.expander("Show more"):
|
446 |
st.markdown(highlight_query(remainder_text, var), unsafe_allow_html=True)
|
|
|
447 |
with col_right:
|
448 |
start_year_str = extract_year(metadata.get('start_year', None)) or "Unknown"
|
449 |
end_year_str = extract_year(metadata.get('end_year', None)) or "Unknown"
|
|
|
457 |
new_crs_value = lookup_crs_value(crs_key_clean)
|
458 |
new_crs_value_clean = re.sub(r'\.0$', '', str(new_crs_value))
|
459 |
crs_combined = f"{crs_key_clean}: {new_crs_value_clean}" if crs_key_clean else "Unknown"
|
460 |
+
|
461 |
+
predecessor = metadata.get("predecessor_id", "").strip()
|
462 |
+
successor = metadata.get("successor_id", "").strip()
|
463 |
+
extra_line = ""
|
464 |
+
if predecessor:
|
465 |
+
extra_line += f"<br>**Predecessor Project:** {predecessor}"
|
466 |
+
if successor:
|
467 |
+
extra_line += f"<br>**Successor Project:** {successor}"
|
468 |
+
|
469 |
additional_text = (
|
470 |
f"**Objective:** {metadata.get('objective', '')}<br>"
|
471 |
f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
|
472 |
f"**Projekt duration:** {start_year_str}-{end_year_str}<br>"
|
473 |
+
f"**Budget:** Project: <b>{formatted_project_budget}</b>, Total volume: <b>{formatted_total_volume}</b><br>"
|
474 |
+
+ extra_line +
|
475 |
+
f"<br>**Country:** {country_raw}<br>"
|
476 |
f"**Sector:** {crs_combined}"
|
477 |
)
|
478 |
contact = metadata.get("contact", "").strip()
|
479 |
if contact and contact.lower() != "[email protected]":
|
480 |
additional_text += f"<br>**Contact:** [email protected]"
|
481 |
st.markdown(additional_text, unsafe_allow_html=True)
|
|
|
482 |
st.divider()
|
483 |
|
484 |
+
# Bottom pagination widget (right aligned, 1/7 width)
|
485 |
+
col_pag_bot = st.columns([6, 1])[1]
|
486 |
+
new_page_bot = col_pag_bot.selectbox("Select Page", list(range(1, total_pages + 1)), index=st.session_state.page - 1, key="page_bot_sem")
|
487 |
+
st.session_state.page = new_page_bot
|