Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -26,8 +26,7 @@ from appStore.region_utils import (
|
|
26 |
from appStore.rag_utils import (
|
27 |
highlight_query,
|
28 |
get_rag_answer,
|
29 |
-
compute_title
|
30 |
-
format_project_id # formatting function for project IDs
|
31 |
)
|
32 |
from appStore.filter_utils import (
|
33 |
parse_budget,
|
@@ -94,8 +93,8 @@ with col_about:
|
|
94 |
""", unsafe_allow_html=True
|
95 |
)
|
96 |
|
97 |
-
# Main query input
|
98 |
-
var = st.text_input("Enter Question"
|
99 |
|
100 |
###########################################
|
101 |
# Create or load the embeddings collection
|
@@ -127,16 +126,15 @@ unique_country_names = sorted(country_name_mapping.keys())
|
|
127 |
# Define reset_filters function using session_state
|
128 |
###########################################
|
129 |
def reset_filters():
|
130 |
-
st.session_state["region_filter"] =
|
131 |
-
st.session_state["country_filter"] =
|
132 |
current_year = datetime.now().year
|
133 |
default_start_year = current_year - 4
|
134 |
st.session_state["end_year_range"] = (default_start_year, max_end_year)
|
135 |
-
st.session_state["crs_filter"] =
|
136 |
st.session_state["min_budget"] = min_budget_val
|
137 |
-
st.session_state["client_filter"] =
|
138 |
-
|
139 |
-
st.session_state["show_exact_matches"] = False
|
140 |
st.session_state["page"] = 1
|
141 |
|
142 |
###########################################
|
@@ -145,23 +143,18 @@ def reset_filters():
|
|
145 |
col1, col2, col3, col4, col5 = st.columns([1, 1, 1, 1, 1])
|
146 |
|
147 |
with col1:
|
148 |
-
region_filter = st.
|
149 |
-
|
150 |
-
|
151 |
-
)
|
152 |
-
if "All/Not allocated" in region_filter or not region_filter:
|
153 |
filtered_country_names = unique_country_names
|
154 |
else:
|
155 |
filtered_country_names = [
|
156 |
name for name, code in country_name_mapping.items()
|
157 |
-
if iso_code_to_sub_region.get(code)
|
158 |
]
|
159 |
|
160 |
with col2:
|
161 |
-
country_filter = st.
|
162 |
-
"Country", options=["All/Not allocated"] + filtered_country_names,
|
163 |
-
default=["All/Not allocated"], key="country_filter"
|
164 |
-
)
|
165 |
|
166 |
with col3:
|
167 |
current_year = datetime.now().year
|
@@ -176,10 +169,7 @@ with col3:
|
|
176 |
|
177 |
with col4:
|
178 |
crs_options = ["All/Not allocated"] + get_crs_options(client, collection_name)
|
179 |
-
crs_filter = st.
|
180 |
-
"CRS", options=crs_options,
|
181 |
-
default=["All/Not allocated"], key="crs_filter"
|
182 |
-
)
|
183 |
|
184 |
with col5:
|
185 |
min_budget = st.slider(
|
@@ -190,6 +180,7 @@ with col5:
|
|
190 |
key="min_budget"
|
191 |
)
|
192 |
|
|
|
193 |
###########################################
|
194 |
# Filter Controls - Row 2 (Additional Filters)
|
195 |
###########################################
|
@@ -197,10 +188,7 @@ col1_2, col2_2, col3_2, col4_2, col5_2 = st.columns(5)
|
|
197 |
|
198 |
with col1_2:
|
199 |
client_options = sorted(project_data["client"].dropna().unique().tolist())
|
200 |
-
client_filter = st.
|
201 |
-
"Client", options=["All/Not allocated"] + client_options,
|
202 |
-
default=["All/Not allocated"], key="client_filter"
|
203 |
-
)
|
204 |
with col2_2:
|
205 |
st.empty()
|
206 |
with col3_2:
|
@@ -208,42 +196,10 @@ with col3_2:
|
|
208 |
with col4_2:
|
209 |
st.empty()
|
210 |
with col5_2:
|
211 |
-
st.button("Reset Filters", on_click=reset_filters
|
212 |
|
213 |
-
###########################################
|
214 |
-
# Filter Controls - Row 3 (Remaining Filter)
|
215 |
-
###########################################
|
216 |
-
col1_3, col2_3, col3_3, col4_3, col5_3 = st.columns(5)
|
217 |
-
with col1_3:
|
218 |
-
# Place the "Show only exact matches" checkbox here
|
219 |
-
show_exact_matches = st.checkbox("Show only exact matches", key="show_exact_matches")
|
220 |
-
with col2_3:
|
221 |
-
st.empty()
|
222 |
-
with col3_3:
|
223 |
-
st.empty()
|
224 |
-
with col4_3:
|
225 |
-
st.empty()
|
226 |
-
with col5_3:
|
227 |
-
# Right-align a more prominent reset button
|
228 |
-
with st.container():
|
229 |
-
st.markdown("<div style='text-align: right;'>", unsafe_allow_html=True)
|
230 |
-
if st.button("**Reset Filters**", key="reset_button_row3"):
|
231 |
-
reset_filters()
|
232 |
-
st.markdown("</div>", unsafe_allow_html=True)
|
233 |
-
|
234 |
-
###########################################
|
235 |
-
# Helper function to process multiselect values
|
236 |
-
###########################################
|
237 |
-
def process_multiselect(filter_list):
|
238 |
-
if not filter_list or "All/Not allocated" in filter_list:
|
239 |
-
return None
|
240 |
-
return filter_list
|
241 |
|
242 |
-
|
243 |
-
region_filter_val = process_multiselect(region_filter)
|
244 |
-
country_filter_val = process_multiselect(country_filter)
|
245 |
-
crs_filter_val = process_multiselect(crs_filter)
|
246 |
-
client_filter_val = process_multiselect(client_filter)
|
247 |
|
248 |
###########################################
|
249 |
# Main Search / Results
|
@@ -262,13 +218,13 @@ else:
|
|
262 |
# Apply threshold to semantic results if desired
|
263 |
semantic_thresholded = [r for r in semantic_all if r.score >= 0.0]
|
264 |
|
265 |
-
# 2) Filter results based on the user’s selections
|
266 |
filtered_semantic = filter_results(
|
267 |
semantic_thresholded,
|
268 |
-
|
269 |
-
|
270 |
end_year_range,
|
271 |
-
|
272 |
min_budget,
|
273 |
region_df,
|
274 |
iso_code_to_sub_region,
|
@@ -277,10 +233,10 @@ else:
|
|
277 |
)
|
278 |
filtered_lexical = filter_results(
|
279 |
lexical_all,
|
280 |
-
|
281 |
-
|
282 |
end_year_range,
|
283 |
-
|
284 |
min_budget,
|
285 |
region_df,
|
286 |
iso_code_to_sub_region,
|
@@ -288,10 +244,10 @@ else:
|
|
288 |
get_country_name
|
289 |
)
|
290 |
|
291 |
-
# Additional filter by client
|
292 |
-
if
|
293 |
-
filtered_semantic = [r for r in filtered_semantic if r.payload.get("metadata", {}).get("client", "Unknown Client")
|
294 |
-
filtered_lexical = [r for r in filtered_lexical if r.payload.get("metadata", {}).get("client", "Unknown Client")
|
295 |
|
296 |
# Remove duplicates
|
297 |
filtered_semantic_no_dupe = remove_duplicates(filtered_semantic)
|
@@ -303,11 +259,7 @@ else:
|
|
303 |
except (ValueError, TypeError):
|
304 |
return value
|
305 |
|
306 |
-
# --- Reprint Query (Left aligned with "Query:") ---
|
307 |
-
st.markdown(f"<div style='text-align: left; font-size:2.1em; font-style: italic; font-weight: bold;'>Query: {var}</div>", unsafe_allow_html=True)
|
308 |
-
|
309 |
# 3) Display results
|
310 |
-
# Lexical Search Results Branch
|
311 |
if show_exact_matches:
|
312 |
st.write("Showing **Top Lexical Search results**")
|
313 |
query_substring = var.strip().lower()
|
@@ -320,32 +272,13 @@ else:
|
|
320 |
st.write('No exact matches, consider unchecking "Show only exact matches"')
|
321 |
else:
|
322 |
top_results = filtered_lexical_no_dupe # Show all matching lexical results
|
323 |
-
|
324 |
-
# --- Pagination (Above Lexical Results) ---
|
325 |
-
page_size = 15
|
326 |
-
total_results = len(top_results)
|
327 |
-
total_pages = (total_results - 1) // page_size + 1
|
328 |
-
if "page" not in st.session_state:
|
329 |
-
st.session_state.page = 1
|
330 |
-
current_page = st.session_state.page
|
331 |
-
# Top pagination widget (right aligned, occupying 1/7 of page width)
|
332 |
-
col_pag_top = st.columns([6, 1])[1]
|
333 |
-
new_page_top = col_pag_top.selectbox("Select Page", list(range(1, total_pages + 1)),
|
334 |
-
index=current_page - 1, key="page_top")
|
335 |
-
st.session_state.page = new_page_top
|
336 |
-
|
337 |
-
start_index = (st.session_state.page - 1) * page_size
|
338 |
-
end_index = start_index + page_size
|
339 |
-
paged_results = top_results[start_index:end_index]
|
340 |
-
|
341 |
-
for i, res in enumerate(paged_results, start=start_index+1):
|
342 |
metadata = res.payload.get('metadata', {})
|
343 |
if "title" not in metadata:
|
344 |
metadata["title"] = compute_title(metadata)
|
345 |
title_html = highlight_query(metadata["title"], var) if var.strip() else metadata["title"]
|
346 |
title_clean = re.sub(r'<a.*?>|</a>', '', title_html)
|
347 |
-
|
348 |
-
st.markdown(f"#### {i}. **{title_clean}**", unsafe_allow_html=True)
|
349 |
|
350 |
objective = metadata.get("objective", "None")
|
351 |
desc_en = metadata.get("description.en", "").strip()
|
@@ -364,6 +297,7 @@ else:
|
|
364 |
if remainder_text:
|
365 |
with st.expander("Show more"):
|
366 |
st.markdown(highlight_query(remainder_text, var), unsafe_allow_html=True)
|
|
|
367 |
with col_right:
|
368 |
start_year_str = extract_year(metadata.get('start_year', None)) or "Unknown"
|
369 |
end_year_str = extract_year(metadata.get('end_year', None)) or "Unknown"
|
@@ -377,41 +311,21 @@ else:
|
|
377 |
new_crs_value = lookup_crs_value(crs_key_clean)
|
378 |
new_crs_value_clean = re.sub(r'\.0$', '', str(new_crs_value))
|
379 |
crs_combined = f"{crs_key_clean}: {new_crs_value_clean}" if crs_key_clean else "Unknown"
|
380 |
-
|
381 |
-
# Combine Predecessor and Successor in one row, formatted with format_project_id
|
382 |
-
predecessor = metadata.get("predecessor_id", "").strip()
|
383 |
-
successor = metadata.get("successor_id", "").strip()
|
384 |
-
extra_line = ""
|
385 |
-
if predecessor or successor:
|
386 |
-
parts = []
|
387 |
-
if predecessor:
|
388 |
-
parts.append(f"**Predecessor Project:** {format_project_id(predecessor)}")
|
389 |
-
if successor:
|
390 |
-
parts.append(f"**Successor Project:** {format_project_id(successor)}")
|
391 |
-
extra_line = "<br>" + " | ".join(parts)
|
392 |
-
|
393 |
additional_text = (
|
394 |
f"**Objective:** {highlight_query(objective, var)}<br>"
|
395 |
f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
|
396 |
f"**Projekt duration:** {start_year_str}-{end_year_str}<br>"
|
397 |
-
f"**Budget:** Project:
|
398 |
-
|
399 |
-
f"<br>**Country:** {country_raw}<br>"
|
400 |
f"**Sector:** {crs_combined}"
|
401 |
)
|
402 |
contact = metadata.get("contact", "").strip()
|
403 |
if contact and contact.lower() != "[email protected]":
|
404 |
additional_text += f"<br>**Contact:** [email protected]"
|
405 |
st.markdown(additional_text, unsafe_allow_html=True)
|
406 |
-
st.divider()
|
407 |
|
408 |
-
|
409 |
-
col_pag_bot = st.columns([6, 1])[1]
|
410 |
-
new_page_bot = col_pag_bot.selectbox("Select Page", list(range(1, total_pages + 1)),
|
411 |
-
index=st.session_state.page - 1, key="page_bot")
|
412 |
-
st.session_state.page = new_page_bot
|
413 |
-
|
414 |
-
# Semantic Search Results Branch
|
415 |
else:
|
416 |
if not filtered_semantic_no_dupe:
|
417 |
st.write("No relevant results found.")
|
@@ -420,42 +334,30 @@ else:
|
|
420 |
total_results = len(filtered_semantic_no_dupe)
|
421 |
total_pages = (total_results - 1) // page_size + 1
|
422 |
|
|
|
423 |
if "page" not in st.session_state:
|
424 |
st.session_state.page = 1
|
425 |
current_page = st.session_state.page
|
426 |
|
427 |
-
|
428 |
-
col_pag_top = st.columns([6, 1])[1]
|
429 |
-
new_page_top = col_pag_top.selectbox("Select Page", list(range(1, total_pages + 1)),
|
430 |
-
index=current_page - 1, key="page_top_sem")
|
431 |
-
st.session_state.page = new_page_top
|
432 |
-
|
433 |
-
start_index = (st.session_state.page - 1) * page_size
|
434 |
end_index = start_index + page_size
|
435 |
top_results = filtered_semantic_no_dupe[start_index:end_index]
|
436 |
|
437 |
-
|
438 |
-
page_num = f"<b style='color: green;'>{st.session_state.page}</b>" if st.session_state.page != 1 else f"<b>{st.session_state.page}</b>"
|
439 |
-
total_pages_str = f"<b>{total_pages}</b>"
|
440 |
-
st.markdown(f"Showing **{len(top_results)}** Semantic Search results (Page {page_num} of {total_pages_str})", unsafe_allow_html=True)
|
441 |
|
442 |
-
# --- RAG Answer (Left aligned, bullet points, with bold numbers) ---
|
443 |
rag_answer = get_rag_answer(var, top_results, DEDICATED_ENDPOINT, WRITE_ACCESS_TOKEN)
|
444 |
-
|
445 |
-
|
446 |
-
|
447 |
-
|
448 |
-
bullet_lines.append(f"<li>{line_bold}</li>")
|
449 |
-
formatted_rag_answer = "<ul style='text-align: left; list-style-position: inside;'>" + "".join(bullet_lines) + "</ul>"
|
450 |
-
st.markdown(formatted_rag_answer, unsafe_allow_html=True)
|
451 |
st.divider()
|
452 |
|
453 |
-
for
|
454 |
metadata = res.payload.get('metadata', {})
|
455 |
if "title" not in metadata:
|
456 |
metadata["title"] = compute_title(metadata)
|
457 |
title_clean = re.sub(r'<a.*?>|</a>', '', metadata["title"])
|
458 |
-
st.markdown(f"#### {
|
459 |
|
460 |
desc_en = metadata.get("description.en", "").strip()
|
461 |
desc_de = metadata.get("description.de", "").strip()
|
@@ -474,6 +376,7 @@ else:
|
|
474 |
if remainder_text:
|
475 |
with st.expander("Show more"):
|
476 |
st.markdown(highlight_query(remainder_text, var), unsafe_allow_html=True)
|
|
|
477 |
with col_right:
|
478 |
start_year_str = extract_year(metadata.get('start_year', None)) or "Unknown"
|
479 |
end_year_str = extract_year(metadata.get('end_year', None)) or "Unknown"
|
@@ -487,35 +390,22 @@ else:
|
|
487 |
new_crs_value = lookup_crs_value(crs_key_clean)
|
488 |
new_crs_value_clean = re.sub(r'\.0$', '', str(new_crs_value))
|
489 |
crs_combined = f"{crs_key_clean}: {new_crs_value_clean}" if crs_key_clean else "Unknown"
|
490 |
-
|
491 |
-
predecessor = metadata.get("predecessor_id", "").strip()
|
492 |
-
successor = metadata.get("successor_id", "").strip()
|
493 |
-
extra_line = ""
|
494 |
-
if predecessor or successor:
|
495 |
-
parts = []
|
496 |
-
if predecessor:
|
497 |
-
parts.append(f"**Predecessor Project:** {format_project_id(predecessor)}")
|
498 |
-
if successor:
|
499 |
-
parts.append(f"**Successor Project:** {format_project_id(successor)}")
|
500 |
-
extra_line = "<br>" + " | ".join(parts)
|
501 |
-
|
502 |
additional_text = (
|
503 |
f"**Objective:** {metadata.get('objective', '')}<br>"
|
504 |
f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
|
505 |
f"**Projekt duration:** {start_year_str}-{end_year_str}<br>"
|
506 |
-
f"**Budget:** Project:
|
507 |
-
|
508 |
-
f"<br>**Country:** {country_raw}<br>"
|
509 |
f"**Sector:** {crs_combined}"
|
510 |
)
|
511 |
contact = metadata.get("contact", "").strip()
|
512 |
if contact and contact.lower() != "[email protected]":
|
513 |
additional_text += f"<br>**Contact:** [email protected]"
|
514 |
st.markdown(additional_text, unsafe_allow_html=True)
|
|
|
515 |
st.divider()
|
516 |
|
517 |
-
#
|
518 |
-
|
519 |
-
|
520 |
-
index=st.session_state.page - 1, key="page_bot_sem")
|
521 |
-
st.session_state.page = new_page_bot
|
|
|
26 |
from appStore.rag_utils import (
|
27 |
highlight_query,
|
28 |
get_rag_answer,
|
29 |
+
compute_title
|
|
|
30 |
)
|
31 |
from appStore.filter_utils import (
|
32 |
parse_budget,
|
|
|
93 |
""", unsafe_allow_html=True
|
94 |
)
|
95 |
|
96 |
+
# Main query input
|
97 |
+
var = st.text_input("Enter Question")
|
98 |
|
99 |
###########################################
|
100 |
# Create or load the embeddings collection
|
|
|
126 |
# Define reset_filters function using session_state
|
127 |
###########################################
|
128 |
def reset_filters():
|
129 |
+
st.session_state["region_filter"] = "All/Not allocated"
|
130 |
+
st.session_state["country_filter"] = "All/Not allocated"
|
131 |
current_year = datetime.now().year
|
132 |
default_start_year = current_year - 4
|
133 |
st.session_state["end_year_range"] = (default_start_year, max_end_year)
|
134 |
+
st.session_state["crs_filter"] = "All/Not allocated"
|
135 |
st.session_state["min_budget"] = min_budget_val
|
136 |
+
st.session_state["client_filter"] = "All/Not allocated"
|
137 |
+
# Optionally reset page number
|
|
|
138 |
st.session_state["page"] = 1
|
139 |
|
140 |
###########################################
|
|
|
143 |
col1, col2, col3, col4, col5 = st.columns([1, 1, 1, 1, 1])
|
144 |
|
145 |
with col1:
|
146 |
+
region_filter = st.selectbox("Region", ["All/Not allocated"] + sorted(unique_sub_regions), key="region_filter")
|
147 |
+
|
148 |
+
if region_filter == "All/Not allocated":
|
|
|
|
|
149 |
filtered_country_names = unique_country_names
|
150 |
else:
|
151 |
filtered_country_names = [
|
152 |
name for name, code in country_name_mapping.items()
|
153 |
+
if iso_code_to_sub_region.get(code) == region_filter
|
154 |
]
|
155 |
|
156 |
with col2:
|
157 |
+
country_filter = st.selectbox("Country", ["All/Not allocated"] + filtered_country_names, key="country_filter")
|
|
|
|
|
|
|
158 |
|
159 |
with col3:
|
160 |
current_year = datetime.now().year
|
|
|
169 |
|
170 |
with col4:
|
171 |
crs_options = ["All/Not allocated"] + get_crs_options(client, collection_name)
|
172 |
+
crs_filter = st.selectbox("CRS", crs_options, key="crs_filter")
|
|
|
|
|
|
|
173 |
|
174 |
with col5:
|
175 |
min_budget = st.slider(
|
|
|
180 |
key="min_budget"
|
181 |
)
|
182 |
|
183 |
+
|
184 |
###########################################
|
185 |
# Filter Controls - Row 2 (Additional Filters)
|
186 |
###########################################
|
|
|
188 |
|
189 |
with col1_2:
|
190 |
client_options = sorted(project_data["client"].dropna().unique().tolist())
|
191 |
+
client_filter = st.selectbox("Client", ["All/Not allocated"] + client_options, key="client_filter")
|
|
|
|
|
|
|
192 |
with col2_2:
|
193 |
st.empty()
|
194 |
with col3_2:
|
|
|
196 |
with col4_2:
|
197 |
st.empty()
|
198 |
with col5_2:
|
199 |
+
st.button("Reset Filters", on_click=reset_filters)
|
200 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
201 |
|
202 |
+
show_exact_matches = st.checkbox("Show only exact matches", value=False)
|
|
|
|
|
|
|
|
|
203 |
|
204 |
###########################################
|
205 |
# Main Search / Results
|
|
|
218 |
# Apply threshold to semantic results if desired
|
219 |
semantic_thresholded = [r for r in semantic_all if r.score >= 0.0]
|
220 |
|
221 |
+
# 2) Filter results based on the user’s selections
|
222 |
filtered_semantic = filter_results(
|
223 |
semantic_thresholded,
|
224 |
+
country_filter,
|
225 |
+
region_filter,
|
226 |
end_year_range,
|
227 |
+
crs_filter,
|
228 |
min_budget,
|
229 |
region_df,
|
230 |
iso_code_to_sub_region,
|
|
|
233 |
)
|
234 |
filtered_lexical = filter_results(
|
235 |
lexical_all,
|
236 |
+
country_filter,
|
237 |
+
region_filter,
|
238 |
end_year_range,
|
239 |
+
crs_filter,
|
240 |
min_budget,
|
241 |
region_df,
|
242 |
iso_code_to_sub_region,
|
|
|
244 |
get_country_name
|
245 |
)
|
246 |
|
247 |
+
# Additional filter by client
|
248 |
+
if client_filter != "All/Not allocated":
|
249 |
+
filtered_semantic = [r for r in filtered_semantic if r.payload.get("metadata", {}).get("client", "Unknown Client") == client_filter]
|
250 |
+
filtered_lexical = [r for r in filtered_lexical if r.payload.get("metadata", {}).get("client", "Unknown Client") == client_filter]
|
251 |
|
252 |
# Remove duplicates
|
253 |
filtered_semantic_no_dupe = remove_duplicates(filtered_semantic)
|
|
|
259 |
except (ValueError, TypeError):
|
260 |
return value
|
261 |
|
|
|
|
|
|
|
262 |
# 3) Display results
|
|
|
263 |
if show_exact_matches:
|
264 |
st.write("Showing **Top Lexical Search results**")
|
265 |
query_substring = var.strip().lower()
|
|
|
272 |
st.write('No exact matches, consider unchecking "Show only exact matches"')
|
273 |
else:
|
274 |
top_results = filtered_lexical_no_dupe # Show all matching lexical results
|
275 |
+
for res in top_results:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
276 |
metadata = res.payload.get('metadata', {})
|
277 |
if "title" not in metadata:
|
278 |
metadata["title"] = compute_title(metadata)
|
279 |
title_html = highlight_query(metadata["title"], var) if var.strip() else metadata["title"]
|
280 |
title_clean = re.sub(r'<a.*?>|</a>', '', title_html)
|
281 |
+
st.markdown(f"#### {title_clean}", unsafe_allow_html=True)
|
|
|
282 |
|
283 |
objective = metadata.get("objective", "None")
|
284 |
desc_en = metadata.get("description.en", "").strip()
|
|
|
297 |
if remainder_text:
|
298 |
with st.expander("Show more"):
|
299 |
st.markdown(highlight_query(remainder_text, var), unsafe_allow_html=True)
|
300 |
+
|
301 |
with col_right:
|
302 |
start_year_str = extract_year(metadata.get('start_year', None)) or "Unknown"
|
303 |
end_year_str = extract_year(metadata.get('end_year', None)) or "Unknown"
|
|
|
311 |
new_crs_value = lookup_crs_value(crs_key_clean)
|
312 |
new_crs_value_clean = re.sub(r'\.0$', '', str(new_crs_value))
|
313 |
crs_combined = f"{crs_key_clean}: {new_crs_value_clean}" if crs_key_clean else "Unknown"
|
314 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
315 |
additional_text = (
|
316 |
f"**Objective:** {highlight_query(objective, var)}<br>"
|
317 |
f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
|
318 |
f"**Projekt duration:** {start_year_str}-{end_year_str}<br>"
|
319 |
+
f"**Budget:** Project: {formatted_project_budget}, Total volume: {formatted_total_volume}<br>"
|
320 |
+
f"**Country:** {country_raw}<br>"
|
|
|
321 |
f"**Sector:** {crs_combined}"
|
322 |
)
|
323 |
contact = metadata.get("contact", "").strip()
|
324 |
if contact and contact.lower() != "[email protected]":
|
325 |
additional_text += f"<br>**Contact:** [email protected]"
|
326 |
st.markdown(additional_text, unsafe_allow_html=True)
|
|
|
327 |
|
328 |
+
st.divider()
|
|
|
|
|
|
|
|
|
|
|
|
|
329 |
else:
|
330 |
if not filtered_semantic_no_dupe:
|
331 |
st.write("No relevant results found.")
|
|
|
334 |
total_results = len(filtered_semantic_no_dupe)
|
335 |
total_pages = (total_results - 1) // page_size + 1
|
336 |
|
337 |
+
# Use session_state for page selection; default to 1 if not set.
|
338 |
if "page" not in st.session_state:
|
339 |
st.session_state.page = 1
|
340 |
current_page = st.session_state.page
|
341 |
|
342 |
+
start_index = (current_page - 1) * page_size
|
|
|
|
|
|
|
|
|
|
|
|
|
343 |
end_index = start_index + page_size
|
344 |
top_results = filtered_semantic_no_dupe[start_index:end_index]
|
345 |
|
346 |
+
st.write(f"Showing **{len(top_results)}** Semantic Search results (Page {current_page} of {total_pages})")
|
|
|
|
|
|
|
347 |
|
|
|
348 |
rag_answer = get_rag_answer(var, top_results, DEDICATED_ENDPOINT, WRITE_ACCESS_TOKEN)
|
349 |
+
st.markdown(
|
350 |
+
f"<div style='background-color: #f0f0f0; color: #333; padding: 10px; border-radius: 5px; font-size:1.2em; text-align:center;'>{rag_answer}</div>",
|
351 |
+
unsafe_allow_html=True
|
352 |
+
)
|
|
|
|
|
|
|
353 |
st.divider()
|
354 |
|
355 |
+
for res in top_results:
|
356 |
metadata = res.payload.get('metadata', {})
|
357 |
if "title" not in metadata:
|
358 |
metadata["title"] = compute_title(metadata)
|
359 |
title_clean = re.sub(r'<a.*?>|</a>', '', metadata["title"])
|
360 |
+
st.markdown(f"#### {title_clean}")
|
361 |
|
362 |
desc_en = metadata.get("description.en", "").strip()
|
363 |
desc_de = metadata.get("description.de", "").strip()
|
|
|
376 |
if remainder_text:
|
377 |
with st.expander("Show more"):
|
378 |
st.markdown(highlight_query(remainder_text, var), unsafe_allow_html=True)
|
379 |
+
|
380 |
with col_right:
|
381 |
start_year_str = extract_year(metadata.get('start_year', None)) or "Unknown"
|
382 |
end_year_str = extract_year(metadata.get('end_year', None)) or "Unknown"
|
|
|
390 |
new_crs_value = lookup_crs_value(crs_key_clean)
|
391 |
new_crs_value_clean = re.sub(r'\.0$', '', str(new_crs_value))
|
392 |
crs_combined = f"{crs_key_clean}: {new_crs_value_clean}" if crs_key_clean else "Unknown"
|
393 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
394 |
additional_text = (
|
395 |
f"**Objective:** {metadata.get('objective', '')}<br>"
|
396 |
f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
|
397 |
f"**Projekt duration:** {start_year_str}-{end_year_str}<br>"
|
398 |
+
f"**Budget:** Project: {formatted_project_budget}, Total volume: {formatted_total_volume}<br>"
|
399 |
+
f"**Country:** {country_raw}<br>"
|
|
|
400 |
f"**Sector:** {crs_combined}"
|
401 |
)
|
402 |
contact = metadata.get("contact", "").strip()
|
403 |
if contact and contact.lower() != "[email protected]":
|
404 |
additional_text += f"<br>**Contact:** [email protected]"
|
405 |
st.markdown(additional_text, unsafe_allow_html=True)
|
406 |
+
|
407 |
st.divider()
|
408 |
|
409 |
+
# Pagination widget moved to the very end of the page
|
410 |
+
new_page = st.selectbox("Select Page", list(range(1, total_pages + 1)), index=current_page - 1, key="page")
|
411 |
+
# The selected page value automatically updates st.session_state["page"]
|
|
|
|