Update app.py
app.py
CHANGED
@@ -64,6 +64,83 @@ def get_rag_answer(query, top_results):
 else:
     return f"Error in generating answer: {response.text}"
 
+
+#######
+# Helper function: Format project id (e.g., "201940485" -> "2019.4048.5")
+def format_project_id(pid):
+    s = str(pid)
+    if len(s) > 5:
+        return s[:4] + "." + s[4:-1] + "." + s[-1]
+    return s
+
+
+# Helper function: Compute title from metadata using name.en (or name.de if empty)
+def compute_title(metadata):
+    name_en = metadata.get("name.en", "").strip()
+    name_de = metadata.get("name.de", "").strip()
+    base = name_en if name_en else name_de
+    pid = metadata.get("id", "")
+    if base and pid:
+        return f"{base} [{format_project_id(pid)}]"
+    return base or "No Title"
+
+# Helper function: Get CRS filter options from all documents in the collection
+@st.cache_data
+def get_crs_options(client, collection_name):
+    results = hybrid_search(client, "", collection_name)
+    all_results = results[0] + results[1]
+    crs_set = set()
+    for res in all_results:
+        metadata = res.payload.get('metadata', {})
+        crs_key = metadata.get("crs_key", "").strip()
+        crs_value = metadata.get("crs_value", "").strip()
+        if crs_key or crs_value:
+            crs_combined = f"{crs_key}: {crs_value}"
+            crs_set.add(crs_combined)
+    return sorted(crs_set)
+
+# Update filter_results to also filter by crs_combined.
+def filter_results(results, country_filter, region_filter, end_year_range, crs_filter):
+    filtered = []
+    for r in results:
+        metadata = r.payload.get('metadata', {})
+        countries = metadata.get('countries', "[]")
+        year_str = metadata.get('end_year')
+        if year_str:
+            extracted = extract_year(year_str)
+            try:
+                end_year_val = int(extracted) if extracted != "Unknown" else 0
+            except ValueError:
+                end_year_val = 0
+        else:
+            end_year_val = 0
+
+        try:
+            c_list = json.loads(countries.replace("'", '"'))
+            c_list = [code.upper() for code in c_list if len(code) == 2]
+        except json.JSONDecodeError:
+            c_list = []
+
+        selected_iso_code = country_name_mapping.get(country_filter, None)
+        if region_filter != "All/Not allocated":
+            countries_in_region = [code for code in c_list if iso_code_to_sub_region.get(code) == region_filter]
+        else:
+            countries_in_region = c_list
+
+        # Filter by CRS: compute crs_combined and compare to the selected filter.
+        crs_key = metadata.get("crs_key", "").strip()
+        crs_value = metadata.get("crs_value", "").strip()
+        crs_combined = f"{crs_key}: {crs_value}" if (crs_key or crs_value) else ""
+
+        if crs_filter != "All/Not allocated" and crs_filter != crs_combined:
+            continue
+
+        if ((country_filter == "All/Not allocated" or selected_iso_code in c_list)
+                and (region_filter == "All/Not allocated" or countries_in_region)
+                and (end_year_range[0] <= end_year_val <= end_year_range[1])):
+            filtered.append(r)
+    return filtered
+
 #######
 
 # get the device to be used either gpu or cpu
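
A quick sanity sketch of the two new helpers (hypothetical inputs; not part of the commit):

    # format_project_id splits a long id as 4 digits, middle block, check digit
    assert format_project_id("201940485") == "2019.4048.5"
    assert format_project_id("12345") == "12345"  # short ids pass through unchanged

    # compute_title prefers name.en, falls back to name.de, appends the formatted id
    meta = {"name.en": "Water Supply", "name.de": "", "id": "201940485"}
    assert compute_title(meta) == "Water Supply [2019.4048.5]"
    assert compute_title({}) == "No Title"
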
@@ -72,7 +149,7 @@ device = 'cuda' if cuda.is_available() else 'cpu'
 
 st.set_page_config(page_title="SEARCH IATI",layout='wide')
 st.title("GIZ Project Database (PROTOTYPE)")
-var = st.text_input("Enter Search
+var = st.text_input("Enter Search Question")
 
 # Load the region lookup CSV
 region_lookup_path = "docStore/regions_lookup.csv"
@@ -132,80 +209,24 @@ client = get_client()
 country_name_mapping, iso_code_to_sub_region = get_country_name_and_region_mapping(client, collection_name, region_df)
 unique_country_names = sorted(country_name_mapping.keys()) # List of country names
 
-# Layout filters in columns
+# Layout filters in columns: add a new filter for CRS in col4.
 col1, col2, col3, col4 = st.columns([1, 1, 1, 4])
-
-# Region filter
 with col1:
-    region_filter = st.selectbox("Region", ["All/Not allocated"] + sorted(unique_sub_regions))
-
-# Dynamically filter countries based on selected region
-if region_filter == "All/Not allocated":
-    filtered_country_names = unique_country_names # Show all countries if no region is selected
-else:
-    filtered_country_names = [
-        name for name, code in country_name_mapping.items() if iso_code_to_sub_region.get(code) == region_filter
-    ]
-
-# Country filter
+    region_filter = st.selectbox("Region", ["All/Not allocated"] + sorted(unique_sub_regions))
 with col2:
-    country_filter = st.selectbox("Country", ["All/Not allocated"] + filtered_country_names)
-
-# Year range slider # ToDo add end_year filter again
+    country_filter = st.selectbox("Country", ["All/Not allocated"] + filtered_country_names if (filtered_country_names := unique_country_names) else unique_country_names)
 with col3:
     current_year = datetime.now().year
-    default_start_year = current_year -
-    end_year_range = st.slider(
-        min_value=2010,
-        max_value=max_end_year,
-        value=(default_start_year, max_end_year),
-    )
+    default_start_year = current_year - 4
+    end_year_range = st.slider("Project End Year", min_value=2010, max_value=max_end_year, value=(default_start_year, max_end_year))
+with col4:
+    crs_options = ["All/Not allocated"] + get_crs_options(client, collection_name)
+    crs_filter = st.selectbox("CRS", crs_options)
 
 # Checkbox to control whether to show only exact matches
 show_exact_matches = st.checkbox("Show only exact matches", value=False)
 
-def filter_results(results, country_filter, region_filter, end_year_range): ## ToDo add end_year filter again
-    filtered = []
-    for r in results:
-        metadata = r.payload.get('metadata', {})
-        countries = metadata.get('countries', "[]")
-        year_str = metadata.get('end_year')
-        if year_str:
-            extracted = extract_year(year_str)
-            try:
-                end_year_val = int(extracted) if extracted != "Unknown" else 0
-            except ValueError:
-                end_year_val = 0
-        else:
-            end_year_val = 0
 
-        # Convert countries to a list
-        try:
-            c_list = json.loads(countries.replace("'", '"'))
-            c_list = [code.upper() for code in c_list if len(code) == 2]
-        except json.JSONDecodeError:
-            c_list = []
-
-        # Translate selected country name to iso2
-        selected_iso_code = country_name_mapping.get(country_filter, None)
-
-        # Check if any country in the metadata matches the selected region
-        if region_filter != "All/Not allocated":
-            countries_in_region = [code for code in c_list if iso_code_to_sub_region.get(code) == region_filter]
-        else:
-            countries_in_region = c_list
-
-        # Filtering
-        if (
-            (country_filter == "All/Not allocated" or selected_iso_code in c_list)
-            and (region_filter == "All/Not allocated" or countries_in_region)
-            and (end_year_range[0] <= end_year_val <= end_year_range[1]) # ToDo add end_year filter again
-        ):
-            filtered.append(r)
-    return filtered
 
 # Run the search
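
The CRS filter works by rebuilding a combined "key: value" string per result and comparing it to the selectbox choice; a minimal standalone sketch of that matching step (the metadata values here are hypothetical):

    metadata = {"crs_key": "14030", "crs_value": "Basic drinking water supply"}
    crs_key = metadata.get("crs_key", "").strip()
    crs_value = metadata.get("crs_value", "").strip()
    crs_combined = f"{crs_key}: {crs_value}" if (crs_key or crs_value) else ""

    crs_filter = "14030: Basic drinking water supply"
    # A result is kept when no CRS filter is set or when the strings match exactly
    keep = crs_filter == "All/Not allocated" or crs_filter == crs_combined
    assert keep
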
@@ -227,10 +248,9 @@ lexical_all = [
 # 2) Apply a threshold to SEMANTIC results (score >= 0.4)
 semantic_thresholded = [r for r in semantic_all if r.score >= 0.0]
 
-# 2) Filter the entire sets
-filtered_semantic = filter_results(semantic_thresholded, country_filter, region_filter, end_year_range) ## ToDo add end_year filter again
-filtered_lexical = filter_results(lexical_all, country_filter, region_filter, end_year_range) ## ToDo add end_year filter again
+filtered_semantic = filter_results(semantic_thresholded, country_filter, region_filter, end_year_range, crs_filter)
+filtered_lexical = filter_results(lexical_all, country_filter, region_filter, end_year_range, crs_filter)
 filtered_semantic_no_dupe = remove_duplicates(filtered_semantic) # ToDo remove duplicates again?
 filtered_lexical_no_dupe = remove_duplicates(filtered_lexical)
@@ -241,33 +261,21 @@ def format_currency(value):
         return f"€{int(float(value)):,}"
     except (ValueError, TypeError):
         return value
-
-#
+
+# Helper function to highlight query matches (case-insensitive)
+def highlight_query(text, query):
+    pattern = re.compile(re.escape(query), re.IGNORECASE)
+    return pattern.sub(lambda m: f"**{m.group(0)}**", text)
+
+###############################
+# Display Lexical Results Branch
+###############################
 if show_exact_matches:
-    # 1) Display heading
     st.write(f"Showing **Top 15 Lexical Search results** for query: {var}")
-
-    # 2) Do a simple substring check (case-insensitive)
-    # We'll create a new list lexical_substring_filtered
     query_substring = var.strip().lower()
-    lexical_substring_filtered = []
-    for r in lexical_all:
-        # page_content in lowercase
-        page_text_lower = r.payload["page_content"].lower()
-        # Keep this result only if the query substring is found
-        if query_substring in page_text_lower:
-            lexical_substring_filtered.append(r)
-
-    # 3) Now apply your region/country/year filter on that new list
-    filtered_lexical = filter_results(
-        lexical_substring_filtered, country_filter, region_filter, end_year_range
-    ) ## ToDo add end_year filter again
-
-    # 4) Remove duplicates
+    lexical_substring_filtered = [r for r in lexical_all if query_substring in r.payload["page_content"].lower()]
+    filtered_lexical = filter_results(lexical_substring_filtered, country_filter, region_filter, end_year_range, crs_filter)
     filtered_lexical_no_dupe = remove_duplicates(filtered_lexical)
-
-    # 5) If empty after substring + filters + dedupe, show a custom message
     if not filtered_lexical_no_dupe:
         st.write('No exact matches, consider unchecking "Show only exact matches"')
     else:
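
highlight_query does a regex-escaped, case-insensitive substitution that wraps each match in markdown bold; a small self-contained usage sketch (the example strings are invented):

    import re

    def highlight_query(text, query):
        pattern = re.compile(re.escape(query), re.IGNORECASE)
        return pattern.sub(lambda m: f"**{m.group(0)}**", text)

    print(highlight_query("Water management in urban areas", "water"))
    # -> **Water** management in urban areas
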
@@ -277,21 +285,16 @@ if show_exact_matches:
         st.write(rag_answer)
         st.divider()
         for res in top_results:
-            # Metadata
             metadata = res.payload.get('metadata', {})
-            total_project = metadata.get('total_project', "Unknown")
-            id = metadata.get('id', "Unknown")
-            project_name = res.payload['metadata'].get('project_name', 'Project Link')
+            # Compute new title if not already set
+            if "title" not in metadata:
+                metadata["title"] = compute_title(metadata)
+            # Use new title instead of project_name and highlight query if present
+            display_title = highlight_query(metadata["title"], var) if var.strip() else metadata["title"]
             proj_id = metadata.get('id', 'Unknown')
-            st.markdown(f"#### {project_name} [{proj_id}]")
-
-            # Build snippet from objectives and descriptions.
+            st.markdown(f"#### {display_title} [{proj_id}]")
+
+            # Build snippet with potential highlighting
             objectives = metadata.get("objectives", "")
             desc_de = metadata.get("description.de", "")
             desc_en = metadata.get("description.en", "")
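
Putting the title pipeline together: compute_title builds the display string and highlight_query bolds the query term. A sketch assuming both helpers from the hunks above are in scope (names and ids here are invented):

    meta = {"name.en": "Urban water management", "id": "202112345"}
    title = compute_title(meta)                # "Urban water management [2021.1234.5]"
    display = highlight_query(title, "water")  # "Urban **water** management [2021.1234.5]"
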
@@ -301,21 +304,23 @@ if show_exact_matches:
             preview_word_count = 200
             preview_text = " ".join(words[:preview_word_count])
             remainder_text = " ".join(words[preview_word_count:])
+            preview_text = highlight_query(preview_text, var) if var.strip() else preview_text
+            st.write(preview_text)
+            if remainder_text:
+                with st.expander("Show more"):
+                    st.write(remainder_text)
 
             # Keywords
             full_text = res.payload['page_content']
             top_keywords = extract_top_keywords(full_text, top_n=5)
             if top_keywords:
                 st.markdown(f"_{' · '.join(top_keywords)}_")
 
+            # Country info
             try:
-                c_list = json.loads(countries.replace("'", '"'))
+                c_list = json.loads(metadata.get('countries', "[]").replace("'", '"'))
             except json.JSONDecodeError:
                 c_list = []
-
-            # Only keep country names if the region lookup returns a different value.
             matched_countries = []
             for code in c_list:
                 if len(code) == 2:
@@ -323,71 +328,34 @@ if show_exact_matches:
                     if resolved_name.upper() != code.upper():
                         matched_countries.append(resolved_name)
 
-            formatted_total_volume = format_currency(total_volume)
-
-            # Build the final string including a new row for countries.
-            if matched_countries:
-                additional_text = (
-                    f"**{', '.join(matched_countries)}**, commissioned by **{client_name}**\n"
-                    f"Projekt duration **{start_year_str}-{end_year_str}**\n"
-                    f"Budget: Project: **{formatted_project_budget}**, Total volume: **{formatted_total_volume}**\n"
-                    f"Country: **{', '.join(matched_countries)}**"
-                )
-            else:
-                additional_text = (
-                    f"Commissioned by **{client_name}**\n"
-                    f"Projekt duration **{start_year_str}-{end_year_str}**\n"
-                    f"Budget: Project: **{formatted_project_budget}**, Total volume: **{formatted_total_volume}**\n"
-                    f"Country: **{', '.join(c_list) if c_list else 'Unknown'}**"
-                )
+            additional_text = f"Country: **{', '.join(matched_countries) if matched_countries else 'Unknown'}**"
+            # Add contact info if available and not [email protected]
+            contact = metadata.get("contact", "").strip()
+            if contact and contact.lower() != "[email protected]":
+                additional_text += f" | Contact: **{contact}**"
             st.markdown(additional_text)
             st.divider()
 
+###############################
+# Display Semantic Results Branch
+###############################
 else:
     st.write(f"Showing **Top 15 Semantic Search results** for query: {var}")
     if not filtered_semantic_no_dupe:
         st.write("No relevant results found.")
     else:
-        # Get the top 15 results for the RAG context
         top_results = filtered_semantic_no_dupe[:5]
-
-        # Call the RAG function to generate an answer
         rag_answer = get_rag_answer(var, top_results)
-
-        # Display the generated answer at the top of the page
         st.markdown("### Generated Answer")
         st.write(rag_answer)
         st.divider()
-
-        # Now list each individual search result below
         for res in top_results:
-            # Metadata
             metadata = res.payload.get('metadata', {})
-            total_project = metadata.get('total_project', "Unknown")
-            id = metadata.get('id', "Unknown")
-            project_name = res.payload['metadata'].get('project_name', 'Project Link')
-            proj_id = metadata.get('id', 'Unknown')
-            st.markdown(f"#### {project_name} [{proj_id}]")
-
-            # Snippet logic (80 words)
-            # Build snippet from objectives and descriptions.
+            if "title" not in metadata:
+                metadata["title"] = compute_title(metadata)
+            display_title = metadata["title"]
+            st.markdown(f"#### {display_title} [{metadata.get('id', 'Unknown')}]")
+
             objectives = metadata.get("objectives", "")
             desc_de = metadata.get("description.de", "")
             desc_en = metadata.get("description.en", "")
@@ -397,19 +365,19 @@ else:
             preview_word_count = 200
             preview_text = " ".join(words[:preview_word_count])
             remainder_text = " ".join(words[preview_word_count:])
-            st.write(preview_text)
+            st.write(preview_text)
+            if remainder_text:
+                with st.expander("Show more"):
+                    st.write(remainder_text)
+
+            top_keywords = extract_top_keywords(res.payload['page_content'], top_n=5)
             if top_keywords:
                 st.markdown(f"_{' · '.join(top_keywords)}_")
 
             try:
-                c_list = json.loads(countries.replace("'", '"'))
+                c_list = json.loads(metadata.get('countries', "[]").replace("'", '"'))
             except json.JSONDecodeError:
                 c_list = []
-
             matched_countries = []
             for code in c_list:
                 if len(code) == 2:
@@ -417,40 +385,13 @@ else:
                     if resolved_name.upper() != code.upper():
                         matched_countries.append(resolved_name)
 
-            formatted_project_budget = format_currency(total_project)
-            formatted_total_volume = format_currency(total_volume)
-
-            # Build the final string
-            if matched_countries:
-                additional_text = (
-                    f"**{', '.join(matched_countries)}**, commissioned by **{client_name}**\n"
-                    f"Projekt duration **{start_year_str}-{end_year_str}**\n"
-                    f"Budget: Project: **{formatted_project_budget}**, Total volume: **{formatted_total_volume}**\n"
-                    f"Country: **{', '.join(matched_countries)}**"
-                )
-            else:
-                additional_text = (
-                    f"Commissioned by **{client_name}**\n"
-                    f"Projekt duration **{start_year_str}-{end_year_str}**\n"
-                    f"Budget: Project: **{formatted_project_budget}**, Total volume: **{formatted_total_volume}**\n"
-                    f"Country: **{', '.join(c_list) if c_list else 'Unknown'}**"
-                )
-
+            additional_text = f"Country: **{', '.join(matched_countries) if matched_countries else 'Unknown'}**"
+            contact = metadata.get("contact", "").strip()
+            if contact and contact.lower() != "[email protected]":
+                additional_text += f" | Contact: **{contact}**"
             st.markdown(additional_text)
             st.divider()
 
 # for i in results:
 #     st.subheader(str(i.metadata['id'])+":"+str(i.metadata['title_main']))
 #     st.caption(f"Status:{str(i.metadata['status'])}, Country:{str(i.metadata['country_name'])}")