annikwag committed
Commit 3c9881a · verified · 1 Parent(s): a62f0b1

Update app.py

Files changed (1):
  1. app.py +56 -39
app.py CHANGED
@@ -36,19 +36,15 @@ from appStore.filter_utils import (
 
 from appStore.crs_utils import lookup_crs_value
 
-
 ###########################################
 # Model Config
 ###########################################
 
-# Initialize the parser and read the configuration file
 config = configparser.ConfigParser()
 config.read('model_params.cfg')
 
-# Retrieve model parameters
 DEDICATED_MODEL = config.get('MODEL', 'DEDICATED_MODEL')
 DEDICATED_ENDPOINT = config.get('MODEL', 'DEDICATED_ENDPOINT')
-# Write access token from the settings
 WRITE_ACCESS_TOKEN = st.secrets["Llama_3_1"]
 
 st.set_page_config(page_title="SEARCH IATI", layout='wide')
@@ -108,7 +104,7 @@ collection_name = "giz_worldwide"
 client = get_client()
 print(client.get_collections())
 
-# If needed, once only:
+# Uncomment if needed:
 # chunks = process_giz_worldwide()
 # temp_doc = create_documents(chunks, 'chunks')
 # hybrid_embed_chunks(docs=temp_doc, collection_name=collection_name, del_if_exists=True)
@@ -128,7 +124,7 @@ country_name_mapping, iso_code_to_sub_region = get_country_name_and_region_mappi
 unique_country_names = sorted(country_name_mapping.keys())
 
 ###########################################
-# Filter Controls
+# Filter Controls - Row 1
 ###########################################
 col1, col2, col3, col4, col5 = st.columns([1, 1, 1, 1, 1])
 
@@ -170,6 +166,26 @@ with col5:
 
 show_exact_matches = st.checkbox("Show only exact matches", value=False)
 
+###########################################
+# Filter Controls - Row 2 (Additional Filters)
+###########################################
+col1_2, col2_2, col3_2, col4_2, col5_2 = st.columns(5)
+
+with col1_2:
+    # Get unique clients from project_data; adjust column name as needed.
+    client_options = sorted(project_data["client"].dropna().unique().tolist())
+    client_filter = st.selectbox("Client", ["All/Not allocated"] + client_options)
+# Columns 2-4 reserved (empty)
+with col2_2:
+    st.empty()
+with col3_2:
+    st.empty()
+with col4_2:
+    st.empty()
+with col5_2:
+    if st.button("Reset Filters"):  # MOD: Reset button in filter row 2, col5
+        st.experimental_rerun()
+
 ###########################################
 # Main Search / Results
 ###########################################
@@ -213,6 +229,11 @@ else:
         get_country_name
     )
 
+    # Additional filter by client (MOD: added client filter)
+    if client_filter != "All/Not allocated":
+        filtered_semantic = [r for r in filtered_semantic if r.payload.get("metadata", {}).get("client", "Unknown Client") == client_filter]
+        filtered_lexical = [r for r in filtered_lexical if r.payload.get("metadata", {}).get("client", "Unknown Client") == client_filter]
+
     # Remove duplicates
     filtered_semantic_no_dupe = remove_duplicates(filtered_semantic)
     filtered_lexical_no_dupe = remove_duplicates(filtered_lexical)
@@ -229,44 +250,26 @@ else:
     # 3) Display results
     if show_exact_matches:
         # Lexical substring match only
-        st.write("Showing **Top 15 Lexical Search results**")
+        st.write("Showing **Top Lexical Search results**")  # MOD: Removed RAG answer for lexical search
         query_substring = var.strip().lower()
         lexical_substring_filtered = [
-            r for r in lexical_all
+            r for r in filtered_lexical
             if query_substring in r.payload["page_content"].lower()
         ]
-        filtered_lexical = filter_results(
-            lexical_substring_filtered,
-            country_filter,
-            region_filter,
-            end_year_range,
-            crs_filter,
-            min_budget,
-            region_df,
-            iso_code_to_sub_region,
-            clean_country_code,
-            get_country_name
-        )
-        filtered_lexical_no_dupe = remove_duplicates(filtered_lexical)
+        filtered_lexical_no_dupe = remove_duplicates(lexical_substring_filtered)
         if not filtered_lexical_no_dupe:
             st.write('No exact matches, consider unchecking "Show only exact matches"')
         else:
-            top_results = filtered_lexical_no_dupe[:10]
-            # RAG answer
-            rag_answer = get_rag_answer(var, top_results, DEDICATED_ENDPOINT, WRITE_ACCESS_TOKEN)
-            st.markdown(f"<div style='text-align:center; font-size:2.1em; font-style: italic; font-weight: bold;'>{var}</div>", unsafe_allow_html=True)
-            st.write(rag_answer)
-            st.divider()
-
-            # Show each result
+            # Use all matching lexical results (or slice as desired)
+            top_results = filtered_lexical_no_dupe  # or use [:15] if you want to limit
             for res in top_results:
                 metadata = res.payload.get('metadata', {})
                 if "title" not in metadata:
                     metadata["title"] = compute_title(metadata)
-
-                # Title
+                # MOD: Remove hyperlink from title by stripping <a> tags.
                 title_html = highlight_query(metadata["title"], var) if var.strip() else metadata["title"]
-                st.markdown(f"#### {title_html}", unsafe_allow_html=True)
+                title_clean = re.sub(r'<a.*?>|</a>', '', title_html)
+                st.markdown(f"#### {title_clean}", unsafe_allow_html=True)
 
                 # Description snippet
                 objective = metadata.get("objective", "None")
@@ -301,7 +304,6 @@ else:
                 new_crs_value_clean = re.sub(r'\.0$', '', str(new_crs_value))
                 crs_combined = f"{crs_key_clean}: {new_crs_value_clean}" if crs_key_clean else "Unknown"
 
-                # Additional text
                 additional_text = (
                     f"**Objective:** {highlight_query(objective, var)}<br>"
                     f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
@@ -323,19 +325,34 @@ else:
         if not filtered_semantic_no_dupe:
             st.write("No relevant results found.")
         else:
-            top_results = filtered_semantic_no_dupe[:10]
+            # MOD: Pagination for semantic search results
+            page_size = 15
+            total_results = len(filtered_semantic_no_dupe)
+            total_pages = (total_results - 1) // page_size + 1
+            if total_pages > 1:
+                page = st.selectbox("Page", list(range(1, total_pages + 1)))
+            else:
+                page = 1
+            start_index = (page - 1) * page_size
+            end_index = start_index + page_size
+            top_results = filtered_semantic_no_dupe[start_index:end_index]
+            st.write(f"Showing **{len(top_results)}** Semantic Search results (Page {page} of {total_pages})")
+
+            # MOD: Only display the RAG answer for semantic search and style it distinctively.
             rag_answer = get_rag_answer(var, top_results, DEDICATED_ENDPOINT, WRITE_ACCESS_TOKEN)
-            st.markdown(f"<div style='text-align:center; font-size:2.1em; font-style: italic; font-weight: bold;'>{var}</div>", unsafe_allow_html=True)
-            st.write(rag_answer)
+            st.markdown(
+                f"<div style='background-color: #f0f0f0; color: #333; padding: 10px; border-radius: 5px; font-size:1.2em; text-align:center;'>{rag_answer}</div>",
+                unsafe_allow_html=True
+            )
             st.divider()
-            st.write("Showing **Top 15 Semantic Search results**")
 
             for res in top_results:
                 metadata = res.payload.get('metadata', {})
                 if "title" not in metadata:
                     metadata["title"] = compute_title(metadata)
-
-                st.markdown(f"#### {metadata['title']}")
+                # MOD: Remove hyperlink from project title
+                title_clean = re.sub(r'<a.*?>|</a>', '', metadata["title"])
+                st.markdown(f"#### {title_clean}")
 
                 desc_en = metadata.get("description.en", "").strip()
                 desc_de = metadata.get("description.de", "").strip()
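
Side note on the new pagination: the commit pages the deduplicated semantic results in slices of 15. Below is a minimal, self-contained sketch of that arithmetic; the 53-item result list is made up for illustration, and only the names page_size, total_pages, start_index and end_index mirror the diff above.

    page_size = 15
    results = list(range(53))                             # stand-in for the deduplicated semantic results
    total_results = len(results)
    total_pages = (total_results - 1) // page_size + 1    # ceil(53 / 15) == 4
    for page in range(1, total_pages + 1):
        start_index = (page - 1) * page_size
        end_index = start_index + page_size
        print(f"page {page}: items {start_index}..{min(end_index, total_results) - 1}")

As in the committed code, the page selectbox only appears when total_pages > 1, so single-page result sets render without a page control.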
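
The title clean-up in both branches uses re.sub(r'<a.*?>|</a>', '', ...) to drop anchor tags before rendering the heading. A quick illustration with an invented title string (URL and title text are hypothetical):

    import re

    title_html = '<a href="https://example.org/project/123">Rural Water Supply Programme</a>'
    title_clean = re.sub(r'<a.*?>|</a>', '', title_html)
    print(title_clean)   # -> Rural Water Supply Programme

The opening-tag pattern is non-greedy, so it stops at the first '>' of each anchor; any other markup inside the title is left untouched.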