TEST-GIZ-Project-Search

Sleeping

App Files Files Community

annikwag committed on Mar 12

Commit

3c9881a

verified ·

1 Parent(s): a62f0b1

Update app.py

Browse files

Files changed (1) hide show

app.py +56 -39

app.py CHANGED Viewed

@@ -36,19 +36,15 @@ from appStore.filter_utils import (
 from appStore.crs_utils import lookup_crs_value
 ###########################################
 # Model Config
 ###########################################
-# Initialize the parser and read the configuration file
 config = configparser.ConfigParser()
 config.read('model_params.cfg')
-# Retrieve model parameters
 DEDICATED_MODEL = config.get('MODEL', 'DEDICATED_MODEL')
 DEDICATED_ENDPOINT = config.get('MODEL', 'DEDICATED_ENDPOINT')
-# Write access token from the settings
 WRITE_ACCESS_TOKEN = st.secrets["Llama_3_1"]
 st.set_page_config(page_title="SEARCH IATI", layout='wide')
@@ -108,7 +104,7 @@ collection_name = "giz_worldwide"
 client = get_client()
 print(client.get_collections())
-# If needed, once only:
 # chunks = process_giz_worldwide()
 # temp_doc = create_documents(chunks, 'chunks')
 # hybrid_embed_chunks(docs=temp_doc, collection_name=collection_name, del_if_exists=True)
@@ -128,7 +124,7 @@ country_name_mapping, iso_code_to_sub_region = get_country_name_and_region_mappi
 unique_country_names = sorted(country_name_mapping.keys())
 ###########################################
-# Filter Controls
 ###########################################
 col1, col2, col3, col4, col5 = st.columns([1, 1, 1, 1, 1])
@@ -170,6 +166,26 @@ with col5:
 show_exact_matches = st.checkbox("Show only exact matches", value=False)
 ###########################################
 # Main Search / Results
 ###########################################
@@ -213,6 +229,11 @@ else:
         get_country_name
     )
     # Remove duplicates
     filtered_semantic_no_dupe = remove_duplicates(filtered_semantic)
     filtered_lexical_no_dupe = remove_duplicates(filtered_lexical)
@@ -229,44 +250,26 @@ else:
     # 3) Display results
     if show_exact_matches:
         # Lexical substring match only
-        st.write("Showing **Top 15 Lexical Search results**")
         query_substring = var.strip().lower()
         lexical_substring_filtered = [
-            r for r in lexical_all
             if query_substring in r.payload["page_content"].lower()
         ]
-        filtered_lexical = filter_results(
-            lexical_substring_filtered,
-            country_filter,
-            region_filter,
-            end_year_range,
-            crs_filter,
-            min_budget,
-            region_df,
-            iso_code_to_sub_region,
-            clean_country_code,
-            get_country_name
-        )
-        filtered_lexical_no_dupe = remove_duplicates(filtered_lexical)
         if not filtered_lexical_no_dupe:
             st.write('No exact matches, consider unchecking "Show only exact matches"')
         else:
-            top_results = filtered_lexical_no_dupe[:10]
-            # RAG answer
-            rag_answer = get_rag_answer(var, top_results, DEDICATED_ENDPOINT, WRITE_ACCESS_TOKEN)
-            st.markdown(f"<div style='text-align:center; font-size:2.1em; font-style: italic; font-weight: bold;'>{var}</div>", unsafe_allow_html=True)
-            st.write(rag_answer)
-            st.divider()
-            # Show each result
             for res in top_results:
                 metadata = res.payload.get('metadata', {})
                 if "title" not in metadata:
                     metadata["title"] = compute_title(metadata)
-                # Title
                 title_html = highlight_query(metadata["title"], var) if var.strip() else metadata["title"]
-                st.markdown(f"#### {title_html}", unsafe_allow_html=True)
                 # Description snippet
                 objective = metadata.get("objective", "None")
@@ -301,7 +304,6 @@ else:
                     new_crs_value_clean = re.sub(r'\.0$', '', str(new_crs_value))
                     crs_combined = f"{crs_key_clean}: {new_crs_value_clean}" if crs_key_clean else "Unknown"
-                    # Additional text
                     additional_text = (
                         f"**Objective:** {highlight_query(objective, var)}<br>"
                         f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
@@ -323,19 +325,34 @@ else:
         if not filtered_semantic_no_dupe:
             st.write("No relevant results found.")
         else:
-            top_results = filtered_semantic_no_dupe[:10]
             rag_answer = get_rag_answer(var, top_results, DEDICATED_ENDPOINT, WRITE_ACCESS_TOKEN)
-            st.markdown(f"<div style='text-align:center; font-size:2.1em; font-style: italic; font-weight: bold;'>{var}</div>", unsafe_allow_html=True)
-            st.write(rag_answer)
             st.divider()
-            st.write("Showing **Top 15 Semantic Search results**")
             for res in top_results:
                 metadata = res.payload.get('metadata', {})
                 if "title" not in metadata:
                     metadata["title"] = compute_title(metadata)
-                st.markdown(f"#### {metadata['title']}")
                 desc_en = metadata.get("description.en", "").strip()
                 desc_de = metadata.get("description.de", "").strip()

 from appStore.crs_utils import lookup_crs_value
 ###########################################
 # Model Config
 ###########################################
 config = configparser.ConfigParser()
 config.read('model_params.cfg')
 DEDICATED_MODEL = config.get('MODEL', 'DEDICATED_MODEL')
 DEDICATED_ENDPOINT = config.get('MODEL', 'DEDICATED_ENDPOINT')
 WRITE_ACCESS_TOKEN = st.secrets["Llama_3_1"]
 st.set_page_config(page_title="SEARCH IATI", layout='wide')
 client = get_client()
 print(client.get_collections())
+# Uncomment if needed (one-time setup only):
 # chunks = process_giz_worldwide()
 # temp_doc = create_documents(chunks, 'chunks')
 # hybrid_embed_chunks(docs=temp_doc, collection_name=collection_name, del_if_exists=True)
 unique_country_names = sorted(country_name_mapping.keys())
 ###########################################
+# Filter Controls - Row 1
 ###########################################
 col1, col2, col3, col4, col5 = st.columns([1, 1, 1, 1, 1])
 show_exact_matches = st.checkbox("Show only exact matches", value=False)
+###########################################
+# Filter Controls - Row 2 (Additional Filters)
+###########################################
+col1_2, col2_2, col3_2, col4_2, col5_2 = st.columns(5)
+with col1_2:
+    # Get unique clients from project_data; adjust column name as needed.
+    client_options = sorted(project_data["client"].dropna().unique().tolist())
+    client_filter = st.selectbox("Client", ["All/Not allocated"] + client_options)
+# Columns 2-4 reserved (empty)
+with col2_2:
+    st.empty()
+with col3_2:
+    st.empty()
+with col4_2:
+    st.empty()
+with col5_2:
+    if st.button("Reset Filters"):  # MOD: Reset button in filter row 2, col5
+         st.experimental_rerun()
 ###########################################
 # Main Search / Results
 ###########################################
         get_country_name
     )
+    # Additional filter by client (MOD: added client filter)
+    if client_filter != "All/Not allocated":
+        filtered_semantic = [r for r in filtered_semantic if r.payload.get("metadata", {}).get("client", "Unknown Client") == client_filter]
+        filtered_lexical = [r for r in filtered_lexical if r.payload.get("metadata", {}).get("client", "Unknown Client") == client_filter]
     # Remove duplicates
     filtered_semantic_no_dupe = remove_duplicates(filtered_semantic)
     filtered_lexical_no_dupe = remove_duplicates(filtered_lexical)
     # 3) Display results
     if show_exact_matches:
         # Lexical substring match only
+        st.write("Showing **Top Lexical Search results**")  # MOD: Removed RAG answer for lexical search
         query_substring = var.strip().lower()
         lexical_substring_filtered = [
+            r for r in filtered_lexical
             if query_substring in r.payload["page_content"].lower()
         ]
+        filtered_lexical_no_dupe = remove_duplicates(lexical_substring_filtered)
         if not filtered_lexical_no_dupe:
             st.write('No exact matches, consider unchecking "Show only exact matches"')
         else:
+            # Use all matching lexical results (or slice as desired)
+            top_results = filtered_lexical_no_dupe  # or use [:15] if you want to limit
             for res in top_results:
                 metadata = res.payload.get('metadata', {})
                 if "title" not in metadata:
                     metadata["title"] = compute_title(metadata)
+                # MOD: Remove hyperlink from title by stripping <a> tags.
                 title_html = highlight_query(metadata["title"], var) if var.strip() else metadata["title"]
+                title_clean = re.sub(r'<a.*?>|</a>', '', title_html)
+                st.markdown(f"#### {title_clean}", unsafe_allow_html=True)
                 # Description snippet
                 objective = metadata.get("objective", "None")
                     new_crs_value_clean = re.sub(r'\.0$', '', str(new_crs_value))
                     crs_combined = f"{crs_key_clean}: {new_crs_value_clean}" if crs_key_clean else "Unknown"
                     additional_text = (
                         f"**Objective:** {highlight_query(objective, var)}<br>"
                         f"**Commissioned by:** {metadata.get('client', 'Unknown Client')}<br>"
         if not filtered_semantic_no_dupe:
             st.write("No relevant results found.")
         else:
+            # MOD: Pagination for semantic search results
+            page_size = 15
+            total_results = len(filtered_semantic_no_dupe)
+            total_pages = (total_results - 1) // page_size + 1
+            if total_pages > 1:
+                page = st.selectbox("Page", list(range(1, total_pages + 1)))
+            else:
+                page = 1
+            start_index = (page - 1) * page_size
+            end_index = start_index + page_size
+            top_results = filtered_semantic_no_dupe[start_index:end_index]
+            st.write(f"Showing **{len(top_results)}** Semantic Search results (Page {page} of {total_pages})")
+            # MOD: Only display the RAG answer for semantic search and style it distinctively.
             rag_answer = get_rag_answer(var, top_results, DEDICATED_ENDPOINT, WRITE_ACCESS_TOKEN)
+            st.markdown(
+                f"<div style='background-color: #f0f0f0; color: #333; padding: 10px; border-radius: 5px; font-size:1.2em; text-align:center;'>{rag_answer}</div>",
+                unsafe_allow_html=True
+            )
             st.divider()
             for res in top_results:
                 metadata = res.payload.get('metadata', {})
                 if "title" not in metadata:
                     metadata["title"] = compute_title(metadata)
+                # MOD: Remove hyperlink from project title
+                title_clean = re.sub(r'<a.*?>|</a>', '', metadata["title"])
+                st.markdown(f"#### {title_clean}")
                 desc_en = metadata.get("description.en", "").strip()
                 desc_de = metadata.get("description.de", "").strip()