TEST-GIZ-Project-Search

Sleeping

App Files Files Community

annikwag committed on Mar 11

Commit

fb7eabb

verified ·

1 Parent(s): 8816b99

Update appStore/region_utils.py

Browse files

Files changed (1) hide show

appStore/region_utils.py +60 -18

appStore/region_utils.py CHANGED Viewed

@@ -1,26 +1,68 @@
 import streamlit as st
 import pandas as pd
-# Load the region.csv once when the app starts
 @st.cache_data
-def load_region_data(file_path):
-    return pd.read_csv(file_path)
-def clean_country_code(code):
-    # Remove non-alphabetical characters and convert to uppercase.
-    return ''.join(filter(str.isalpha, code)).upper()
-def get_country_name(alpha2_code, region_df):
-    """Map ISO country code to country name (case-insensitive)."""
-    # Clean the code before processing.
-    code = clean_country_code(alpha2_code)
-    row = region_df[region_df['alpha-2'] == code]
-    return row['name'].values[0] if not row.empty else code
 def get_regions(region_df):
-    """Get unique regions and sub-regions."""
-    regions = region_df['region'].dropna().unique().tolist()
-    sub_regions = region_df['sub-region'].dropna().unique().tolist()
-    return regions, sub_regions

+import json
 import streamlit as st
 import pandas as pd
 @st.cache_data
+def load_region_data(path):
+    """
+    Read the region lookup CSV located at `path` and return it as a DataFrame.
+
+    The function is decorated with `st.cache_data`; the file itself is parsed
+    with `pd.read_csv` using its default options.
+    """
+    region_table = pd.read_csv(path)
+    return region_table
+def clean_country_code(country_str):
+    """
+    Normalise a country code string: trim surrounding whitespace, then upper-case it.
+
+    Characters inside the string are left untouched and the length is not
+    checked here; callers that need exactly two letters must verify that themselves.
+    """
+    trimmed = country_str.strip()
+    return trimmed.upper()
+def get_country_name(alpha2, region_df):
+    """
+    Look up `alpha2` in the 'alpha-2' column of `region_df` and return the
+    'name' value of the first matching row.
+
+    The comparison is an exact match on the value as given. When no row
+    matches, `alpha2` is handed back unchanged.
+    """
+    is_match = region_df['alpha-2'] == alpha2
+    matches = region_df[is_match]
+    if matches.empty:
+        # Unknown code: fall back to the input itself.
+        return alpha2
+    return matches['name'].values[0]
 def get_regions(region_df):
+    """
+    Return a 2-tuple: (`region_df` itself, sorted list of unique sub-regions).
+
+    Missing values in the 'sub-region' column are dropped before de-duplicating
+    and sorting.
+    NOTE(review): the first element is the DataFrame that was passed in, not a
+    list of region names - confirm this is what callers expect.
+    """
+    sub_region_values = region_df['sub-region'].dropna()
+    unique_sub_regions = sorted(sub_region_values.unique())
+    return region_df, unique_sub_regions
+def _parse_country_field(raw_country):
+    """
+    Turn the raw 'country' metadata value into a list of candidate code strings.
+
+    Accepts a list-like value, a string holding a bracketed list
+    (e.g. "['DE', 'FR']" or '["DE"]'), or a plain string holding one code.
+    Anything that cannot be interpreted yields an empty list; non-string
+    items are discarded so the cleaning function only ever sees strings.
+    """
+    import ast  # local import: only needed for the fallback parse below
+
+    if raw_country is None:
+        return []
+    if isinstance(raw_country, (list, tuple, set)):
+        # Payload already holds a real sequence; no string parsing required.
+        candidates = list(raw_country)
+    elif not isinstance(raw_country, str):
+        return []
+    else:
+        text = raw_country.strip()
+        if not text.startswith("["):
+            candidates = [text]
+        else:
+            try:
+                # Stored lists often use single quotes; swap them so JSON accepts the text.
+                candidates = json.loads(text.replace("'", '"'))
+            except json.JSONDecodeError:
+                # The quote swap corrupts values that contain an apostrophe;
+                # retry on the untouched text as a Python literal before giving up.
+                try:
+                    candidates = ast.literal_eval(text)
+                except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
+                    candidates = []
+    if isinstance(candidates, str):
+        candidates = [candidates]
+    elif not isinstance(candidates, (list, tuple, set)):
+        return []
+    return [item for item in candidates if isinstance(item, str)]
+@st.cache_data
+def get_country_name_and_region_mapping(_client, collection_name, region_df, hybrid_search_fn, clean_country_code_fn, get_country_name_fn):
+    """
+    Build a mapping from country name -> code and code -> sub-region, by scanning the search results.
+
+    Parameters:
+        _client: passed straight through to `hybrid_search_fn`.
+        collection_name: passed straight through to `hybrid_search_fn`.
+        region_df: DataFrame with 'alpha-2' and 'sub-region' columns.
+        hybrid_search_fn: called as `hybrid_search_fn(_client, "", collection_name)`;
+            elements [0] and [1] of its result are concatenated, and every item
+            must expose a dict-like `.payload`.
+        clean_country_code_fn: normalises one raw code string.
+        get_country_name_fn: called as `get_country_name_fn(code, region_df)`.
+
+    Returns:
+        (country_name_to_code, iso_code_to_sub_region). Codes without a row in
+        `region_df` get the sub-region "Not allocated".
+    """
+    results = hybrid_search_fn(_client, "", collection_name)
+    country_set = set()
+    for res in results[0] + results[1]:
+        # Treat a missing or null metadata entry as empty instead of crashing.
+        metadata = res.payload.get('metadata') or {}
+        for raw_code in _parse_country_field(metadata.get('country', "[]")):
+            code = clean_country_code_fn(raw_code)
+            # Only keep values that look like alpha-2 codes.
+            if len(code) == 2:
+                country_set.add(code)
+    country_name_to_code = {}
+    iso_code_to_sub_region = {}
+    # Sorted so the insertion order of both dicts is the same on every run.
+    for code in sorted(country_set):
+        name = get_country_name_fn(code, region_df)
+        sub_region_row = region_df[region_df['alpha-2'] == code]
+        sub_region = sub_region_row['sub-region'].values[0] if not sub_region_row.empty else "Not allocated"
+        country_name_to_code[name] = code
+        iso_code_to_sub_region[code] = sub_region
+    return country_name_to_code, iso_code_to_sub_region