import json

import pandas as pd
import streamlit as st


@st.cache_data
def load_region_data(path):
    """Load the region lookup CSV at *path* into a DataFrame.

    The result is memoized through ``st.cache_data``.
    """
    return pd.read_csv(path)


def clean_country_code(country_str):
    """Return *country_str* with surrounding whitespace removed, upper-cased.

    No length or alphabet validation happens here; callers that need a strict
    alpha-2 code must check the result themselves.
    """
    return country_str.strip().upper()


def get_country_name(alpha2, region_df):
    """Return the ``name`` of the first *region_df* row matching *alpha2*.

    Rows are matched on the ``alpha-2`` column. When no row matches, *alpha2*
    itself is returned unchanged.
    """
    row = region_df[region_df['alpha-2'] == alpha2]
    if row.empty:
        return alpha2
    return row['name'].values[0]


def get_regions(region_df):
    """Return ``(region_df, sub_regions)``.

    The first element is *region_df* itself, passed through untouched. The
    second is the sorted list of distinct non-null values found in its
    ``sub-region`` column.
    """
    unique_sub_regions = sorted(region_df['sub-region'].dropna().unique())
    return region_df, unique_sub_regions


def _parse_country_field(country):
    """Normalize a raw ``country`` metadata value into a list of strings.

    Accepts a real list/tuple/set, a string holding a list literal (single
    quotes are rewritten to double quotes before JSON decoding), or a plain
    string holding one code. Anything else, or an undecodable list literal,
    yields an empty list. Non-string items are dropped.
    """
    if isinstance(country, (list, tuple, set)):
        candidates = list(country)
    elif isinstance(country, str):
        text = country.strip()
        if not text.startswith("["):
            return [text]
        try:
            parsed = json.loads(text.replace("'", '"'))
        except json.JSONDecodeError:
            return []
        candidates = parsed if isinstance(parsed, list) else [parsed]
    else:
        return []
    return [item for item in candidates if isinstance(item, str)]


@st.cache_data
def get_country_name_and_region_mapping(
    _client,
    collection_name,
    region_df,
    _hybrid_search_fn,
    _clean_country_code_fn,
    _get_country_name_fn,
):
    """Build country-name -> code and code -> sub-region lookups.

    The search function is invoked with an empty query string, the ``country``
    metadata of every returned hit is collected, and each two-character code
    is resolved against *region_df*.

    Parameters whose names start with an underscore are excluded from the
    ``st.cache_data`` cache key, so only *collection_name* and *region_df*
    determine cache hits.

    Args:
        _client: Handle forwarded as the first argument of *_hybrid_search_fn*.
        collection_name: Collection identifier forwarded to the search.
        region_df: DataFrame with ``alpha-2``, ``name`` and ``sub-region``
            columns.
        _hybrid_search_fn: Callable ``(client, query, collection_name)`` whose
            result exposes two concatenable sequences of hits at indexes 0
            and 1; each hit has a ``payload`` mapping.
        _clean_country_code_fn: Callable normalizing one raw code string.
        _get_country_name_fn: Callable ``(code, region_df)`` returning the
            display name for a code.

    Returns:
        A tuple ``(country_name_to_code, iso_code_to_sub_region)`` of dicts.
        Codes missing from *region_df* get the sub-region ``"Not allocated"``.
    """
    results = _hybrid_search_fn(_client, "", collection_name)

    country_set = set()
    for res in results[0] + results[1]:
        # Treat a missing payload or metadata entry as "no country" rather
        # than failing the whole scan.
        metadata = (res.payload or {}).get('metadata') or {}
        for raw_code in _parse_country_field(metadata.get('country', "[]")):
            code = _clean_country_code_fn(raw_code)
            if len(code) == 2:
                country_set.add(code)

    country_name_to_code = {}
    iso_code_to_sub_region = {}
    # Sorted so the resulting dicts have a stable, reproducible order.
    for code in sorted(country_set):
        name = _get_country_name_fn(code, region_df)
        sub_region_row = region_df[region_df['alpha-2'] == code]
        if sub_region_row.empty:
            sub_region = "Not allocated"
        else:
            sub_region = sub_region_row['sub-region'].values[0]
        country_name_to_code[name] = code
        iso_code_to_sub_region[code] = sub_region

    return country_name_to_code, iso_code_to_sub_region