TEST-GIZ-Project-Search

Running on CPU Upgrade

File size: 2,645 Bytes

fb7eabb
9254d49
a04f491
 
 
fb7eabb
 
 
 
 
a04f491
fb7eabb
 
 
 
 
e96ae5d
fb7eabb
 
 
 
 
 
 
 
 
a04f491
 
fb7eabb
 
 
 
 
 
 
 
2b24fc7
 
 
 
 
 
 
 
fb7eabb
 
 
2b24fc7
 
 
 
 
fb7eabb

import json
import streamlit as st
import pandas as pd

@st.cache_data
def load_region_data(path):
    """
    Load the region lookup CSV into a DataFrame.

    Args:
        path: Location of the CSV file, handed straight to ``pd.read_csv``.

    Returns:
        A DataFrame with one row per CSV record. Empty fields are read as
        NaN; every other value is kept exactly as written in the file.
    """
    # pandas' default missing-value markers include the literal text "NA",
    # which is also the ISO 3166-1 alpha-2 code for Namibia. Limiting NA
    # detection to empty fields keeps that code as a string so lookups on
    # the 'alpha-2' column can match it.
    return pd.read_csv(path, keep_default_na=False, na_values=[""])

def clean_country_code(country_str):
    """
    Normalise a raw country code string.

    Leading and trailing whitespace is removed and the remaining text is
    upper-cased. No length or alphabet check happens here; callers decide
    whether the result is a valid alpha-2 code.
    """
    trimmed = country_str.strip()
    return trimmed.upper()

def get_country_name(alpha2, region_df):
    """
    Translate an alpha-2 code into the value of the 'name' column.

    The first row of region_df whose 'alpha-2' equals the given code wins.
    When no row matches, the code itself is handed back unchanged.
    """
    is_match = region_df['alpha-2'] == alpha2
    hits = region_df[is_match]
    if hits.empty:
        # Unknown code: fall back to what the caller passed in.
        return alpha2
    return hits['name'].values[0]

def get_regions(region_df):
    """
    Return a tuple: (region_df itself, sorted list of unique sub-regions).

    The first element is the DataFrame that was passed in, untouched. The
    second is built from the 'sub-region' column with missing values
    dropped, duplicates removed and the remainder sorted ascending.
    """
    sub_region_column = region_df['sub-region']
    distinct_sub_regions = sub_region_column.dropna().unique()
    return region_df, sorted(distinct_sub_regions)

@st.cache_data
def get_country_name_and_region_mapping(
    _client, 
    collection_name, 
    region_df, 
    _hybrid_search_fn, 
    _clean_country_code_fn, 
    _get_country_name_fn
):
    """
    Build a mapping from country name -> code and code -> sub-region, by scanning the entire collection.

    Args:
        _client: Passed through to ``_hybrid_search_fn`` as its first argument.
        collection_name: Passed through to ``_hybrid_search_fn`` as its third
            argument.
        region_df: DataFrame with at least 'alpha-2' and 'sub-region' columns.
        _hybrid_search_fn: Called as ``fn(_client, "", collection_name)``; the
            first two elements of its result are concatenated and iterated.
            (Presumably two lists of search hits - confirm against the
            implementation.)
        _clean_country_code_fn: Called with one raw code string; must return
            the normalised code.
        _get_country_name_fn: Called as ``fn(code, region_df)``; must return
            the display name for the code.

    Returns:
        ``(country_name_to_code, iso_code_to_sub_region)`` - two dicts. Codes
        without a row in region_df get the sub-region "Not allocated".
    """
    # The leading underscores tell st.cache_data not to hash those arguments.
    # Inside the function they must be referenced by the same underscored
    # names; the un-prefixed spellings do not exist and raise NameError.
    results = _hybrid_search_fn(_client, "", collection_name)

    country_set = set()
    for res in results[0] + results[1]:
        country = res.payload.get('metadata', {}).get('country', "[]")

        if isinstance(country, (list, tuple)):
            # Already a sequence of codes; no parsing needed.
            country_list = list(country)
        elif not isinstance(country, str):
            # None or another unexpected type: nothing usable in this hit.
            country_list = []
        elif country.strip().startswith("["):
            # Stringified list such as "['DE', 'FR']": swap the quotes so it
            # parses as JSON.
            try:
                parsed_country = json.loads(country.replace("'", '"'))
            except json.JSONDecodeError:
                parsed_country = []
            if isinstance(parsed_country, str):
                country_list = [parsed_country]
            else:
                country_list = parsed_country
        else:
            country_list = [country.strip()]

        for raw_code in country_list:
            if not isinstance(raw_code, str):
                continue
            # Clean once and reuse the result for both the check and the add.
            cleaned = _clean_country_code_fn(raw_code)
            if len(cleaned) == 2:
                country_set.add(cleaned)

    country_name_to_code = {}
    iso_code_to_sub_region = {}
    # Sorted iteration keeps the result deterministic when two codes resolve
    # to the same display name (set order varies between runs).
    for code in sorted(country_set):
        name = _get_country_name_fn(code, region_df)
        sub_region_row = region_df[region_df['alpha-2'] == code]
        if sub_region_row.empty:
            sub_region = "Not allocated"
        else:
            sub_region = sub_region_row['sub-region'].values[0]
        country_name_to_code[name] = code
        iso_code_to_sub_region[code] = sub_region

    return country_name_to_code, iso_code_to_sub_region