Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 2,645 Bytes
fb7eabb 9254d49 a04f491 fb7eabb a04f491 fb7eabb e96ae5d fb7eabb a04f491 fb7eabb 2b24fc7 fb7eabb 2b24fc7 fb7eabb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
import json
import streamlit as st
import pandas as pd
@st.cache_data
def load_region_data(path):
    """
    Read the region lookup CSV located at ``path`` and return it as a
    pandas DataFrame. The result is memoised through ``st.cache_data``.
    """
    region_table = pd.read_csv(path)
    return region_table
def clean_country_code(country_str):
    """
    Normalise a country code string: trim surrounding whitespace, then
    convert to upper case.

    The length of the result is not validated here.
    """
    trimmed = country_str.strip()
    return trimmed.upper()
def get_country_name(alpha2, region_df):
    """
    Look up ``alpha2`` in the 'alpha-2' column of ``region_df`` and return
    the 'name' value of the first matching row.

    When no row matches, the code itself is handed back unchanged.
    """
    code_matches = region_df['alpha-2'] == alpha2
    matching_rows = region_df[code_matches]
    if matching_rows.empty:
        # Unknown code: fall back to the input.
        return alpha2
    return matching_rows['name'].values[0]
def get_regions(region_df):
    """
    Return a tuple ``(region_df, sub_regions)``.

    The first element is the DataFrame that was passed in, unchanged.
    The second is a sorted list of the distinct non-null values found in
    its 'sub-region' column.
    """
    sub_region_column = region_df['sub-region']
    distinct_sub_regions = sub_region_column.dropna().unique()
    return region_df, sorted(distinct_sub_regions)
def _parse_country_field(country):
    """
    Convert a raw 'country' metadata value into a list of candidate code strings.

    Accepts a plain string ("DE"), a stringified list ("['DE', 'FR']"), or a
    list/tuple. Values that cannot be parsed, and entries that are not
    strings, are dropped.
    """
    if isinstance(country, (list, tuple)):
        candidates = list(country)
    elif isinstance(country, str):
        text = country.strip()
        if text.startswith("["):
            # The list may be written with single quotes (Python repr);
            # swap them so the JSON parser accepts it.
            try:
                parsed = json.loads(text.replace("'", '"'))
            except json.JSONDecodeError:
                parsed = []
            candidates = parsed if isinstance(parsed, list) else [parsed]
        else:
            candidates = [text]
    else:
        candidates = []
    # Only strings can be cleaned into codes; skip None, numbers, nested lists.
    return [item for item in candidates if isinstance(item, str)]


@st.cache_data
def get_country_name_and_region_mapping(
    _client,
    collection_name,
    region_df,
    _hybrid_search_fn,
    _clean_country_code_fn,
    _get_country_name_fn
):
    """
    Build a mapping from country name -> code and code -> sub-region, by scanning the entire collection.

    Calls ``_hybrid_search_fn(_client, "", collection_name)`` and reads
    ``payload['metadata']['country']`` from every item in the first two
    result lists. Each candidate code is passed through
    ``_clean_country_code_fn``; only results of length 2 are kept.

    Returns a tuple ``(country_name_to_code, iso_code_to_sub_region)``:
      * ``country_name_to_code``: name from ``_get_country_name_fn(code, region_df)`` -> code
      * ``iso_code_to_sub_region``: code -> 'sub-region' of the first row of
        ``region_df`` whose 'alpha-2' equals the code, or "Not allocated"
        when no row matches.
    """
    # The leading underscores tell st.cache_data not to hash those arguments.
    # The underscore is part of the parameter name, so it must also be used
    # when referring to them inside the body.
    results = _hybrid_search_fn(_client, "", collection_name)

    country_set = set()
    for res in results[0] + results[1]:
        country = res.payload.get('metadata', {}).get('country', "[]")
        for raw_code in _parse_country_field(country):
            code = _clean_country_code_fn(raw_code)
            if len(code) == 2:
                country_set.add(code)

    country_name_to_code = {}
    iso_code_to_sub_region = {}
    # Sorted iteration keeps the dict ordering (and the winner when two codes
    # resolve to the same name) stable across runs.
    for code in sorted(country_set):
        name = _get_country_name_fn(code, region_df)
        sub_region_row = region_df[region_df['alpha-2'] == code]
        if sub_region_row.empty:
            sub_region = "Not allocated"
        else:
            sub_region = sub_region_row['sub-region'].values[0]
        country_name_to_code[name] = code
        iso_code_to_sub_region[code] = sub_region
    return country_name_to_code, iso_code_to_sub_region
|