Update appStore/region_utils.py
appStore/region_utils.py  CHANGED  (+60 -18)
@@ -1,26 +1,68 @@
+import json
 import streamlit as st
 import pandas as pd
 
-# Load the region.csv once when the app starts
 @st.cache_data
-def load_region_data(
+def load_region_data(path):
+    """
+    Load the region lookup CSV into a DataFrame.
+    """
+    return pd.read_csv(path)
 
-def clean_country_code(
+def clean_country_code(country_str):
+    """
+    Clean up a country code string to keep only uppercase alpha-2.
+    """
+    return country_str.strip().upper()
 
-def get_country_name(alpha2_code, region_df):
-    """Map ISO country code to country name (case-insensitive)."""
-    # Clean the code before processing.
-    code = clean_country_code(alpha2_code)
-    row = region_df[region_df['alpha-2'] == code]
-    return row['name'].values[0] if not row.empty else code
+def get_country_name(alpha2, region_df):
+    """
+    Given an alpha-2 code, return the English short name from region_df.
+    If not found, return the original code.
+    """
+    row = region_df[region_df['alpha-2'] == alpha2]
+    if not row.empty:
+        return row['name'].values[0]
+    return alpha2
 
 def get_regions(region_df):
-    """
+    """
+    Return a tuple: (the region DataFrame, sorted list of unique sub-regions).
+    This is just an example. Adjust as needed.
+    """
+    unique_sub_regions = sorted(region_df['sub-region'].dropna().unique())
+    return region_df, unique_sub_regions
+
+@st.cache_data
+def get_country_name_and_region_mapping(_client, collection_name, region_df, hybrid_search_fn, clean_country_code_fn, get_country_name_fn):
+    """
+    Build a mapping from country name -> code and code -> sub-region by scanning the entire collection.
+    """
+    results = hybrid_search_fn(_client, "", collection_name)
+    country_set = set()
+    for res in results[0] + results[1]:
+        country = res.payload.get('metadata', {}).get('country', "[]")
+        if country.strip().startswith("["):
+            try:
+                parsed_country = json.loads(country.replace("'", '"'))
+                if isinstance(parsed_country, str):
+                    country_list = [parsed_country]
+                else:
+                    country_list = parsed_country
+            except json.JSONDecodeError:
+                country_list = []
+        else:
+            country_list = [country.strip()]
+        two_digit_codes = [clean_country_code_fn(code) for code in country_list if len(clean_country_code_fn(code)) == 2]
+        country_set.update(two_digit_codes)
+
+    country_name_to_code = {}
+    iso_code_to_sub_region = {}
+    for code in country_set:
+        name = get_country_name_fn(code, region_df)
+        sub_region_row = region_df[region_df['alpha-2'] == code]
+        sub_region = sub_region_row['sub-region'].values[0] if not sub_region_row.empty else "Not allocated"
+        country_name_to_code[name] = code
+        iso_code_to_sub_region[code] = sub_region
+
+    return country_name_to_code, iso_code_to_sub_region
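
Below is a minimal, self-contained sketch of how these helpers might be exercised. Only the imported functions come from the file above; everything else is a placeholder invented for illustration: the tiny in-memory DataFrame stands in for region.csv, FakeResult and fake_hybrid_search mimic a search client whose hybrid-search helper returns two result lists, and the "docs" collection name and None client are dummies. It also assumes the snippet runs from the repository root so that appStore is importable.

import pandas as pd

from appStore.region_utils import (
    clean_country_code,
    get_country_name,
    get_country_name_and_region_mapping,
)

# Stand-in for the real region.csv lookup table.
region_df = pd.DataFrame({
    "alpha-2": ["KE", "PE"],
    "name": ["Kenya", "Peru"],
    "sub-region": ["Sub-Saharan Africa", "Latin America and the Caribbean"],
})

class FakeResult:
    """Mimics a search hit: only .payload['metadata']['country'] is read."""
    def __init__(self, country):
        self.payload = {"metadata": {"country": country}}

def fake_hybrid_search(client, query, collection_name):
    # Assumed shape: the real helper returns two result lists (e.g. dense and sparse hits).
    return [FakeResult("['KE', 'PE']")], [FakeResult("ke")]

name_to_code, code_to_sub_region = get_country_name_and_region_mapping(
    None,        # placeholder for the real search client (unhashed `_client` argument)
    "docs",      # placeholder collection name
    region_df,
    hybrid_search_fn=fake_hybrid_search,
    clean_country_code_fn=clean_country_code,
    get_country_name_fn=get_country_name,
)

print(name_to_code)          # {'Kenya': 'KE', 'Peru': 'PE'} (order may vary)
print(code_to_sub_region)    # {'KE': 'Sub-Saharan Africa', 'PE': 'Latin America and the Caribbean'}

Passing hybrid_search_fn, clean_country_code_fn, and get_country_name_fn in as arguments, rather than importing them inside region_utils, presumably keeps the module decoupled from the search layer and lets @st.cache_data account for those helpers when caching the mapping.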