annikwag committed on
Commit
fb7eabb
·
verified ·
1 Parent(s): 8816b99

Update appStore/region_utils.py

Browse files
Files changed (1) hide show
  1. appStore/region_utils.py +60 -18
appStore/region_utils.py CHANGED
@@ -1,26 +1,68 @@
 
1
  import streamlit as st
2
  import pandas as pd
3
 
4
- # Load the region.csv once when the app starts
5
  @st.cache_data
6
- def load_region_data(file_path):
7
- return pd.read_csv(file_path)
 
 
 
8
 
9
- def clean_country_code(code):
10
- # Remove non-alphabetical characters and convert to uppercase.
11
- return ''.join(filter(str.isalpha, code)).upper()
12
-
13
-
14
- def get_country_name(alpha2_code, region_df):
15
- """Map ISO country code to country name (case-insensitive)."""
16
- # Clean the code before processing.
17
- code = clean_country_code(alpha2_code)
18
- row = region_df[region_df['alpha-2'] == code]
19
- return row['name'].values[0] if not row.empty else code
20
 
 
 
 
 
 
 
 
 
 
21
 
22
  def get_regions(region_df):
23
- """Get unique regions and sub-regions."""
24
- regions = region_df['region'].dropna().unique().tolist()
25
- sub_regions = region_df['sub-region'].dropna().unique().tolist()
26
- return regions, sub_regions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
  import streamlit as st
3
  import pandas as pd
4
 
 
5
  @st.cache_data
6
def load_region_data(path):
    """
    Load the region lookup CSV into a DataFrame.

    Args:
        path: Anything ``pandas.read_csv`` accepts (file path, URL or
            file-like object).

    Returns:
        pandas.DataFrame with one row per CSV record. Only empty cells are
        treated as missing (NaN).
    """
    # pandas' default NA markers include the literal text "NA", which is also
    # a valid ISO 3166-1 alpha-2 code (Namibia). Disable the defaults and
    # treat only empty cells as missing so such codes survive as strings and
    # can still be matched against the 'alpha-2' column.
    return pd.read_csv(path, keep_default_na=False, na_values=[""])
11
 
12
def clean_country_code(country_str):
    """
    Normalise a raw country code: trim surrounding whitespace and upper-case it.

    Args:
        country_str: Raw code text, e.g. ``" de "``.

    Returns:
        The trimmed, upper-cased string. Non-string input (``None``, numbers,
        ...) yields ``""`` instead of raising, so callers that keep only
        two-character results simply discard it.
    """
    if not isinstance(country_str, str):
        return ""
    return country_str.strip().upper()
 
 
 
 
 
 
17
 
18
def get_country_name(alpha2, region_df):
    """
    Given an alpha-2 code, return the matching 'name' value from region_df.

    The lookup ignores surrounding whitespace and letter case of ``alpha2``.

    Args:
        alpha2: Country code to look up.
        region_df: DataFrame with at least the columns 'alpha-2' and 'name'.

    Returns:
        The 'name' of the first row whose 'alpha-2' equals the normalised
        code; if no row matches, the original ``alpha2`` value unchanged.
    """
    # Normalise only the lookup key; the caller's original value is what gets
    # returned on a miss.
    lookup_code = alpha2.strip().upper() if isinstance(alpha2, str) else alpha2
    matching_names = region_df.loc[region_df['alpha-2'] == lookup_code, 'name']
    if matching_names.empty:
        return alpha2
    return matching_names.iloc[0]
27
 
28
def get_regions(region_df):
    """
    Collect the distinct sub-regions present in the lookup table.

    Returns:
        A 2-tuple ``(region_df, sub_regions)``: the DataFrame that was passed
        in (the same object, not a list of region names) and a sorted list of
        the unique non-missing values of its 'sub-region' column.
    """
    sub_region_values = list(region_df['sub-region'].dropna().unique())
    sub_region_values.sort()
    return region_df, sub_region_values
35
+
36
+ @st.cache_data
37
def get_country_name_and_region_mapping(_client, collection_name, region_df, hybrid_search_fn, clean_country_code_fn, get_country_name_fn):
    """
    Build two lookup dicts from the countries found in a collection.

    Args:
        _client: Passed straight through to ``hybrid_search_fn``.
            NOTE(review): the leading underscore presumably keeps Streamlit's
            cache from hashing it - confirm.
        collection_name: Passed straight through to ``hybrid_search_fn``.
        region_df: DataFrame with at least 'alpha-2' and 'sub-region' columns.
        hybrid_search_fn: Called as ``hybrid_search_fn(_client, "", collection_name)``;
            must return a pair of lists whose items expose a ``payload`` dict.
        clean_country_code_fn: Normalises one raw code string.
        get_country_name_fn: Called as ``get_country_name_fn(code, region_df)``.

    Returns:
        ``(country_name_to_code, iso_code_to_sub_region)``: a dict mapping
        country name -> two-letter code and a dict mapping code -> sub-region.
        Codes that are absent from region_df, or whose sub-region is missing,
        map to "Not allocated".
    """
    results = hybrid_search_fn(_client, "", collection_name)

    country_set = set()
    for res in results[0] + results[1]:
        # Payload / metadata may be missing or None; treat both as "no countries".
        payload = res.payload or {}
        metadata = payload.get('metadata') or {}
        for raw_code in _parse_country_field(metadata.get('country', "[]")):
            if not isinstance(raw_code, str):
                continue  # e.g. numbers or nulls inside a parsed list
            code = clean_country_code_fn(raw_code)
            if len(code) == 2:
                country_set.add(code)

    country_name_to_code = {}
    iso_code_to_sub_region = {}
    for code in country_set:
        name = get_country_name_fn(code, region_df)
        sub_region_values = region_df.loc[region_df['alpha-2'] == code, 'sub-region']
        # A matching row with an empty sub-region cell would otherwise leak NaN
        # into the mapping; use the same sentinel as for unknown codes.
        if sub_region_values.empty or pd.isna(sub_region_values.iloc[0]):
            sub_region = "Not allocated"
        else:
            sub_region = sub_region_values.iloc[0]
        country_name_to_code[name] = code
        iso_code_to_sub_region[code] = sub_region

    return country_name_to_code, iso_code_to_sub_region


def _parse_country_field(country):
    """Turn a raw 'country' metadata value into a list of candidate codes."""
    if isinstance(country, (list, tuple, set)):
        return list(country)
    if not isinstance(country, str):
        return []
    text = country.strip()
    if not text.startswith("["):
        return [text]
    try:
        # Stored lists look like "['US', 'DE']"; swap quotes so JSON accepts them.
        parsed = json.loads(text.replace("'", '"'))
    except json.JSONDecodeError:
        return []
    return parsed if isinstance(parsed, list) else []