Spaces:
Sleeping
Sleeping
Update appStore/prep_data.py
Browse files- appStore/prep_data.py +7 -5
appStore/prep_data.py
CHANGED
@@ -103,17 +103,19 @@ def process_giz_worldwide():
|
|
103 |
# After reading the JSON file into giz_df, convert the crs_key column:
|
104 |
if 'crs_key' in giz_df.columns:
|
105 |
def clean_crs_key(x):
|
106 |
-
|
107 |
-
|
|
|
108 |
try:
|
109 |
-
#
|
110 |
-
return str(int(float(
|
111 |
except Exception:
|
112 |
# Fallback: remove trailing ".0" if present.
|
113 |
-
return
|
114 |
giz_df['crs_key'] = giz_df['crs_key'].apply(clean_crs_key)
|
115 |
|
116 |
|
|
|
117 |
# Compute text_size based on merged_text and assign full text to the 'chunks' column
|
118 |
giz_df['text_size'] = giz_df['merged_text'].apply(lambda text: len(text.split()) if isinstance(text, str) else 0)
|
119 |
|
|
|
103 |
# After reading the JSON file into giz_df, convert the crs_key column:
|
104 |
if 'crs_key' in giz_df.columns:
|
105 |
def clean_crs_key(x):
    """Normalize a CRS key value to a plain integer-like string.

    The value is stringified and stripped of surrounding whitespace, then:

    * an empty result is returned unchanged (``""``);
    * a value that parses as a number is returned as the string of its
      integer part (``"12345.0"`` -> ``"12345"``, ``12345.0`` -> ``"12345"``);
    * anything else is returned as-is, minus a single trailing ``".0"``
      if one is present (``"abc.0"`` -> ``"abc"``).

    Note that non-string inputs are stringified first, so ``None`` yields
    ``"None"`` and a float NaN yields ``"nan"``.

    Args:
        x: The raw key value (any type; it is passed through ``str``).

    Returns:
        The cleaned key as a ``str``.
    """
    x_str = str(x).strip()
    if not x_str:
        return x_str

    # Integer-looking strings are parsed directly so that large values do not
    # lose precision on a round trip through float.
    try:
        return str(int(x_str))
    except ValueError:
        pass

    try:
        # Convert to float, then to int (drops the fractional part), then to string.
        return str(int(float(x_str)))
    except (ValueError, OverflowError):
        # ValueError: not numeric (or NaN); OverflowError: infinity.
        # Fallback: strip only a *trailing* ".0". Using str.replace here would
        # also delete ".0" occurrences in the middle of the key.
        return x_str[:-2] if x_str.endswith('.0') else x_str
|
115 |
giz_df['crs_key'] = giz_df['crs_key'].apply(clean_crs_key)
|
116 |
|
117 |
|
118 |
+
|
119 |
# Compute text_size based on merged_text and assign full text to the 'chunks' column
|
120 |
giz_df['text_size'] = giz_df['merged_text'].apply(lambda text: len(text.split()) if isinstance(text, str) else 0)
|
121 |
|