Spaces:
Sleeping
Sleeping
Update appStore/prep_data.py
Browse files — appStore/prep_data.py (+3 -2)
appStore/prep_data.py
CHANGED
@@ -99,9 +99,10 @@ def process_giz_worldwide():
|
|
99 |
# Create an empty 'url' column as the new dataset has an empty URL
|
100 |
giz_df['url'] = ''
|
101 |
|
102 |
-
# Convert CRS value
|
103 |
if 'crs_value' in giz_df.columns:
|
104 |
-
|
|
|
105 |
|
106 |
# Compute text_size based on merged_text and assign full text to the 'chunks' column
|
107 |
giz_df['text_size'] = giz_df['merged_text'].apply(lambda text: len(text.split()) if isinstance(text, str) else 0)
|
|
|
99 |
# Create an empty 'url' column as the new dataset has an empty URL
|
100 |
giz_df['url'] = ''
|
101 |
|
102 |
+
# Convert CRS value to numeric then to integer if possible.
|
103 |
if 'crs_value' in giz_df.columns:
|
104 |
+
# Convert to numeric, coercing errors to NaN, then fill NaN with 0 and cast to int.
|
105 |
+
giz_df['crs_value'] = pd.to_numeric(giz_df['crs_value'], errors='coerce').fillna(0).astype(int)
|
106 |
|
107 |
# Compute text_size based on merged_text and assign full text to the 'chunks' column
|
108 |
giz_df['text_size'] = giz_df['merged_text'].apply(lambda text: len(text.split()) if isinstance(text, str) else 0)
|