annikwag committed on
Commit
d01f935
·
verified ·
1 Parent(s): b4aa482

Update appStore/prep_data.py

Browse files
Files changed (1) hide show
  1. appStore/prep_data.py +3 -2
appStore/prep_data.py CHANGED
@@ -99,9 +99,10 @@ def process_giz_worldwide():
99
  # Create an empty 'url' column as the new dataset has an empty URL
100
  giz_df['url'] = ''
101
 
102
- # Convert CRS value: use string replacement and then integer conversion.
103
  if 'crs_value' in giz_df.columns:
104
- giz_df['crs_value'] = giz_df['crs_value'].apply(convert_crs_value)
 
105
 
106
  # Compute text_size based on merged_text and assign full text to the 'chunks' column
107
  giz_df['text_size'] = giz_df['merged_text'].apply(lambda text: len(text.split()) if isinstance(text, str) else 0)
 
99
  # Create an empty 'url' column as the new dataset has an empty URL
100
  giz_df['url'] = ''
101
 
102
+ # Convert CRS value to numeric then to integer if possible.
103
  if 'crs_value' in giz_df.columns:
104
+ # Convert to numeric, coercing errors to NaN, then fill NaN with 0 and cast to int.
105
+ giz_df['crs_value'] = pd.to_numeric(giz_df['crs_value'], errors='coerce').fillna(0).astype(int)
106
 
107
  # Compute text_size based on merged_text and assign full text to the 'chunks' column
108
  giz_df['text_size'] = giz_df['merged_text'].apply(lambda text: len(text.split()) if isinstance(text, str) else 0)