annikwag committed on
Commit
2293687
·
verified ·
1 Parent(s): 110cb65

Update appStore/prep_data.py

Browse files
Files changed (1) hide show
  1. appStore/prep_data.py +7 -5
appStore/prep_data.py CHANGED
@@ -103,17 +103,19 @@ def process_giz_worldwide():
103
  # After reading the JSON file into giz_df, convert the crs_key column:
104
  if 'crs_key' in giz_df.columns:
105
  def clean_crs_key(x):
106
- if pd.isnull(x):
107
- return ""
 
108
  try:
109
- # Try converting to float, then to int, then to string.
110
- return str(int(float(x)))
111
  except Exception:
112
  # Fallback: remove trailing ".0" if present.
113
- return re.sub(r'\.0$', '', str(x))
114
  giz_df['crs_key'] = giz_df['crs_key'].apply(clean_crs_key)
115
 
116
 
 
117
  # Compute text_size based on merged_text and assign full text to the 'chunks' column
118
  giz_df['text_size'] = giz_df['merged_text'].apply(lambda text: len(text.split()) if isinstance(text, str) else 0)
119
 
 
103
  # After reading the JSON file into giz_df, convert the crs_key column:
104
  if 'crs_key' in giz_df.columns:
105
  def clean_crs_key(x):
106
+ x_str = str(x).strip()
107
+ if not x_str:
108
+ return x_str
109
  try:
110
+ # Convert to float then to int, then to string.
111
+ return str(int(float(x_str)))
112
  except Exception:
113
  # Fallback: remove trailing ".0" if present.
114
+ return x_str.replace('.0', '')
115
  giz_df['crs_key'] = giz_df['crs_key'].apply(clean_crs_key)
116
 
117
 
118
+
119
  # Compute text_size based on merged_text and assign full text to the 'chunks' column
120
  giz_df['text_size'] = giz_df['merged_text'].apply(lambda text: len(text.split()) if isinstance(text, str) else 0)
121