puqi committed on
Commit
8ac74fd
·
1 Parent(s): b4a19b5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +175 -28
app.py CHANGED
@@ -16,10 +16,12 @@ from tqdm import tqdm
16
 
17
 
18
  st.title('A _Quickstart Notebook_ for :blue[ClimSim]:')
19
- st.link_button("ClimSim", "https://huggingface.co/datasets/LEAP/subsampled_low_res/tree/main",use_container_width=True)
20
  st.header('**Step 1:** Import data_utils')
21
  st.code('''from data_utils import *''',language='python')
22
 
 
 
23
  st.header('**Step 2:** Instantiate class')
24
  st.code('''#Change the path to your own
25
  grid_info = xr.open_dataset('ClimSim_low-res_grid-info.nc')
@@ -35,7 +37,6 @@ data = data_utils(grid_info = grid_info,
35
  output_scale = output_scale)
36
  data.set_to_v1_vars()''',language='python')
37
 
38
- #Change the path to your own
39
  grid_info = xr.open_dataset('ClimSim_low-res_grid-info.nc')
40
  input_mean = xr.open_dataset('input_mean.nc')
41
  input_max = xr.open_dataset('input_max.nc')
@@ -51,7 +52,9 @@ data = data_utils(grid_info = grid_info,
51
  data.set_to_v1_vars()
52
 
53
 
 
54
  st.header('**Step 3:** Load training and validation data')
 
55
  st.code('''data.input_train = data.load_npy_file('train_input_small.npy')
56
  data.target_train = data.load_npy_file('train_target_small.npy')
57
  data.input_val = data.load_npy_file('val_input_small.npy')
@@ -63,6 +66,7 @@ data.input_val = data.load_npy_file('val_input_small.npy')
63
  data.target_val = data.load_npy_file('val_target_small.npy')
64
 
65
 
 
66
  st.header('**Step 4:** Train models')
67
  st.subheader('Train constant prediction model')
68
  st.code('''const_model = data.target_train.mean(axis = 0)''',language='python')
@@ -70,6 +74,7 @@ st.code('''const_model = data.target_train.mean(axis = 0)''',language='python')
70
  const_model = data.target_train.mean(axis = 0)
71
 
72
 
 
73
  st.subheader('Train multiple linear regression model')
74
  st.text('adding bias unit')
75
  st.code('''X = data.input_train
@@ -81,47 +86,51 @@ bias_vector = np.ones((X.shape[0], 1))
81
  X = np.concatenate((X, bias_vector), axis=1)
82
 
83
 
 
84
  st.text('create model')
85
  st.code('''mlr_weights = np.linalg.inv(X.transpose()@X)@X.transpose()@data.target_train''',language='python')
86
 
87
  mlr_weights = np.linalg.inv(X.transpose()@X)@X.transpose()@data.target_train
88
 
89
 
 
90
  st.subheader('Train your models here')
91
  st.code('''###
92
  # train your model here
93
  ###''',language='python')
94
 
95
- ###
96
- # train your model here
97
- ###
98
-
99
-
100
- st.link_button("Go to Original Dataset", "https://huggingface.co/datasets/LEAP/subsampled_low_res/tree/main",use_container_width=True)
101
 
102
  st.header('**Step 5:** Evaluate on validation data')
 
 
103
 
 
104
 
105
 
106
 
 
 
 
 
107
 
 
 
 
 
 
 
108
 
 
109
 
 
 
 
 
110
 
111
 
112
-
113
-
114
-
115
-
116
-
117
-
118
- data.set_pressure_grid(data_split = 'val')
119
-
120
- # Constant Prediction
121
  const_pred_val = np.repeat(const_model[np.newaxis, :], data.target_val.shape[0], axis = 0)
122
  print(const_pred_val.shape)
123
 
124
- # Multiple Linear Regression
125
  X_val = data.input_val
126
  bias_vector_val = np.ones((X_val.shape[0], 1))
127
  X_val = np.concatenate((X_val, bias_vector_val), axis=1)
@@ -129,18 +138,37 @@ X_val = np.concatenate((X_val, bias_vector_val), axis=1)
129
  mlr_pred_val = X_val@mlr_weights
130
  print(mlr_pred_val.shape)
131
 
132
- # Load your prediction here
133
-
134
- # Load predictions into data_utils object
135
  data.model_names = ['const', 'mlr'] # add names of your models here
136
  preds = [const_pred_val, mlr_pred_val] # add your custom predictions here
137
  data.preds_val = dict(zip(data.model_names, preds))
138
 
 
 
 
 
 
 
 
 
 
 
139
  data.reweight_target(data_split = 'val')
140
  data.reweight_preds(data_split = 'val')
 
 
 
 
 
 
 
141
  data.metrics_names = ['MAE', 'RMSE', 'R2', 'bias']
142
  data.create_metrics_df(data_split = 'val')
143
 
 
 
 
 
 
144
  letters = string.ascii_lowercase
145
 
146
  # create custom dictionary for plotting
@@ -152,6 +180,41 @@ for metric in data.metrics_names:
152
  plot_df_byvar[metric] = plot_df_byvar[metric].rename(columns = data.var_short_names).transpose()
153
 
154
  # plot figure
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  fig, axes = plt.subplots(nrows = len(data.metrics_names), sharex = True)
156
  for i in range(len(data.metrics_names)):
157
  plot_df_byvar[data.metrics_names[i]].plot.bar(
@@ -163,8 +226,6 @@ for i in range(len(data.metrics_names)):
163
  axes[i].set_ylim(0,1)
164
  axes[i].set_title(f'({letters[i]}) {data.metrics_names[i]}')
165
 
166
-
167
-
168
  axes[i].set_xlabel('Output variable')
169
  axes[i].set_xticklabels(plot_df_byvar[data.metrics_names[i]].index, \
170
  rotation=0, ha='center')
@@ -179,14 +240,32 @@ axes[0].legend(columnspacing = .9,
179
  loc = 'upper right')
180
  fig.set_size_inches(7,8)
181
  fig.tight_layout()
 
182
  st.pyplot(fig)
 
 
 
 
 
 
 
 
 
183
 
184
- # path to target input
185
  data.input_scoring = np.load('scoring_input_small.npy')
186
- # path to target output
187
  data.target_scoring = np.load('scoring_target_small.npy')
 
 
 
 
 
 
188
  data.set_pressure_grid(data_split = 'scoring')
189
- # constant prediction
 
 
 
 
190
  const_pred_scoring = np.repeat(const_model[np.newaxis, :], data.target_scoring.shape[0], axis = 0)
191
  print(const_pred_scoring.shape)
192
 
@@ -200,9 +279,37 @@ print(mlr_pred_scoring.shape)
200
  # Your model prediction here
201
 
202
  # Load predictions into object
 
 
 
 
 
 
 
 
 
 
 
 
 
203
  data.model_names = ['const', 'mlr'] # model name here
204
  preds = [const_pred_scoring, mlr_pred_scoring] # add prediction here
205
  data.preds_scoring = dict(zip(data.model_names, preds))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
  # weight predictions and target
207
  data.reweight_target(data_split = 'scoring')
208
  data.reweight_preds(data_split = 'scoring')
@@ -211,8 +318,12 @@ data.reweight_preds(data_split = 'scoring')
211
  data.metrics_names = ['MAE', 'RMSE', 'R2', 'bias']
212
  data.create_metrics_df(data_split = 'scoring')
213
 
214
- # set plotting settings
215
 
 
 
 
 
 
216
  letters = string.ascii_lowercase
217
 
218
  # create custom dictionary for plotting
@@ -239,6 +350,41 @@ axes[i].set_xlabel('Output variable')
239
  axes[i].set_xticklabels(plot_df_byvar[data.metrics_names[i]].index, \
240
  rotation=0, ha='center')
241
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
  axes[0].legend(columnspacing = .9,
243
  labelspacing = .3,
244
  handleheight = .07,
@@ -249,6 +395,7 @@ axes[0].legend(columnspacing = .9,
249
  loc = 'upper right')
250
  fig.set_size_inches(7,8)
251
  fig.tight_layout()
 
252
  st.pyplot(fig)
253
 
254
- st.markdown('Streamlit p')
 
16
 
17
 
18
  st.title('A _Quickstart Notebook_ for :blue[ClimSim]:')
19
+ st.link_button("Go to ClimSim Github Repository", "https://github.com/leap-stc/ClimSim/tree/main",use_container_width=True)
20
  st.header('**Step 1:** Import data_utils')
21
  st.code('''from data_utils import *''',language='python')
22
 
23
+
24
+
25
  st.header('**Step 2:** Instantiate class')
26
  st.code('''#Change the path to your own
27
  grid_info = xr.open_dataset('ClimSim_low-res_grid-info.nc')
 
37
  output_scale = output_scale)
38
  data.set_to_v1_vars()''',language='python')
39
 
 
40
  grid_info = xr.open_dataset('ClimSim_low-res_grid-info.nc')
41
  input_mean = xr.open_dataset('input_mean.nc')
42
  input_max = xr.open_dataset('input_max.nc')
 
52
  data.set_to_v1_vars()
53
 
54
 
55
+
56
  st.header('**Step 3:** Load training and validation data')
57
+ st.link_button("Go to Original Dataset", "https://huggingface.co/datasets/LEAP/subsampled_low_res/tree/main",use_container_width=True)
58
  st.code('''data.input_train = data.load_npy_file('train_input_small.npy')
59
  data.target_train = data.load_npy_file('train_target_small.npy')
60
  data.input_val = data.load_npy_file('val_input_small.npy')
 
66
  data.target_val = data.load_npy_file('val_target_small.npy')
67
 
68
 
69
+
70
  st.header('**Step 4:** Train models')
71
  st.subheader('Train constant prediction model')
72
  st.code('''const_model = data.target_train.mean(axis = 0)''',language='python')
 
74
  const_model = data.target_train.mean(axis = 0)
75
 
76
 
77
+
78
  st.subheader('Train multiple linear regression model')
79
  st.text('adding bias unit')
80
  st.code('''X = data.input_train
 
86
  X = np.concatenate((X, bias_vector), axis=1)
87
 
88
 
89
+
90
  st.text('create model')
91
  st.code('''mlr_weights = np.linalg.inv(X.transpose()@X)@X.transpose()@data.target_train''',language='python')
92
 
93
  mlr_weights = np.linalg.inv(X.transpose()@X)@X.transpose()@data.target_train
94
 
95
 
96
+
97
  st.subheader('Train your models here')
98
  st.code('''###
99
  # train your model here
100
  ###''',language='python')
101
 
 
 
 
 
 
 
102
 
103
  st.header('**Step 5:** Evaluate on validation data')
104
+ st.subheader('Set pressure grid')
105
+ st.code('''data.set_pressure_grid(data_split = 'val')''',language='python')
106
 
107
+ data.set_pressure_grid(data_split = 'val')
108
 
109
 
110
 
111
+ st.subheader('Load predictions')
112
+ st.code('''# Constant Prediction
113
+ const_pred_val = np.repeat(const_model[np.newaxis, :], data.target_val.shape[0], axis = 0)
114
+ print(const_pred_val.shape)
115
 
116
+ # Multiple Linear Regression
117
+ X_val = data.input_val
118
+ bias_vector_val = np.ones((X_val.shape[0], 1))
119
+ X_val = np.concatenate((X_val, bias_vector_val), axis=1)
120
+ mlr_pred_val = X_val@mlr_weights
121
+ print(mlr_pred_val.shape)
122
 
123
+ # Load your prediction here
124
 
125
+ # Load predictions into data_utils object
126
+ data.model_names = ['const', 'mlr'] # add names of your models here
127
+ preds = [const_pred_val, mlr_pred_val] # add your custom predictions here
128
+ data.preds_val = dict(zip(data.model_names, preds))''',language='python')
129
 
130
 
 
 
 
 
 
 
 
 
 
131
  const_pred_val = np.repeat(const_model[np.newaxis, :], data.target_val.shape[0], axis = 0)
132
  print(const_pred_val.shape)
133
 
 
134
  X_val = data.input_val
135
  bias_vector_val = np.ones((X_val.shape[0], 1))
136
  X_val = np.concatenate((X_val, bias_vector_val), axis=1)
 
138
  mlr_pred_val = X_val@mlr_weights
139
  print(mlr_pred_val.shape)
140
 
 
 
 
141
  data.model_names = ['const', 'mlr'] # add names of your models here
142
  preds = [const_pred_val, mlr_pred_val] # add your custom predictions here
143
  data.preds_val = dict(zip(data.model_names, preds))
144
 
145
+
146
+
147
+ st.subheader('Weight predictions and target')
148
+ st.text('''1.Undo output scaling
149
+ 2.Weight vertical levels by dp/g
150
+ 3.Weight horizontal area of each grid cell by a[x]/mean(a[x])
151
+ 4.Convert units to a common energy unit''')
152
+ st.code('''data.reweight_target(data_split = 'val')
153
+ data.reweight_preds(data_split = 'val')''',language='python')
154
+
155
  data.reweight_target(data_split = 'val')
156
  data.reweight_preds(data_split = 'val')
157
+
158
+
159
+
160
+ st.subheader('Set and calculate metrics')
161
+ st.code('''data.metrics_names = ['MAE', 'RMSE', 'R2', 'bias']
162
+ data.create_metrics_df(data_split = 'val')''',language='python')
163
+
164
  data.metrics_names = ['MAE', 'RMSE', 'R2', 'bias']
165
  data.create_metrics_df(data_split = 'val')
166
 
167
+
168
+
169
+ st.subheader('Create plots')
170
+ st.code('''# set plotting settings
171
+ %config InlineBackend.figure_format = 'retina'
172
  letters = string.ascii_lowercase
173
 
174
  # create custom dictionary for plotting
 
180
  plot_df_byvar[metric] = plot_df_byvar[metric].rename(columns = data.var_short_names).transpose()
181
 
182
  # plot figure
183
+ fig, axes = plt.subplots(nrows = len(data.metrics_names), sharex = True)
184
+ for i in range(len(data.metrics_names)):
185
+ plot_df_byvar[data.metrics_names[i]].plot.bar(
186
+ legend = False,
187
+ ax = axes[i])
188
+ if data.metrics_names[i] != 'R2':
189
+ axes[i].set_ylabel('$W/m^2$')
190
+ else:
191
+ axes[i].set_ylim(0,1)
192
+
193
+ axes[i].set_title(f'({letters[i]}) {data.metrics_names[i]}')
194
+ axes[i].set_xlabel('Output variable')
195
+ axes[i].set_xticklabels(plot_df_byvar[data.metrics_names[i]].index, \
196
+ rotation=0, ha='center')
197
+
198
+ axes[0].legend(columnspacing = .9,
199
+ labelspacing = .3,
200
+ handleheight = .07,
201
+ handlelength = 1.5,
202
+ handletextpad = .2,
203
+ borderpad = .2,
204
+ ncol = 3,
205
+ loc = 'upper right')
206
+ fig.set_size_inches(7,8)
207
+ fig.tight_layout()''',language='python')
208
+
209
+ letters = string.ascii_lowercase
210
+
211
+ dict_var = data.metrics_var_val
212
+ plot_df_byvar = {}
213
+ for metric in data.metrics_names:
214
+ plot_df_byvar[metric] = pd.DataFrame([dict_var[model][metric] for model in data.model_names],
215
+ index=data.model_names)
216
+ plot_df_byvar[metric] = plot_df_byvar[metric].rename(columns = data.var_short_names).transpose()
217
+
218
  fig, axes = plt.subplots(nrows = len(data.metrics_names), sharex = True)
219
  for i in range(len(data.metrics_names)):
220
  plot_df_byvar[data.metrics_names[i]].plot.bar(
 
226
  axes[i].set_ylim(0,1)
227
  axes[i].set_title(f'({letters[i]}) {data.metrics_names[i]}')
228
 
 
 
229
  axes[i].set_xlabel('Output variable')
230
  axes[i].set_xticklabels(plot_df_byvar[data.metrics_names[i]].index, \
231
  rotation=0, ha='center')
 
240
  loc = 'upper right')
241
  fig.set_size_inches(7,8)
242
  fig.tight_layout()
243
+
244
  st.pyplot(fig)
245
+ st.text('If you trained models with different hyperparameters, use the ones that performed the best on validation data for evaluation on scoring data.')
246
+
247
+
248
+ st.header('**Step 6:** Evaluate on scoring data')
249
+ st.subheader('Do this at the VERY END (when you have finished tuned the hyperparameters for your model and are seeking a final evaluation)')
250
+ st.subheader('Load scoring data')
251
+ st.code('''data.input_scoring = np.load('scoring_input_small.npy')
252
+ data.target_scoring = np.load('scoring_target_small.npy')
253
+ ''',language='python')
254
 
 
255
  data.input_scoring = np.load('scoring_input_small.npy')
 
256
  data.target_scoring = np.load('scoring_target_small.npy')
257
+
258
+
259
+
260
+ st.subheader('Set pressure grid')
261
+ st.code('''data.set_pressure_grid(data_split = 'scoring')''',language='python')
262
+
263
  data.set_pressure_grid(data_split = 'scoring')
264
+
265
+
266
+
267
+ st.subheader('Load predictions')
268
+ st.code('''# constant prediction
269
  const_pred_scoring = np.repeat(const_model[np.newaxis, :], data.target_scoring.shape[0], axis = 0)
270
  print(const_pred_scoring.shape)
271
 
 
279
  # Your model prediction here
280
 
281
  # Load predictions into object
282
+ data.model_names = ['const', 'mlr'] # model name here
283
+ preds = [const_pred_scoring, mlr_pred_scoring] # add prediction here
284
+ data.preds_scoring = dict(zip(data.model_names, preds))''',language='python')
285
+
286
+ const_pred_scoring = np.repeat(const_model[np.newaxis, :], data.target_scoring.shape[0], axis = 0)
287
+ print(const_pred_scoring.shape)
288
+
289
+ X_scoring = data.input_scoring
290
+ bias_vector_scoring = np.ones((X_scoring.shape[0], 1))
291
+ X_scoring = np.concatenate((X_scoring, bias_vector_scoring), axis=1)
292
+ mlr_pred_scoring = X_scoring@mlr_weights
293
+ print(mlr_pred_scoring.shape)
294
+
295
  data.model_names = ['const', 'mlr'] # model name here
296
  preds = [const_pred_scoring, mlr_pred_scoring] # add prediction here
297
  data.preds_scoring = dict(zip(data.model_names, preds))
298
+
299
+
300
+ st.subheader('Weight predictions and target')
301
+ st.text('''1.Undo output scaling
302
+ 2.Weight vertical levels by dp/g
303
+ 3.Weight horizontal area of each grid cell by a[x]/mean(a[x])
304
+ 4.Convert units to a common energy unit''')
305
+ st.code('''# weight predictions and target
306
+ data.reweight_target(data_split = 'scoring')
307
+ data.reweight_preds(data_split = 'scoring')
308
+
309
+ # set and calculate metrics
310
+ data.metrics_names = ['MAE', 'RMSE', 'R2', 'bias']
311
+ data.create_metrics_df(data_split = 'scoring')''',language='python')
312
+
313
  # weight predictions and target
314
  data.reweight_target(data_split = 'scoring')
315
  data.reweight_preds(data_split = 'scoring')
 
318
  data.metrics_names = ['MAE', 'RMSE', 'R2', 'bias']
319
  data.create_metrics_df(data_split = 'scoring')
320
 
 
321
 
322
+
323
+
324
+ st.subheader('Create plots')
325
+ st.code('''# set plotting settings
326
+ %config InlineBackend.figure_format = 'retina'
327
  letters = string.ascii_lowercase
328
 
329
  # create custom dictionary for plotting
 
350
  axes[i].set_xticklabels(plot_df_byvar[data.metrics_names[i]].index, \
351
  rotation=0, ha='center')
352
 
353
+ axes[0].legend(columnspacing = .9,
354
+ labelspacing = .3,
355
+ handleheight = .07,
356
+ handlelength = 1.5,
357
+ handletextpad = .2,
358
+ borderpad = .2,
359
+ ncol = 3,
360
+ loc = 'upper right')
361
+ fig.set_size_inches(7,8)
362
+ fig.tight_layout()''')
363
+
364
+ letters = string.ascii_lowercase
365
+
366
+ dict_var = data.metrics_var_scoring
367
+ plot_df_byvar = {}
368
+ for metric in data.metrics_names:
369
+ plot_df_byvar[metric] = pd.DataFrame([dict_var[model][metric] for model in data.model_names],
370
+ index=data.model_names)
371
+ plot_df_byvar[metric] = plot_df_byvar[metric].rename(columns = data.var_short_names).transpose()
372
+
373
+ fig, axes = plt.subplots(nrows = len(data.metrics_names), sharex = True)
374
+ for i in range(len(data.metrics_names)):
375
+ plot_df_byvar[data.metrics_names[i]].plot.bar(
376
+ legend = False,
377
+ ax = axes[i])
378
+ if data.metrics_names[i] != 'R2':
379
+ axes[i].set_ylabel('$W/m^2$')
380
+ else:
381
+ axes[i].set_ylim(0,1)
382
+
383
+ axes[i].set_title(f'({letters[i]}) {data.metrics_names[i]}')
384
+ axes[i].set_xlabel('Output variable')
385
+ axes[i].set_xticklabels(plot_df_byvar[data.metrics_names[i]].index, \
386
+ rotation=0, ha='center')
387
+
388
  axes[0].legend(columnspacing = .9,
389
  labelspacing = .3,
390
  handleheight = .07,
 
395
  loc = 'upper right')
396
  fig.set_size_inches(7,8)
397
  fig.tight_layout()
398
+
399
  st.pyplot(fig)
400
 
401
+ st.text('End')