geoalgo committed on
Commit
6f977e2
·
1 Parent(s): 0aedfd6

add results

Browse files
results/.DS_Store ADDED
Binary file (6.15 kB). View file
 
results/AutoGluon (best)/all_results.csv ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ dataset,model,eval_metrics/normalized-error,eval_metrics/fit-time-per-1K-rows,eval_metrics/inference-time-per-1K-rows,problem_type,num_features
2
+ airline,AutoGluon (best),0.4579066991764239,64,47,Regression,12
3
+ electricity,AutoGluon (best),0.5994102307296079,18,46,Classification,2
4
+ solar-energy,AutoGluon (best),0.04653723902390405,98,12,Multi-classification,3
5
+ traffic,AutoGluon (best),0.8329854602344595,56,57,Multi-classification,12
6
+ volcano,AutoGluon (best),0.1941381662313174,15,11,Regression,12
results/AutoGluon (best)/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model": "AutoGluon (best)", "method_type": "AutoML"}
results/CatBoost/all_results.csv ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ dataset,model,eval_metrics/normalized-error,eval_metrics/fit-time-per-1K-rows,eval_metrics/inference-time-per-1K-rows,problem_type,num_features
2
+ airline,CatBoost,0.9390154997236811,9,60,Regression,12
3
+ electricity,CatBoost,0.5553220864715752,83,99,Classification,2
4
+ solar-energy,CatBoost,0.29752892257633534,75,75,Multi-classification,3
5
+ traffic,CatBoost,0.40131222920888976,42,25,Multi-classification,12
6
+ volcano,CatBoost,0.07149164567350186,93,30,Regression,12
results/CatBoost/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model": "CatBoost", "method_type": "Boosted-tree"}
results/KNN/all_results.csv ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ dataset,model,eval_metrics/normalized-error,eval_metrics/fit-time-per-1K-rows,eval_metrics/inference-time-per-1K-rows,problem_type,num_features
2
+ airline,KNN,0.16340155736699258,82,41,Regression,12
3
+ electricity,KNN,0.9561332837281634,88,60,Classification,2
4
+ solar-energy,KNN,0.8283974331685697,55,53,Multi-classification,3
5
+ traffic,KNN,0.8769826951645302,11,65,Multi-classification,12
6
+ volcano,KNN,0.5149493758906379,82,49,Regression,12
results/KNN/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model": "KNN", "method_type": "Other"}
results/TabPFN-v2/all_results.csv ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ dataset,model,eval_metrics/normalized-error,eval_metrics/fit-time-per-1K-rows,eval_metrics/inference-time-per-1K-rows,problem_type,num_features
2
+ airline,TabPFN-v2,0.442424494400528,35,25,Regression,12
3
+ electricity,TabPFN-v2,0.6619106100291952,40,67,Classification,2
4
+ solar-energy,TabPFN-v2,0.011669465671394597,72,70,Multi-classification,3
5
+ traffic,TabPFN-v2,0.6169987849711932,30,57,Multi-classification,12
6
+ volcano,TabPFN-v2,0.2498786665823265,2,57,Regression,12
results/TabPFN-v2/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model": "TabPFN-v2", "method_type": "Foundational"}
results/TabPFN/all_results.csv ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ dataset,model,eval_metrics/normalized-error,eval_metrics/fit-time-per-1K-rows,eval_metrics/inference-time-per-1K-rows,problem_type,num_features
2
+ airline,TabPFN,0.7101141981744494,76,94,Regression,12
3
+ electricity,TabPFN,0.11984123950907233,70,7,Classification,2
4
+ solar-energy,TabPFN,0.01033281899202676,69,69,Multi-classification,3
5
+ traffic,TabPFN,0.8682598272397801,47,22,Multi-classification,12
6
+ volcano,TabPFN,0.19462280337949245,3,71,Regression,12
results/TabPFN/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model": "TabPFN", "method_type": "Foundational"}
results/dataset_properties.csv CHANGED
@@ -1,4 +1,4 @@
1
- dataset,problem_type,num_variates
2
  airline,Regression,12
3
  electricity,Classification,2
4
  solar-energy,Multi-classification,3
 
1
+ dataset,problem_type,num_features
2
  airline,Regression,12
3
  electricity,Classification,2
4
  solar-energy,Multi-classification,3
src/constants.py CHANGED
@@ -22,4 +22,4 @@ class MethodTypes:
22
  class DatasetInfo:
23
  col_name: str = "dataset"
24
  num_rows: str = "num_rows"
25
- num_features: str = "num_variates"
 
22
  class DatasetInfo:
23
  col_name: str = "dataset"
24
  num_rows: str = "num_rows"
25
+ num_features: str = "num_features"
src/utils.py CHANGED
@@ -122,9 +122,6 @@ def get_grouped_dfs(root_dir='results', ds_properties='results/dataset_propertie
122
  if file == 'all_results.csv':
123
  file_path = os.path.join(subdir, file)
124
  df = pd.read_csv(file_path)
125
- # Rename the column if it exists
126
- if 'eval_metrics/MASE[0.5]' in df.columns:
127
- df = df.rename(columns={'eval_metrics/MASE[0.5]': 'eval_metrics/normalized-error'})
128
  df_list.append(df)
129
  # Concatenate all dataframes into one
130
  all_results_df = pd.concat(df_list, ignore_index=True)
@@ -167,27 +164,34 @@ def get_grouped_dfs(root_dir='results', ds_properties='results/dataset_propertie
167
  # df = unify_freq(df)
168
  # standardize by seasonal naive
169
  # df = standardize_df(df)
170
- metric_columns = ['eval_metrics/MSE[mean]', 'eval_metrics/MSE[0.5]', 'eval_metrics/MAE[0.5]',
171
- 'eval_metrics/normalized-error', 'eval_metrics/MAPE[0.5]', 'eval_metrics/sMAPE[0.5]',
172
- 'eval_metrics/MSIS', 'eval_metrics/RMSE[mean]', 'eval_metrics/NRMSE[mean]',
173
- 'eval_metrics/ND[0.5]', 'eval_metrics/mean_weighted_sum_quantile_loss']
174
  RANKING_METRIC = "eval_metrics/normalized-error"
175
- df['rank'] = df.groupby(['dataset', ProblemTypes.col_name])[f'{RANKING_METRIC}'].rank(method='first',
176
- ascending=True)
177
- # create a new column called rank
178
- metric_columns.append('rank')
179
- # create a new column called univariate. Set it to true if column num_variates is 1, otherwise set it to false
180
- df['univariate'] = df['num_variates'] == 1
 
 
 
 
181
 
182
  # group by domain
183
  grouped_results_overall = df.groupby(['model'])[METRIC_CHOICES].agg(stats.gmean)
184
  grouped_results_overall_rank = df.groupby(['model'])[['rank']].mean()
185
- grouped_results_overall = pd.concat([grouped_results_overall, grouped_results_overall_rank], axis=1)
 
 
 
 
 
 
186
 
187
  # grouped_results_overall = grouped_results_overall.rename(columns={'model':'Model'})
188
  # grouped_results.to_csv(f'artefacts/grouped_results_by_model.csv')
189
  grouped_dfs = {}
190
- # for col_name in ["domain", 'term_length', 'frequency', 'univariate']:
191
  for col_name in [ProblemTypes.col_name]:
192
  grouped_dfs[col_name] = group_by(df, col_name)
193
  # print(f"Grouping by {col_name}:\n {grouped_dfs.head(20)}")
 
122
  if file == 'all_results.csv':
123
  file_path = os.path.join(subdir, file)
124
  df = pd.read_csv(file_path)
 
 
 
125
  df_list.append(df)
126
  # Concatenate all dataframes into one
127
  all_results_df = pd.concat(df_list, ignore_index=True)
 
164
  # df = unify_freq(df)
165
  # standardize by seasonal naive
166
  # df = standardize_df(df)
167
+
168
+ # TODO change to ELO
 
 
169
  RANKING_METRIC = "eval_metrics/normalized-error"
170
+
171
+ # compute metrics that require the results of all methods, such as Rank and Elo.
172
+ df['rank'] = df.groupby(['dataset', ProblemTypes.col_name])[f'{RANKING_METRIC}'].rank(method='first', ascending=True)
173
+
174
+ # TODO compute ELO from available data
175
+ df['ELO'] = df.groupby(['dataset', ProblemTypes.col_name])[f'{RANKING_METRIC}'].rank(method='first',
176
+ ascending=True) * 100
177
+
178
+ # # create a new column called univariate. Set it to true if column num_variates is 1, otherwise set it to false
179
+ # df['univariate'] = df['num_variates'] == 1
180
 
181
  # group by domain
182
  grouped_results_overall = df.groupby(['model'])[METRIC_CHOICES].agg(stats.gmean)
183
  grouped_results_overall_rank = df.groupby(['model'])[['rank']].mean()
184
+ grouped_results_overall_elo = df.groupby(['model'])[['ELO']].mean()
185
+ grouped_results_overall = pd.concat([
186
+ grouped_results_overall,
187
+ grouped_results_overall_rank,
188
+ grouped_results_overall_elo],
189
+ axis=1
190
+ )
191
 
192
  # grouped_results_overall = grouped_results_overall.rename(columns={'model':'Model'})
193
  # grouped_results.to_csv(f'artefacts/grouped_results_by_model.csv')
194
  grouped_dfs = {}
 
195
  for col_name in [ProblemTypes.col_name]:
196
  grouped_dfs[col_name] = group_by(df, col_name)
197
  # print(f"Grouping by {col_name}:\n {grouped_dfs.head(20)}")