Spaces:
Running
Running
File size: 8,784 Bytes
c5607c4 409ae36 90f7215 d56df1b 90f7215 0aedfd6 ccb6d6d 409ae36 f5303bc 90f7215 8fab3a5 0aedfd6 8fab3a5 5928f02 90f7215 0aedfd6 4b38e69 90f7215 ccb6d6d 8fab3a5 90f7215 c5607c4 90f7215 c5607c4 90f7215 c5607c4 90f7215 c5607c4 90f7215 5928f02 0aedfd6 90f7215 c5607c4 6f977e2 c5607c4 6f977e2 0aedfd6 6f977e2 90f7215 d56df1b 6f977e2 90f7215 4b38e69 90f7215 f7abd76 0aedfd6 90f7215 c5607c4 8fab3a5 90f7215 c5607c4 90f7215 c5607c4 90f7215 d56df1b 90f7215 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 |
from pathlib import Path
import pandas as pd
import os
import re
from scipy import stats
from src.constants import ProblemTypes, MetricNames
# Metric columns (as written in the per-run results CSVs) that are
# aggregated with a geometric mean when grouping results per model.
METRIC_CHOICES = [
    f"eval_metrics/{MetricNames.normalized_error}",
    f"eval_metrics/{MetricNames.fit_time_per_1K_rows}",
    f"eval_metrics/{MetricNames.inference_time_per_1K_rows}",
]
def format_number(num):
    """Render a value for display.

    Numeric values with magnitude >= 100 become scientific notation
    (e.g. "1.2e+02"); smaller numbers get three decimals (e.g. "0.500").
    Non-numeric values are passed through unchanged.
    """
    if not isinstance(num, (int, float)):
        # Leave strings and other non-numeric cells untouched.
        return num
    return f"{num:.1e}" if abs(num) >= 100 else f"{num:.3f}"
def norm_sNavie(df):
    """Normalize every metric column by the seasonal-naive baseline.

    Each non-'model' column is divided element-wise by the value the
    'seasonal_naive' model achieved in that column, so the baseline
    scores 1.0 everywhere.  The input dataframe is not modified.

    Raises IndexError (via ``.iloc[0]``) if no 'seasonal_naive' row exists.
    """
    # NOTE(review): the name keeps the historical misspelling ("sNavie")
    # because callers may reference it; renaming would break the interface.
    df_normalized = df.copy()
    seasonal_naive_row = df[df['model'] == 'seasonal_naive'].iloc[0]
    for column in df.columns:
        if column != 'model':  # skip the identifier column
            df_normalized[column] = df[column] / seasonal_naive_row[column]
    return df_normalized
def pivot_df(file_name, tab_name):
    """Read a results CSV and pivot it to one row per model.

    Output columns are named '<tab value> (<metric>)', with the raw
    metric names shortened to 'MAPE' / 'CRPS'.  Values are rounded to
    three decimals.
    """
    df = pd.read_csv(file_name)
    if tab_name == 'univariate':
        # Present the boolean flag as a human-readable category.
        df['univariate'] = df['univariate'].replace({True: 'univariate', False: 'multivariate'})
        df.rename(columns={'univariate': 'variate_type'}, inplace=True)
        tab_name = 'variate_type'
    short_names = {
        'eval_metrics/MAPE[0.5]': 'MAPE',
        'eval_metrics/mean_weighted_sum_quantile_loss': 'CRPS'
    }
    long_form = df.melt(id_vars=[tab_name, 'model'], var_name='metric', value_name='value')
    long_form['metric'] = long_form['metric'].replace(short_names)
    wide = long_form.pivot_table(index='model', columns=[tab_name, 'metric'], values='value')
    # Flatten the (tab value, metric) MultiIndex into single labels.
    wide.columns = ['{} ({})'.format(group, metric) for group, metric in wide.columns]
    return wide.reset_index().round(3)
def rename_metrics(df):
    """Map raw 'eval_metrics/...' column names onto display labels."""
    display_names = {
        f'eval_metrics/{MetricNames.normalized_error}': MetricNames.normalized_error,
        f'eval_metrics/{MetricNames.inference_time_per_1K_rows}': "Inference time / 1K rows (s)",
        f'eval_metrics/{MetricNames.fit_time_per_1K_rows}': "Fit time / 1K rows (s)",
    }
    return df.rename(columns=display_names)
def format_df(df):
    """Format every cell for display, then coerce metrics back to float.

    Each cell is passed through ``format_number`` (scientific notation for
    |x| >= 100, three decimals otherwise); the round-trip through the
    formatted string effectively rounds the numeric values.  All columns
    except the first are then cast back to float — assumes the first
    column holds the (non-numeric) model name.
    """
    # DataFrame.applymap was deprecated in pandas 2.1 and removed in 3.0;
    # column-wise Series.map is the stable element-wise equivalent.
    df = df.apply(lambda column: column.map(format_number))
    df.iloc[:, 1:] = df.iloc[:, 1:].astype(float)
    return df
def unify_freq(df):
    """Translate pandas-style frequency codes into human-readable names.

    Examples: '15T' -> 'Minutely', 'W-SUN' -> 'Weekly'.  The 'frequency'
    column of the given dataframe is overwritten with the result.
    """
    human_names = {
        'T': 'Minutely',
        'H': 'Hourly',
        'D': 'Daily',
        'W': 'Weekly',
        'M': 'Monthly',
        'Q': 'Quarterly',
        'Y': 'Yearly',
        'A': 'Yearly',
        'S': 'Secondly',
    }
    # Drop the multiplier digits ('15T' -> 'T') and any anchor suffix
    # after a dash ('W-SUN' -> 'W'), then map the bare code to its name.
    codes = df['frequency'].str.replace(r'\d+', '', regex=True)
    codes = codes.str.split('-').str[0]
    df['frequency'] = codes.replace(human_names)
    return df
def pivot_existed_df(df, tab_name):
    """Pivot an already-grouped (model-indexed) dataframe to one row per model.

    Output columns are named '<tab value> (<metric>)', with metric names
    shortened ('CRPS', 'Rank', 'normalized-error').  Values are formatted
    via ``format_df``.
    """
    df = df.reset_index()
    if tab_name == 'univariate':
        # Present the boolean flag as a human-readable category.
        df['univariate'] = df['univariate'].replace({True: 'univariate', False: 'multivariate'})
        df.rename(columns={'univariate': 'variate_type'}, inplace=True)
        tab_name = 'variate_type'
    df_melted = pd.melt(df, id_vars=[tab_name, 'model'], var_name='metric', value_name='value')
    df_melted['metric'] = df_melted['metric'].replace({
        'eval_metrics/normalized-error': 'normalized-error',
        'eval_metrics/mean_weighted_sum_quantile_loss': 'CRPS',
        'rank': 'Rank',
    })
    df_pivot = df_melted.pivot_table(index='model', columns=[tab_name, 'metric'], values='value')
    # Flatten the (tab value, metric) MultiIndex into single labels.
    df_pivot.columns = [f'{group} ({metric})' for group, metric in df_pivot.columns]
    df_pivot = df_pivot.reset_index()
    return format_df(df_pivot)
def _normalize_dataset_name(name):
    # Canonicalize a free-form dataset name: lowercase, spaces and dashes
    # to underscores, collapse runs of underscores, trim leading/trailing.
    name = name.lower().replace(' ', '_').replace('-', '_')
    return re.sub('_+', '_', name).strip('_')


def _load_all_results(root_dir):
    # Concatenate every 'all_results.csv' found anywhere under root_dir.
    frames = [
        pd.read_csv(csv_path)
        for csv_path in Path(root_dir).rglob("*csv")
        if 'all_results.csv' in str(csv_path)
    ]
    all_results_df = pd.concat(frames, ignore_index=True)
    return all_results_df.sort_values(by=['model', 'dataset']).reset_index(drop=True)


def get_grouped_dfs(root_dir='results', ds_properties='results/dataset_properties.csv'):
    """Collect all per-run result CSVs and aggregate them per model.

    Parameters
    ----------
    root_dir : directory searched recursively for 'all_results.csv' files.
    ds_properties : CSV with one metadata row per dataset.

    Returns a dict mapping a group name ('overall' plus each grouping
    column — currently only the problem-type column) to an aggregated
    dataframe: geometric mean of the METRIC_CHOICES metrics plus
    arithmetic mean of rank / ELO.
    """
    df = _load_all_results(root_dir)
    dataset_properties = pd.read_csv(ds_properties)
    # Dataset names in the properties file are free-form; canonicalize
    # them so they join against the names used in the result files.
    dataset_properties['dataset'] = dataset_properties['dataset'].apply(_normalize_dataset_name)
    # Attach each dataset's metadata columns to the matching result rows.
    dataset_properties_dict = dataset_properties.set_index('dataset').T.to_dict('dict')
    for dataset, properties in dataset_properties_dict.items():
        for key, value in properties.items():
            df.loc[df['dataset'] == dataset, key] = value
    # standardize by seasonal naive (per-dataset min-max normalization)
    df = standardize_df(df)
    # TODO compute normalized error
    # TODO change to ELO
    RANKING_METRIC = "eval_metrics/normalized-error"
    # compute metrics that require all methods' results, such as Rank and Elo.
    ranking_groups = df.groupby(['dataset', ProblemTypes.col_name])[RANKING_METRIC]
    df['rank'] = ranking_groups.rank(method='first', ascending=True)
    # NOTE(review): 'ELO' is currently just rank * 100 — see TODO above.
    df['ELO'] = ranking_groups.rank(method='first', ascending=True) * 100
    # Overall aggregation across all datasets: geometric mean of the
    # metrics, arithmetic mean of rank / ELO.
    grouped_results_overall = pd.concat(
        [
            df.groupby(['model'])[METRIC_CHOICES].agg(stats.gmean),
            df.groupby(['model'])[['rank']].mean(),
            df.groupby(['model'])[['ELO']].mean(),
        ],
        axis=1,
    )
    grouped_dfs = {}
    for col_name in [ProblemTypes.col_name]:
        grouped_dfs[col_name] = group_by(df, col_name)
    grouped_dfs['overall'] = grouped_results_overall
    return grouped_dfs
def standardize_df(df):
    """Add a per-dataset min-max normalized error column.

    Within each dataset group the raw error is rescaled to [0, 1] via
    (x - min) / (max - min) and written to the normalized-error column.
    """
    # NOTE: a more outlier-robust scheme (as in Tabrepo) would be
    # (x - x.min()) / (x.median() - x.min()).
    raw_metric = f'eval_metrics/{MetricNames.raw_error}'
    target_metric = f'eval_metrics/{MetricNames.normalized_error}'

    def _min_max(x):
        low = x.min()
        return (x - low) / (x.max() - low)

    df[target_metric] = df.groupby('dataset')[raw_metric].transform(_min_max)
    return df
def group_by(df, col_name):
    """Aggregate results per (col_name, model) pair.

    METRIC_CHOICES columns get the geometric mean; 'rank' gets the
    arithmetic mean.
    """
    keys = [col_name, 'model']
    metric_means = df.groupby(keys)[METRIC_CHOICES].agg(stats.gmean)
    rank_means = df.groupby(keys)[['rank']].mean()
    return pd.concat([metric_means, rank_means], axis=1)