# NOTE(review): the following three lines appear to be a Hugging Face Spaces
# status banner accidentally pasted into the source; kept here as a comment so
# the file remains valid Python:
#   Spaces:
#   Running
#   Running
from pathlib import Path | |
import pandas as pd | |
import os | |
import re | |
from scipy import stats | |
from src.constants import ProblemTypes, MetricNames | |
# Metric columns (under the "eval_metrics/" prefix used in the results CSVs)
# that get aggregated via geometric mean when grouping results by model.
METRIC_CHOICES = [
    f"eval_metrics/{MetricNames.normalized_error}",
    f"eval_metrics/{MetricNames.fit_time_per_1K_rows}",
    f"eval_metrics/{MetricNames.inference_time_per_1K_rows}",
]
def format_number(num):
    """Format a numeric value for display.

    Values with absolute magnitude >= 100 are rendered in scientific
    notation with one decimal (e.g. "1.2e+05"); smaller values are rendered
    with three decimals (e.g. "0.123"). Non-numeric values — including
    booleans, which are deliberately NOT treated as numbers even though
    ``bool`` subclasses ``int`` — are returned unchanged.

    Parameters
    ----------
    num : Any
        The cell value to format.

    Returns
    -------
    str or Any
        A formatted string for numeric input, otherwise the input as-is.
    """
    # bool is a subclass of int, so it must be excluded explicitly;
    # otherwise True/False would come back as "1.000"/"0.000".
    if isinstance(num, (int, float)) and not isinstance(num, bool):
        if abs(num) >= 10**2:
            return f"{num:.1e}"
        return f"{num:.3f}"
    # Return non-numeric values as-is
    return num
def norm_sNavie(df):
    """Normalize every metric column by the seasonal-naive baseline row.

    Each column except 'model' is divided element-wise by the value the
    'seasonal_naive' model achieved in that column, so the baseline row
    itself becomes 1.0 everywhere. The input frame is not mutated.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain a 'model' column with one 'seasonal_naive' row.

    Returns
    -------
    pd.DataFrame
        A normalized copy of *df*.

    Raises
    ------
    ValueError
        If no row with model == 'seasonal_naive' is present (the original
        code crashed here with an opaque IndexError).
    """
    baseline_rows = df[df['model'] == 'seasonal_naive']
    if baseline_rows.empty:
        raise ValueError(
            "norm_sNavie: no 'seasonal_naive' row found to normalize against"
        )
    baseline = baseline_rows.iloc[0]
    df_normalized = df.copy()
    for column in df.columns:
        if column != 'model':  # the model-name column is not numeric
            df_normalized[column] = df[column] / baseline[column]
    return df_normalized
def pivot_df(file_name, tab_name):
    """Read a results CSV and widen it to one column per (group, metric) pair.

    The CSV is melted to long form on *tab_name* and 'model', metric names
    are mapped to display labels (MAPE, CRPS), and the result is pivoted so
    each column reads "<group> (<metric>)", indexed by model. The special
    tab 'univariate' is first recoded into a 'variate_type' column.

    Returns
    -------
    pd.DataFrame
        One row per model, values rounded to 3 decimals.
    """
    table = pd.read_csv(file_name)
    if tab_name == 'univariate':
        # Recode the boolean flag into human-readable categories.
        table['univariate'] = table['univariate'].replace(
            {True: 'univariate', False: 'multivariate'}
        )
        table = table.rename(columns={'univariate': 'variate_type'})
        tab_name = 'variate_type'
    long_form = table.melt(
        id_vars=[tab_name, 'model'], var_name='metric', value_name='value'
    )
    long_form['metric'] = long_form['metric'].replace({
        'eval_metrics/MAPE[0.5]': 'MAPE',
        'eval_metrics/mean_weighted_sum_quantile_loss': 'CRPS'
    })
    wide = long_form.pivot_table(
        index='model', columns=[tab_name, 'metric'], values='value'
    )
    # Flatten the (group, metric) MultiIndex into "group (metric)" labels.
    wide.columns = ['{} ({})'.format(group, metric) for group, metric in wide.columns]
    return wide.reset_index().round(3)
def rename_metrics(df):
    """Map raw "eval_metrics/..." column names to human-readable labels.

    Columns not present in the mapping are left untouched (pandas rename
    semantics). Returns a renamed copy; the input is not mutated.
    """
    display_names = {
        f'eval_metrics/{MetricNames.normalized_error}': MetricNames.normalized_error,
        f'eval_metrics/{MetricNames.inference_time_per_1K_rows}': "Inference time / 1K rows (s)",
        f'eval_metrics/{MetricNames.fit_time_per_1K_rows}': "Fit time / 1K rows (s)",
    }
    return df.rename(columns=display_names)
def format_df(df):
    """Round every cell via format_number, keeping metric columns as floats.

    Each cell is first rendered through ``format_number`` (which returns
    strings like "0.123" or "1.2e+05"), then every column except the first
    (the model-name column) is parsed back to float — the net effect is a
    magnitude-aware rounding of the numeric columns.
    """
    formatted = df.applymap(format_number)
    numeric_part = formatted.iloc[:, 1:].astype(float)
    formatted.iloc[:, 1:] = numeric_part
    return formatted
def unify_freq(df):
    """Canonicalize pandas-style frequency codes into readable names.

    Strips digits (e.g. "15T" -> "T") and any "-" suffix (e.g. "W-SUN" ->
    "W"), then maps the remaining code to a word ("Hourly", "Weekly", ...).
    Codes outside the mapping pass through unchanged. Mutates and returns
    *df*.
    """
    code_to_name = {
        'T': 'Minutely',
        'H': 'Hourly',
        'D': 'Daily',
        'W': 'Weekly',
        'M': 'Monthly',
        'Q': 'Quarterly',
        'Y': 'Yearly',
        'A': 'Yearly',
        'S': 'Secondly'
    }
    # Drop the multiplier digits, then anything after a '-' anchor suffix.
    bare_codes = df['frequency'].str.replace(r'\d+', '', regex=True)
    bare_codes = bare_codes.str.split('-').str[0]
    df['frequency'] = bare_codes.replace(code_to_name)
    return df
def pivot_existed_df(df, tab_name):
    """Widen an already-aggregated results frame to (group, metric) columns.

    Like ``pivot_df`` but operates on an in-memory frame (typically the
    grouped output of ``get_grouped_dfs``) instead of a CSV, and formats the
    result through ``format_df``. The special tab 'univariate' is first
    recoded into a 'variate_type' column.

    Fixes over the original: removed leftover debug ``print`` calls, removed
    dead commented-out code, and stopped shadowing *tab_name* inside the
    column-flattening comprehension.

    Returns
    -------
    pd.DataFrame
        One row per model, metric columns formatted via ``format_df``.
    """
    df = df.reset_index()
    if tab_name == 'univariate':
        # Recode the boolean flag into human-readable categories.
        df['univariate'] = df['univariate'].replace({True: 'univariate', False: 'multivariate'})
        df.rename(columns={'univariate': 'variate_type'}, inplace=True)
        tab_name = 'variate_type'
    df_melted = pd.melt(df, id_vars=[tab_name, 'model'], var_name='metric', value_name='value')
    df_melted['metric'] = df_melted['metric'].replace({
        'eval_metrics/normalized-error': 'normalized-error',
        'eval_metrics/mean_weighted_sum_quantile_loss': 'CRPS',
        'rank': 'Rank',
    })
    df_pivot = df_melted.pivot_table(index='model', columns=[tab_name, 'metric'], values='value')
    # Flatten the (group, metric) MultiIndex into "group (metric)" labels.
    df_pivot.columns = [f'{group} ({metric})' for group, metric in df_pivot.columns]
    df_pivot = df_pivot.reset_index()
    return format_df(df_pivot)
def get_grouped_dfs(root_dir='results', ds_properties='results/dataset_properties.csv'):
    """Collect all result CSVs, enrich them with dataset properties, and
    return aggregated leaderboards.

    Parameters
    ----------
    root_dir : str
        Directory tree searched recursively for ``all_results.csv`` files.
    ds_properties : str
        CSV describing each dataset; its columns are joined onto the results
        by (slugified) dataset name.

    Returns
    -------
    dict
        ``{'overall': <per-model DataFrame>, ProblemTypes.col_name:
        <per-(problem type, model) DataFrame>}``.
    """
    df_list = []
    # Walk through all folders and subfolders in the root directory
    for csv_path in Path(root_dir).rglob("*csv"):
        if 'all_results.csv' in str(csv_path):
            df_list.append(pd.read_csv(csv_path))
    # Concatenate all dataframes into one
    all_results_df = pd.concat(df_list, ignore_index=True)
    all_results_df = all_results_df.sort_values(by=['model', 'dataset']).reset_index(drop=True)
    dataset_properties = pd.read_csv(ds_properties)
    # Slugify the dataset name of each row so it matches the names used in the
    # results files, following these rules:
    # 1. make all characters lowercase
    dataset_properties['dataset'] = dataset_properties['dataset'].apply(lambda x: x.lower())
    # 2. replace all spaces with underscores
    dataset_properties['dataset'] = dataset_properties['dataset'].apply(lambda x: x.replace(' ', '_'))
    # 3. Replace all dashes with underscores
    dataset_properties['dataset'] = dataset_properties['dataset'].apply(lambda x: x.replace('-', '_'))
    # 4. Replace consecutive underscores with a single underscore. There maybe more than 2 consecutive underscores
    dataset_properties['dataset'] = dataset_properties['dataset'].apply(lambda x: re.sub('_+', '_', x))
    # 5. Remove all leading and trailing underscores
    dataset_properties['dataset'] = dataset_properties['dataset'].apply(lambda x: x.strip('_'))
    df = all_results_df
    # convert it to a dictionary, with dataset as the key, and the value as another dictionary.
    # The inner dictionary has the column names as the key, and the value as the value.
    dataset_properties_dict = dataset_properties.set_index('dataset').T.to_dict('dict')
    # match the dataset name in dataset_properties_dict with the dataset name in df
    # and add a new column for each key/value pair in the inner dictionary.
    for dataset in dataset_properties_dict.keys():
        for key in dataset_properties_dict[dataset].keys():
            df.loc[df['dataset'] == dataset, key] = dataset_properties_dict[dataset][key]
    # unify the frequency
    # df = unify_freq(df)
    # standardize by seasonal naive
    # NOTE(review): despite the comment above, standardize_df applies per-dataset
    # min-max normalization, not a seasonal-naive division — confirm intent.
    df = standardize_df(df)
    # TODO compute normalized error
    # TODO change to ELO
    RANKING_METRIC = "eval_metrics/normalized-error"
    # compute metrics that requires all methods results such as Rank and Elo.
    df['rank'] = df.groupby(['dataset', ProblemTypes.col_name])[f'{RANKING_METRIC}'].rank(method='first', ascending=True)
    # 'ELO' is currently just the rank scaled by 100 — a placeholder until a
    # real Elo computation replaces it (see TODO above).
    df['ELO'] = df.groupby(['dataset', ProblemTypes.col_name])[f'{RANKING_METRIC}'].rank(method='first',
                                                                                         ascending=True) * 100
    # Overall leaderboard: geometric mean of the metric columns per model,
    # plus arithmetic means of rank and ELO, concatenated column-wise.
    grouped_results_overall = df.groupby(['model'])[METRIC_CHOICES].agg(stats.gmean)
    grouped_results_overall_rank = df.groupby(['model'])[['rank']].mean()
    grouped_results_overall_elo = df.groupby(['model'])[['ELO']].mean()
    grouped_results_overall = pd.concat([
        grouped_results_overall,
        grouped_results_overall_rank,
        grouped_results_overall_elo],
        axis=1
    )
    # grouped_results_overall = grouped_results_overall.rename(columns={'model':'Model'})
    # grouped_results.to_csv(f'artefacts/grouped_results_by_model.csv')
    grouped_dfs = {}
    # Per-tab leaderboards; other candidate tabs are kept for reference:
    # for col_name in ["domain", 'term_length', 'frequency', 'univariate']:
    for col_name in [ProblemTypes.col_name]:
        grouped_dfs[col_name] = group_by(df, col_name)
        # print(f"Grouping by {col_name}:\n {grouped_dfs.head(20)}")
    grouped_dfs['overall'] = grouped_results_overall
    return grouped_dfs
def standardize_df(df):
    """Add a per-dataset min-max-normalized error column to *df*.

    Within each dataset group, the raw error metric is rescaled to [0, 1]
    via (x - min) / (max - min) and stored under the normalized-error
    column name. Mutates and returns *df*.
    """
    raw_metric = f'eval_metrics/{MetricNames.raw_error}'
    normalized_metric = f'eval_metrics/{MetricNames.normalized_error}'

    # Perform min-max normalization. We may want to do something more outlier
    # robust like what is done in Tabrepo by doing
    # (x - x.min()) / (x.median() - x.min())
    def min_max(values):
        return (values - values.min()) / (values.max() - values.min())

    df[normalized_metric] = df.groupby('dataset')[raw_metric].transform(min_max)
    return df
def group_by(df, col_name):
    """Aggregate results per (*col_name*, model) pair.

    Metric columns in METRIC_CHOICES are combined with a geometric mean;
    'rank' is combined with an arithmetic mean. The two aggregates are
    joined column-wise and returned with a (*col_name*, model) index.
    """
    keys = [col_name, 'model']
    metric_means = df.groupby(keys)[METRIC_CHOICES].agg(stats.gmean)
    rank_means = df.groupby(keys)[['rank']].mean()
    # Results used to be dumped for inspection:
    # grouped_results.to_csv(f'grouped_results_by_{col_name}.csv')
    return pd.concat([metric_means, rank_means], axis=1)