Spaces:
Running
Running
File size: 3,661 Bytes
9daaf0d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
from dataclasses import dataclass
import json
from pathlib import Path
import pandas as pd
import numpy as np
from src.constants import MethodTypes, ProblemTypes, DatasetInfo, MetricNames, DatasetInfoRow
# Seed NumPy's global RNG so the dummy metrics generated below are reproducible.
np.random.seed(0)
def generate_dummy_data(method: str, model_type: str, datasets: list[DatasetInfoRow]):
    """Write random placeholder results for `method` under results/<method>/.

    Creates results/<method>/all_results.csv with one row per dataset
    (schema: dataset, model, raw error, fit/inference time per 1K rows) and a
    config.json recording the method name and its model-type family.

    Args:
        method: Method name; also used as the results sub-directory name.
        model_type: Model-family tag stored under `MethodTypes.col_name`.
        datasets: Dataset descriptors; only `.name` is read here.

    Returns:
        The DataFrame that was written to all_results.csv.
    """
    rows = [
        {
            "dataset": dataset.name,
            "model": method,
            # Dummy metrics: error in [0, 1), times are integers in [1, 100).
            MetricNames.raw_error: np.random.rand(),
            MetricNames.fit_time_per_1K_rows: np.random.randint(1, 100),
            MetricNames.inference_time_per_1K_rows: np.random.randint(1, 100),
        }
        for dataset in datasets
    ]
    df = pd.DataFrame(rows)
    result_path = Path(__file__).parent.parent / "results" / method
    result_path.mkdir(parents=True, exist_ok=True)
    df.to_csv(result_path / "all_results.csv", index=False)
    with open(result_path / "config.json", "w") as f:
        json.dump(
            {
                "model": method,
                MethodTypes.col_name: model_type,
            },
            f,
        )
    return df
def get_model_type(model: str):
    """Map a model name to its MethodTypes family by substring matching.

    The first table entry whose key occurs inside `model` wins (dict
    insertion order); names matching no entry fall back to MethodTypes.other.
    """
    substring_to_family = {
        "tuned": MethodTypes.automl,
        "tuned + ensemble": MethodTypes.automl,
        "FT": MethodTypes.finetuned,
        "AutoGluon": MethodTypes.automl,
        "Autosklearn2": MethodTypes.automl,
        "CAT": MethodTypes.tree,
        "EBM": MethodTypes.other,
        "FASTAI": MethodTypes.finetuned,
        "FT_TRANSFORMER": MethodTypes.finetuned,
        "GBM": MethodTypes.tree,
        "KNN": MethodTypes.other,
        "REALMLP": MethodTypes.finetuned,
        "RF": MethodTypes.tree,
        "XGB": MethodTypes.tree,
        "XT": MethodTypes.tree,
    }
    return next(
        (
            family
            for substring, family in substring_to_family.items()
            if substring in model
        ),
        MethodTypes.other,
    )
if __name__ == "__main__":
tabrepo_results_root = Path("~/Downloads/tabrepo_temp_results").expanduser()
results_root = Path(__file__).parent.parent / "results"
results_root.mkdir(exist_ok=True)
df_datasets = pd.read_csv(tabrepo_results_root / "dataset_properties.csv")
df_datasets.to_csv(results_root / "dataset_properties.csv", index=False)
df_models = pd.read_csv(tabrepo_results_root / "all_results.csv")
# For now discard tuned and ensemble
df_models = df_models[~df_models.model.str.contains("tuned")]
for model in df_models.loc[:, "model"].unique():
result_path = results_root / model
result_path.mkdir(exist_ok=True)
model_type = get_model_type(model)
df_all_results = df_models.loc[df_models.model == model].copy()
# dataset,model,raw-error,fit-time-per-1K-rows,inference-time-per-1K-rows
df_all_results.rename(columns={
"dataset": "dataset",
"model": "model",
"metric_error": "raw-error",
# TODO divide by number of rows and multiply by 1K
"time_train_s": "fit-time-per-1K-rows",
"time_infer_s": "inference-time-per-1K-rows",
}, inplace=True)
df_all_results.to_csv(result_path / "all_results.csv", index=False)
with open(result_path / "config.json", "w") as f:
f.write(json.dumps(
{
"model": model,
MethodTypes.col_name: model_type,
}
)) |