Spaces:
Sleeping
Sleeping
File size: 1,881 Bytes
43e3ffb fa10c3d 43e3ffb fa10c3d 43e3ffb fa10c3d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
import pandas as pd
from src.utils.helper_functions import save_parquet, load_parquet
from config import Config
# Attribute namespace of Config (its ``__dict__``), so settings can be read by
# key in this module, e.g. config["fold_input_directory"].
config = vars(Config)
def prepare_data(
    dataframe,
    data,
    split_local_test,
    add_datetime_features=True,
    add_lag_features=True,
):
    """Run the enabled feature-building steps over ``dataframe``.

    Steps run in a fixed order: datetime features first, lag features second.
    Each step receives the output of the previous one.

    Args:
        dataframe: Frame to enrich.
        data: Forwarded unchanged to ``lag_features``.
        split_local_test: Forwarded unchanged to ``lag_features``.
        add_datetime_features: Apply ``datetime_features`` when truthy.
        add_lag_features: Apply ``lag_features`` when truthy.

    Returns:
        The output of the last step that ran, or ``dataframe`` itself when
        both flags are falsy.
    """
    print('Building features...')

    # (enabled flag, step) pairs, in the order the steps must be applied.
    pipeline = (
        (add_datetime_features, lambda frame: datetime_features(frame)),
        (add_lag_features, lambda frame: lag_features(frame, data, split_local_test)),
    )
    for enabled, step in pipeline:
        if enabled:
            dataframe = step(dataframe)
    return dataframe
def lag_features(dataframe, data, split_local_test):
    """Left-join lagged ``*_backlog`` columns onto ``dataframe``.

    When ``split_local_test`` is truthy, the lags are computed from ``data``:
    every column whose name ends in ``_backlog`` is shifted by 9, 10, 11 and
    12 rows within each ``product_id`` group. The shifted columns, together
    with ``product_id`` and ``date``, are written to
    ``shift_features.parquet`` under ``config["fold_input_directory"]``.
    When it is falsy, that parquet file is loaded instead of recomputing.

    NOTE: the shifted columns are added to ``data`` itself, so the caller's
    frame gains those columns in the compute branch.
    NOTE(review): the shift is positional within each group, so this
    presumably expects ``data`` to be ordered by date per product -- confirm
    against the caller.

    Args:
        dataframe: Frame to enrich; must contain ``product_id`` and ``date``.
        data: Source frame for the lags (only read when ``split_local_test``
            is truthy).
        split_local_test: Truthy to compute and persist the lags, falsy to
            load the previously persisted ones.

    Returns:
        ``dataframe`` left-merged with the lag columns on
        ``['product_id', 'date']``.
    """
    join_keys = ['product_id', 'date']

    if not split_local_test:
        lagged = load_parquet(f'{config["fold_input_directory"]}/shift_features.parquet')
        return pd.merge(dataframe, lagged, how='left', on=join_keys)

    # Materialise the source columns first: the loop below adds columns to
    # ``data`` while we iterate.
    sources = [name for name in data.columns if name.endswith('_backlog')]
    lag_plan = [(source, offset) for source in sources for offset in range(9, 13)]

    lag_names = []
    for source, offset in lag_plan:
        lag_name = f'{source}_shift_{offset}'
        data.loc[:, lag_name] = data.groupby('product_id')[source].shift(offset)
        lag_names.append(lag_name)

    wanted = lag_names + join_keys
    save_parquet(
        dataframe=data[wanted],
        path=f'{config["fold_input_directory"]}/shift_features.parquet',
    )
    # Select again rather than reusing the frame handed to save_parquet, so
    # the merge input is independent of whatever that helper does with it.
    return pd.merge(dataframe, data[wanted], how='left', on=join_keys)
def datetime_features(dataframe, date='date', suffix=''):
    """Add month, year, quarter and ISO-week columns taken from a datetime column.

    The new columns are named ``{suffix}_month``, ``{suffix}_year``,
    ``{suffix}_quarter`` and ``{suffix}_weekofyear``. Despite its name,
    ``suffix`` is placed in front of the part name, so the default empty
    string yields ``_month``, ``_year``, and so on.

    Args:
        dataframe: Frame holding the datetime column; it is modified in place.
        date: Name of the column to read; it must support the ``.dt`` accessor.
        suffix: Text prepended to each generated column name.

    Returns:
        The same ``dataframe`` object, with the four columns added.
    """
    # Column name -> extractor, in the order the columns are created.
    derived = {
        f'{suffix}_month': lambda stamps: stamps.dt.month,
        f'{suffix}_year': lambda stamps: stamps.dt.year,
        f'{suffix}_quarter': lambda stamps: stamps.dt.quarter,
        f'{suffix}_weekofyear': lambda stamps: stamps.dt.isocalendar().week,
    }
    for column, extract in derived.items():
        dataframe[column] = extract(dataframe[date])
    return dataframe