File size: 1,881 Bytes
43e3ffb
 
 
 
 
 
fa10c3d
 
43e3ffb
 
 
 
fa10c3d
 
 
 
 
 
 
43e3ffb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fa10c3d
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import pandas as pd
from src.utils.helper_functions import save_parquet, load_parquet
from config import Config

# Attribute namespace (``__dict__``) of ``Config``, so settings can be read
# with key lookups such as ``config["fold_input_directory"]``.
config = vars(Config)

def prepare_data(dataframe, data, split_local_test,
                 add_datetime_features=True, add_lag_features=True):
    """Run the enabled feature-building steps on ``dataframe``.

    Args:
        dataframe: Frame the features are added to.
        data: Forwarded to ``lag_features`` as the source of lag columns.
        split_local_test: Forwarded unchanged to ``lag_features``.
        add_datetime_features: When truthy, apply ``datetime_features``.
        add_lag_features: When truthy, apply ``lag_features``.

    Returns:
        The frame produced by the last enabled step, or ``dataframe`` itself
        when both steps are disabled.
    """
    print('Building features...')

    enriched = dataframe

    # Datetime features run first; lag features are merged afterwards.
    if add_datetime_features:
        enriched = datetime_features(enriched)
    if add_lag_features:
        enriched = lag_features(enriched, data, split_local_test)

    return enriched

def lag_features(dataframe, data, split_local_test, shifts=range(9, 13)):
    """Attach per-product lagged backlog columns to ``dataframe``.

    When ``split_local_test`` is truthy, lag columns are computed from every
    column of ``data`` whose name ends with ``'_backlog'``, written to
    ``shift_features.parquet`` under ``config["fold_input_directory"]`` and
    merged into ``dataframe``.  Otherwise the previously written parquet file
    is loaded and merged instead, and ``data`` is not read.

    Args:
        dataframe: Frame to enrich; merged on ``product_id`` and ``date``.
        data: Frame holding the ``*_backlog`` columns together with
            ``product_id`` and ``date``.  It is left unmodified.
        split_local_test: Truthy to compute and persist the lag features,
            falsy to reuse the persisted ones.
        shifts: Row offsets to lag by within each ``product_id`` group.
            Defaults to 9, 10, 11 and 12.

    Returns:
        ``dataframe`` left-merged with the lag columns on
        ``['product_id', 'date']``.
    """
    key_cols = ['product_id', 'date']
    features_path = f'{config["fold_input_directory"]}/shift_features.parquet'

    if split_local_test:
        # Materialize once so a one-shot iterable is not exhausted after the
        # first backlog column.
        shifts = tuple(shifts)

        backlog_cols = [col for col in data.columns if col.endswith('_backlog')]

        # Build the lags in a separate frame so the caller's ``data`` does not
        # silently gain the shift columns.
        map_data = data[key_cols].copy()
        per_product = data.groupby('product_id')

        lag_backlog_cols = []
        for col in backlog_cols:
            for shift in shifts:
                shift_col_name = f'{col}_shift_{shift}'
                # NOTE(review): shift() lags by row position inside each
                # group, so this presumes ``data`` is ordered by date within
                # each product -- confirm against the caller.
                map_data[shift_col_name] = per_product[col].shift(shift)
                lag_backlog_cols.append(shift_col_name)

        # Keep the persisted layout: lag columns first, then the merge keys.
        map_data = map_data[lag_backlog_cols + key_cols]

        save_parquet(dataframe=map_data, path=features_path)
    else:
        map_data = load_parquet(features_path)

    return pd.merge(dataframe, map_data, how='left', on=key_cols)

def datetime_features(dataframe, date='date', suffix=''):
    """Add calendar columns derived from a datetime column, in place.

    Four columns are written to ``dataframe``: ``{suffix}_month``,
    ``{suffix}_year``, ``{suffix}_quarter`` and ``{suffix}_weekofyear``
    (the ISO calendar week).  With the default empty ``suffix`` the names
    therefore start with an underscore.

    Args:
        dataframe: Frame to extend; it is modified in place.
        date: Name of the datetime column to read.
        suffix: Text placed before ``_month``, ``_year``, etc. in the new
            column names.

    Returns:
        The same ``dataframe`` object with the new columns added.
    """
    column_builders = {
        f'{suffix}_month': lambda stamps: stamps.dt.month,
        f'{suffix}_year': lambda stamps: stamps.dt.year,
        f'{suffix}_quarter': lambda stamps: stamps.dt.quarter,
        f'{suffix}_weekofyear': lambda stamps: stamps.dt.isocalendar().week,
    }

    # Re-read the source column for each feature, in declaration order.
    for column_name, build in column_builders.items():
        dataframe[column_name] = build(dataframe[date])

    return dataframe