|
import pandas as pd
|
|
import numpy as np
|
|
import matplotlib.pyplot as plt
|
|
from scipy.optimize import curve_fit
|
|
from sklearn.preprocessing import MinMaxScaler
|
|
import warnings
|
|
warnings.filterwarnings("ignore")
|
|
import plotly.graph_objects as go
|
|
from utilities import (channel_name_formating)
|
|
|
|
|
|
df= pd.read_excel('response_curves_input_file.xlsx')
|
|
df.dropna(inplace=True)
|
|
df['Date'] = pd.to_datetime(df['Date'])
|
|
df.reset_index(inplace=True)
|
|
|
|
channel_cols = [
|
|
'BroadcastTV',
|
|
'CableTV',
|
|
'Connected&OTTTV',
|
|
'DisplayProspecting',
|
|
'DisplayRetargeting',
|
|
'Video',
|
|
'SocialProspecting',
|
|
'SocialRetargeting',
|
|
'SearchBrand',
|
|
'SearchNon-brand',
|
|
'DigitalPartners',
|
|
'Audio',
|
|
'Email']
|
|
spend_cols = [
|
|
'tv_broadcast_spend',
|
|
'tv_cable_spend',
|
|
'stream_video_spend',
|
|
'disp_prospect_spend',
|
|
'disp_retarget_spend',
|
|
'olv_spend',
|
|
'social_prospect_spend',
|
|
'social_retarget_spend',
|
|
'search_brand_spend',
|
|
'search_nonbrand_spend',
|
|
'cm_spend',
|
|
'audio_spend',
|
|
'email_spend']
|
|
prospect_cols = [
|
|
'Broadcast TV_Prospects',
|
|
'Cable TV_Prospects',
|
|
'Connected & OTT TV_Prospects',
|
|
'Display Prospecting_Prospects',
|
|
'Display Retargeting_Prospects',
|
|
'Video_Prospects',
|
|
'Social Prospecting_Prospects',
|
|
'Social Retargeting_Prospects',
|
|
'Search Brand_Prospects',
|
|
'Search Non-brand_Prospects',
|
|
'Digital Partners_Prospects',
|
|
'Audio_Prospects',
|
|
'Email_Prospects']
|
|
|
|
def hill_equation(x, Kd, n):
|
|
return x**n / (Kd**n + x**n)
|
|
|
|
|
|
def hill_func(x_data,y_data,x_minmax,y_minmax):
|
|
|
|
initial_guess = [1, 1]
|
|
params, covariance = curve_fit(hill_equation, x_data, y_data, p0=initial_guess,maxfev = 1000)
|
|
|
|
|
|
Kd_fit, n_fit = params
|
|
|
|
|
|
|
|
y_fit = hill_equation(x_data, Kd_fit, n_fit)
|
|
|
|
x_data_inv = x_minmax.inverse_transform(np.array(x_data).reshape(-1,1))
|
|
y_data_inv = y_minmax.inverse_transform(np.array(y_data).reshape(-1,1))
|
|
y_fit_inv = y_minmax.inverse_transform(np.array(y_fit).reshape(-1,1))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return y_fit,y_fit_inv,Kd_fit, n_fit
|
|
|
|
def data_output(channel,X,y,y_fit_inv,x_ext_data,y_fit_inv_ext):
|
|
fit_col = 'Fit_Data_'+channel
|
|
plot_df = pd.DataFrame()
|
|
|
|
plot_df[f'{channel}_Spends'] = X
|
|
|
|
plot_df['Date'] = df['Date']
|
|
plot_df['MAT'] = df['MAT']
|
|
|
|
|
|
|
|
y_fit_inv_v2 = []
|
|
for i in range(len(y_fit_inv)):
|
|
y_fit_inv_v2.append(y_fit_inv[i][0])
|
|
|
|
plot_df[fit_col] = y_fit_inv_v2
|
|
|
|
|
|
|
|
y_fit_inv_v2_ext = []
|
|
for i in range(len(y_fit_inv_ext)):
|
|
y_fit_inv_v2_ext.append(y_fit_inv_ext[i][0])
|
|
|
|
|
|
ext_df = pd.DataFrame()
|
|
ext_df[f'{channel}_Spends'] = x_ext_data
|
|
ext_df[fit_col] = y_fit_inv_v2_ext
|
|
|
|
ext_df['Date'] = [
|
|
np.datetime64('1950-01-01'),
|
|
np.datetime64('1950-06-15'),
|
|
np.datetime64('1950-12-31')
|
|
]
|
|
|
|
ext_df['MAT'] = ["ext","ext","ext"]
|
|
|
|
|
|
plot_df= plot_df.append(ext_df)
|
|
return plot_df
|
|
|
|
def input_data(df,spend_col,prospect_col):
|
|
X = np.array(df[spend_col].tolist())
|
|
y = np.array(df[prospect_col].tolist())
|
|
|
|
x_minmax = MinMaxScaler()
|
|
x_scaled = x_minmax.fit_transform(df[[spend_col]])
|
|
x_data = []
|
|
for i in range(len(x_scaled)):
|
|
x_data.append(x_scaled[i][0])
|
|
|
|
y_minmax = MinMaxScaler()
|
|
y_scaled = y_minmax.fit_transform(df[[prospect_col]])
|
|
y_data = []
|
|
for i in range(len(y_scaled)):
|
|
y_data.append(y_scaled[i][0])
|
|
|
|
return X,y,x_data,y_data,x_minmax,y_minmax
|
|
|
|
def extend_s_curve(x_max,x_minmax,y_minmax, Kd_fit, n_fit):
|
|
|
|
x_ext_data = [x_max*1.2,x_max*1.3,x_max*1.5]
|
|
|
|
|
|
x_scaled = x_minmax.transform(pd.DataFrame(x_ext_data))
|
|
x_data = []
|
|
for i in range(len(x_scaled)):
|
|
x_data.append(x_scaled[i][0])
|
|
|
|
|
|
y_fit = hill_equation(x_data, Kd_fit, n_fit)
|
|
y_fit_inv = y_minmax.inverse_transform(np.array(y_fit).reshape(-1,1))
|
|
|
|
return x_ext_data,y_fit_inv
|
|
|
|
def fit_data(spend_col,prospect_col,channel):
|
|
|
|
temp_df = df[df[spend_col]>0]
|
|
temp_df.reset_index(inplace=True)
|
|
|
|
X,y,x_data,y_data,x_minmax,y_minmax = input_data(temp_df,spend_col,prospect_col)
|
|
y_fit, y_fit_inv, Kd_fit, n_fit = hill_func(x_data,y_data,x_minmax,y_minmax)
|
|
|
|
|
|
|
|
|
|
x_ext_data,y_fit_inv_ext= extend_s_curve(temp_df[spend_col].max(),x_minmax,y_minmax, Kd_fit, n_fit)
|
|
|
|
plot_df = data_output(channel,X,y,y_fit_inv,x_ext_data,y_fit_inv_ext)
|
|
return plot_df
|
|
|
|
plotly_data = fit_data(spend_cols[0],prospect_cols[0],channel_cols[0])
|
|
plotly_data.tail()
|
|
|
|
for i in range(1,13):
|
|
|
|
pdf = fit_data(spend_cols[i],prospect_cols[i],channel_cols[i])
|
|
plotly_data = plotly_data.merge(pdf,on = ["Date","MAT"],how = "left")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import pandas as pd
|
|
import numpy as np
|
|
import matplotlib.pyplot as plt
|
|
from scipy.optimize import curve_fit
|
|
from sklearn.preprocessing import MinMaxScaler
|
|
import warnings
|
|
warnings.filterwarnings("ignore")
|
|
import plotly.graph_objects as go
|
|
|
|
|
|
df= pd.read_excel('response_curves_input_file.xlsx')
|
|
df.dropna(inplace=True)
|
|
df['Date'] = pd.to_datetime(df['Date'])
|
|
df.reset_index(inplace=True)
|
|
|
|
channel_cols = [
|
|
'BroadcastTV',
|
|
'CableTV',
|
|
'Connected&OTTTV',
|
|
'DisplayProspecting',
|
|
'DisplayRetargeting',
|
|
'Video',
|
|
'SocialProspecting',
|
|
'SocialRetargeting',
|
|
'SearchBrand',
|
|
'SearchNon-brand',
|
|
'DigitalPartners',
|
|
'Audio',
|
|
'Email']
|
|
spend_cols = [
|
|
'tv_broadcast_spend',
|
|
'tv_cable_spend',
|
|
'stream_video_spend',
|
|
'disp_prospect_spend',
|
|
'disp_retarget_spend',
|
|
'olv_spend',
|
|
'social_prospect_spend',
|
|
'social_retarget_spend',
|
|
'search_brand_spend',
|
|
'search_nonbrand_spend',
|
|
'cm_spend',
|
|
'audio_spend',
|
|
'email_spend']
|
|
prospect_cols = [
|
|
'Broadcast TV_Prospects',
|
|
'Cable TV_Prospects',
|
|
'Connected & OTT TV_Prospects',
|
|
'Display Prospecting_Prospects',
|
|
'Display Retargeting_Prospects',
|
|
'Video_Prospects',
|
|
'Social Prospecting_Prospects',
|
|
'Social Retargeting_Prospects',
|
|
'Search Brand_Prospects',
|
|
'Search Non-brand_Prospects',
|
|
'Digital Partners_Prospects',
|
|
'Audio_Prospects',
|
|
'Email_Prospects']
|
|
|
|
def hill_equation(x, Kd, n):
|
|
return x**n / (Kd**n + x**n)
|
|
|
|
|
|
def hill_func(x_data,y_data,x_minmax,y_minmax):
|
|
|
|
initial_guess = [1, 1]
|
|
params, covariance = curve_fit(hill_equation, x_data, y_data, p0=initial_guess,maxfev = 1000)
|
|
|
|
|
|
Kd_fit, n_fit = params
|
|
|
|
|
|
|
|
y_fit = hill_equation(x_data, Kd_fit, n_fit)
|
|
|
|
x_data_inv = x_minmax.inverse_transform(np.array(x_data).reshape(-1,1))
|
|
y_data_inv = y_minmax.inverse_transform(np.array(y_data).reshape(-1,1))
|
|
y_fit_inv = y_minmax.inverse_transform(np.array(y_fit).reshape(-1,1))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return y_fit,y_fit_inv,Kd_fit, n_fit
|
|
|
|
def data_output(channel,X,y,y_fit_inv,x_ext_data,y_fit_inv_ext):
|
|
fit_col = 'Fit_Data_'+channel
|
|
plot_df = pd.DataFrame()
|
|
|
|
plot_df[f'{channel}_Spends'] = X
|
|
|
|
plot_df['Date'] = df['Date']
|
|
plot_df['MAT'] = df['MAT']
|
|
|
|
|
|
|
|
y_fit_inv_v2 = []
|
|
for i in range(len(y_fit_inv)):
|
|
y_fit_inv_v2.append(y_fit_inv[i][0])
|
|
|
|
plot_df[fit_col] = y_fit_inv_v2
|
|
|
|
|
|
|
|
y_fit_inv_v2_ext = []
|
|
for i in range(len(y_fit_inv_ext)):
|
|
y_fit_inv_v2_ext.append(y_fit_inv_ext[i][0])
|
|
|
|
|
|
ext_df = pd.DataFrame()
|
|
ext_df[f'{channel}_Spends'] = x_ext_data
|
|
ext_df[fit_col] = y_fit_inv_v2_ext
|
|
|
|
ext_df['Date'] = [
|
|
np.datetime64('1950-01-01'),
|
|
np.datetime64('1950-06-15'),
|
|
np.datetime64('1950-12-31')
|
|
]
|
|
|
|
ext_df['MAT'] = ["ext","ext","ext"]
|
|
|
|
|
|
plot_df= plot_df.append(ext_df)
|
|
return plot_df
|
|
|
|
def input_data(df,spend_col,prospect_col):
|
|
X = np.array(df[spend_col].tolist())
|
|
y = np.array(df[prospect_col].tolist())
|
|
|
|
x_minmax = MinMaxScaler()
|
|
x_scaled = x_minmax.fit_transform(df[[spend_col]])
|
|
x_data = []
|
|
for i in range(len(x_scaled)):
|
|
x_data.append(x_scaled[i][0])
|
|
|
|
y_minmax = MinMaxScaler()
|
|
y_scaled = y_minmax.fit_transform(df[[prospect_col]])
|
|
y_data = []
|
|
for i in range(len(y_scaled)):
|
|
y_data.append(y_scaled[i][0])
|
|
|
|
return X,y,x_data,y_data,x_minmax,y_minmax
|
|
|
|
def extend_s_curve(x_max,x_minmax,y_minmax, Kd_fit, n_fit):
|
|
|
|
x_ext_data = [x_max*1.2,x_max*1.3,x_max*1.5]
|
|
|
|
|
|
x_scaled = x_minmax.transform(pd.DataFrame(x_ext_data))
|
|
x_data = []
|
|
for i in range(len(x_scaled)):
|
|
x_data.append(x_scaled[i][0])
|
|
|
|
|
|
y_fit = hill_equation(x_data, Kd_fit, n_fit)
|
|
y_fit_inv = y_minmax.inverse_transform(np.array(y_fit).reshape(-1,1))
|
|
|
|
return x_ext_data,y_fit_inv
|
|
|
|
def fit_data(spend_col,prospect_col,channel):
|
|
|
|
temp_df = df[df[spend_col]>0]
|
|
temp_df.reset_index(inplace=True)
|
|
|
|
X,y,x_data,y_data,x_minmax,y_minmax = input_data(temp_df,spend_col,prospect_col)
|
|
y_fit, y_fit_inv, Kd_fit, n_fit = hill_func(x_data,y_data,x_minmax,y_minmax)
|
|
|
|
|
|
|
|
|
|
x_ext_data,y_fit_inv_ext= extend_s_curve(temp_df[spend_col].max(),x_minmax,y_minmax, Kd_fit, n_fit)
|
|
|
|
plot_df = data_output(channel,X,y,y_fit_inv,x_ext_data,y_fit_inv_ext)
|
|
return plot_df
|
|
|
|
plotly_data = fit_data(spend_cols[0],prospect_cols[0],channel_cols[0])
|
|
plotly_data.tail()
|
|
|
|
for i in range(1,13):
|
|
|
|
pdf = fit_data(spend_cols[i],prospect_cols[i],channel_cols[i])
|
|
plotly_data = plotly_data.merge(pdf,on = ["Date","MAT"],how = "left")
|
|
|
|
def response_curves(channel,x_modified,y_modified):
|
|
|
|
|
|
fig = go.Figure()
|
|
|
|
x_col = (channel+"_Spends").replace('\xa0', '')
|
|
y_col = ("Fit_Data_"+channel).replace('\xa0', '')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
plotly_data2 = plotly_data.copy()
|
|
plotly_data2 = plotly_data[plotly_data[x_col].isnull()==False]
|
|
plotly_data2 = plotly_data2[plotly_data2["MAT"]!="ext"]
|
|
|
|
x_actual = np.array(plotly_data2[x_col].mean())
|
|
y_actual = np.array(plotly_data2[y_col].mean())
|
|
|
|
|
|
plotly_data1 = plotly_data[(plotly_data["MAT"] != "ext")]
|
|
|
|
|
|
plotly_data1 = plotly_data1.sort_values(by=x_col, ascending=True)
|
|
dividing_parameter = len(plotly_data1[plotly_data1[x_col].isnull()==False])
|
|
|
|
|
|
x_mod = x_modified/dividing_parameter
|
|
y_mod = y_modified/dividing_parameter
|
|
|
|
|
|
x_limit = 1.2 * max(x_actual, x_mod)
|
|
|
|
|
|
plotly_data1 = plotly_data[(plotly_data["MAT"] != "ext") &
|
|
(plotly_data[x_col] <= x_limit)]
|
|
plotly_data1 = plotly_data1.sort_values(by=x_col, ascending=True)
|
|
|
|
|
|
fig.add_trace(go.Scatter(
|
|
x=plotly_data1[x_col],
|
|
y=plotly_data1[y_col],
|
|
mode='lines',
|
|
marker=dict(color='blue'),
|
|
name=x_col.replace('_Spends', '')
|
|
))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
fig.add_trace(go.Scatter(
|
|
x=x_actual,
|
|
y=y_actual,
|
|
mode='markers',
|
|
marker=dict(
|
|
size=13
|
|
, color = '#516DA6'
|
|
),
|
|
name="Current Spends"
|
|
))
|
|
|
|
|
|
fig.add_trace(go.Scatter(
|
|
x=[x_mod],
|
|
y=[y_mod],
|
|
mode='markers',
|
|
marker=dict(
|
|
size=13
|
|
, color = '#4ACAD9'
|
|
),
|
|
name="Optimised Spends"
|
|
))
|
|
|
|
|
|
fig.update_layout(
|
|
title={
|
|
'text': channel_name_formating(channel)+' Response Curve',
|
|
'font': {
|
|
'size': 24,
|
|
'family': 'Arial',
|
|
'color': 'black',
|
|
|
|
}
|
|
},
|
|
|
|
xaxis_title='Weekly Spends',
|
|
yaxis_title='Prospects'
|
|
)
|
|
|
|
|
|
return fig
|
|
|
|
|