MediaMixOptimization / ppt_utils.py
samkeet's picture
Upload 40 files
00b00eb verified
raw
history blame
62 kB
import pandas as pd
import numpy as np
import pptx
from pptx import Presentation
from pptx.chart.data import CategoryChartData, ChartData
from pptx.enum.chart import XL_CHART_TYPE, XL_LEGEND_POSITION, XL_LABEL_POSITION
from pptx.enum.chart import XL_TICK_LABEL_POSITION
from pptx.util import Inches, Pt
import os
import pickle
from pathlib import Path
from sklearn.metrics import (
mean_absolute_error,
r2_score,
mean_absolute_percentage_error,
)
import streamlit as st
from collections import OrderedDict
from utilities import get_metrics_names, initialize_data, retrieve_pkl_object_without_warning
from io import BytesIO
from pptx.dml.color import RGBColor
from post_gres_cred import db_cred
schema=db_cred['schema']
from constants import (
TITLE_FONT_SIZE,
AXIS_LABEL_FONT_SIZE,
CHART_TITLE_FONT_SIZE,
AXIS_TITLE_FONT_SIZE,
DATA_LABEL_FONT_SIZE,
LEGEND_FONT_SIZE,
PIE_LEGEND_FONT_SIZE
)
def format_response_metric(target):
if target.startswith('response_metric_'):
target = target.replace('response_metric_', '')
target = target.replace("_", " ").title()
return target
def smape(actual, forecast):
# Symmetric Mape (SMAPE) eliminates shortcomings of MAPE :
## 1. MAPE becomes insanely high when actual is close to 0
## 2. MAPE is more favourable to underforecast than overforecast
return (1 / len(actual)) * np.sum(1 * np.abs(forecast - actual) / (np.abs(actual) + np.abs(forecast)))
def safe_num_to_per(num):
try:
return "{:.0%}".format(num)
except:
return num
# Function to convert numbers to abbreviated format
def convert_number_to_abbreviation(number):
try:
number = float(number)
if number >= 1000000:
return f'{number / 1000000:.1f} M'
elif number >= 1000:
return f'{number / 1000:.1f} K'
else:
return str(number)
except:
return number
def round_off(x, round_off_decimal=0):
# round off
try:
x = float(x)
if x < 1 and x > 0:
round_off_decimal = int(np.floor(np.abs(np.log10(x)))) + max(round_off_decimal, 1)
x = np.round(x, round_off_decimal)
elif x < 0 and x > -1:
round_off_decimal = int(np.floor(np.abs(np.log10(np.abs(x))))) + max(round_off_decimal, 1)
x = -np.round(x, round_off_decimal)
else:
x = np.round(x, round_off_decimal)
return x
except:
return x
def fill_table_placeholder(table_placeholder, slide, df, column_width=None, table_height=None):
cols = len(df.columns)
rows = len(df)
if table_height is None:
table_height = table_placeholder.height
x, y, cx, cy = table_placeholder.left, table_placeholder.top, table_placeholder.width, table_height
table = slide.shapes.add_table(rows + 1, cols, x, y, cx, cy).table
# Populate the table with data from the DataFrame
for row_idx, row in enumerate(df.values):
for col_idx, value in enumerate(row):
cell = table.cell(row_idx + 1, col_idx)
cell.text = str(value)
for col_idx, value in enumerate(df.columns):
cell = table.cell(0, col_idx)
cell.text = str(value)
if column_width is not None:
for col_idx, column_width in column_width.items():
table.columns[col_idx].width = Inches(column_width)
table_placeholder._element.getparent().remove(table_placeholder._element)
def bar_chart(chart_placeholder, slide, chart_data, titles={}, min_y=None, max_y=None, type='V', legend=True,
label_type=None, xaxis_pos=None):
x, y, cx, cy = chart_placeholder.left, chart_placeholder.top, chart_placeholder.width, chart_placeholder.height
if type == 'V':
graphic_frame = slide.shapes.add_chart(
XL_CHART_TYPE.COLUMN_CLUSTERED, x, y, cx, cy, chart_data
)
if type == 'H':
graphic_frame = slide.shapes.add_chart(
XL_CHART_TYPE.BAR_CLUSTERED, x, y, cx, cy, chart_data
)
chart = graphic_frame.chart
category_axis = chart.category_axis
value_axis = chart.value_axis
# Add chart title
if 'chart_title' in titles.keys():
chart.has_title = True
chart.chart_title.text_frame.text = titles['chart_title']
chart_title = chart.chart_title.text_frame.paragraphs[0].runs[0]
chart_title.font.size = Pt(CHART_TITLE_FONT_SIZE)
# Add axis titles
if 'x_axis' in titles.keys():
category_axis.has_title = True
category_axis.axis_title.text_frame.text = titles['x_axis']
category_title = category_axis.axis_title.text_frame.paragraphs[0].runs[0]
category_title.font.size = Pt(AXIS_TITLE_FONT_SIZE)
if 'y_axis' in titles.keys():
value_axis.has_title = True
value_axis.axis_title.text_frame.text = titles['y_axis']
value_title = value_axis.axis_title.text_frame.paragraphs[0].runs[0]
value_title.font.size = Pt(AXIS_TITLE_FONT_SIZE)
if xaxis_pos == 'low':
category_axis.tick_label_position = XL_TICK_LABEL_POSITION.LOW
# Customize the chart
if legend:
chart.has_legend = True
chart.legend.position = XL_LEGEND_POSITION.BOTTOM
chart.legend.font.size = Pt(LEGEND_FONT_SIZE)
chart.legend.include_in_layout = False
# Adjust font size for axis labels
category_axis.tick_labels.font.size = Pt(AXIS_LABEL_FONT_SIZE)
value_axis.tick_labels.font.size = Pt(AXIS_LABEL_FONT_SIZE)
if min_y is not None:
value_axis.minimum_scale = min_y # Adjust this value as needed
if max_y is not None:
value_axis.maximum_scale = max_y # Adjust this value as needed
plot = chart.plots[0]
plot.has_data_labels = True
data_labels = plot.data_labels
if label_type == 'per':
data_labels.number_format = '0"%"'
elif label_type == '$':
data_labels.number_format = '$[>=1000000]#,##0.0,,"M";$[>=1000]#,##0.0,"K";$#,##0'
elif label_type == '$1':
data_labels.number_format = '$[>=1000000]#,##0,,"M";$[>=1000]#,##0,"K";$#,##0'
elif label_type == 'M':
data_labels.number_format = '#0.0,,"M"'
elif label_type == 'M1':
data_labels.number_format = '#0.00,,"M"'
elif label_type == 'K':
data_labels.number_format = '#0.0,"K"'
data_labels.font.size = Pt(DATA_LABEL_FONT_SIZE)
chart_placeholder._element.getparent().remove(chart_placeholder._element)
def line_chart(chart_placeholder, slide, chart_data, titles={}, min_y=None, max_y=None):
# Add the chart to the slide
x, y, cx, cy = chart_placeholder.left, chart_placeholder.top, chart_placeholder.width, chart_placeholder.height
chart = slide.shapes.add_chart(
XL_CHART_TYPE.LINE, x, y, cx, cy, chart_data
).chart
chart.has_legend = True
chart.legend.position = XL_LEGEND_POSITION.BOTTOM
chart.legend.font.size = Pt(LEGEND_FONT_SIZE)
category_axis = chart.category_axis
value_axis = chart.value_axis
if min_y is not None:
value_axis.minimum_scale = min_y
if max_y is not None:
value_axis.maximum_scale = max_y
if min_y is not None and max_y is not None:
value_axis.major_unit = int((max_y - min_y) / 2)
if 'chart_title' in titles.keys():
chart.has_title = True
chart.chart_title.text_frame.text = titles['chart_title']
chart_title = chart.chart_title.text_frame.paragraphs[0].runs[0]
chart_title.font.size = Pt(CHART_TITLE_FONT_SIZE)
if 'x_axis' in titles.keys():
category_axis.has_title = True
category_axis.axis_title.text_frame.text = titles['x_axis']
category_title = category_axis.axis_title.text_frame.paragraphs[0].runs[0]
category_title.font.size = Pt(AXIS_TITLE_FONT_SIZE)
if 'y_axis' in titles.keys():
value_axis.has_title = True
value_axis.axis_title.text_frame.text = titles['y_axis']
value_title = value_axis.axis_title.text_frame.paragraphs[0].runs[0]
value_title.font.size = Pt(AXIS_TITLE_FONT_SIZE)
# Adjust font size for axis labels
category_axis.tick_labels.font.size = Pt(AXIS_LABEL_FONT_SIZE)
value_axis.tick_labels.font.size = Pt(AXIS_LABEL_FONT_SIZE)
plot = chart.plots[0]
series = plot.series[1]
line = series.format.line
line.color.rgb = RGBColor(141, 47, 0)
chart_placeholder._element.getparent().remove(chart_placeholder._element)
def pie_chart(chart_placeholder, slide, chart_data, title):
# Add the chart to the slide
x, y, cx, cy = chart_placeholder.left, chart_placeholder.top, chart_placeholder.width, chart_placeholder.height
chart = slide.shapes.add_chart(
XL_CHART_TYPE.PIE, x, y, cx, cy, chart_data
).chart
chart.has_legend = True
chart.legend.position = XL_LEGEND_POSITION.RIGHT
chart.legend.include_in_layout = False
chart.legend.font.size = Pt(PIE_LEGEND_FONT_SIZE)
chart.plots[0].has_data_labels = True
data_labels = chart.plots[0].data_labels
data_labels.number_format = '0%'
data_labels.position = XL_LABEL_POSITION.OUTSIDE_END
data_labels.font.size = Pt(DATA_LABEL_FONT_SIZE)
chart.has_title = True
chart.chart_title.text_frame.text = title
chart_title = chart.chart_title.text_frame.paragraphs[0].runs[0]
chart_title.font.size = Pt(CHART_TITLE_FONT_SIZE)
chart_placeholder._element.getparent().remove(chart_placeholder._element)
def title_and_table(slide, title, df, column_width=None, custom_table_height=False):
placeholders = slide.placeholders
ph_idx = [ph.placeholder_format.idx for ph in placeholders]
title_ph = slide.placeholders[ph_idx[0]]
title_ph.text = title
title_ph.text_frame.paragraphs[0].font.size = Pt(TITLE_FONT_SIZE)
table_placeholder = slide.placeholders[ph_idx[1]]
table_height = None
if custom_table_height:
if len(df) < 4:
table_height = int(np.ceil(table_placeholder.height / 2))
fill_table_placeholder(table_placeholder, slide, df, column_width, table_height)
# try:
# font_size = 18 # default for 3*3
# if cols < 3:
# row_diff = 3 - rows
# font_size = font_size + ((row_diff)*2) # 1 row less -> 2 pt font size increase & vice versa
# else:
# row_diff = 2 - rows
# font_size = font_size + ((row_diff)*2)
# for row in table.rows:
# for cell in row.cells:
# cell.text_frame.paragraphs[0].runs[0].font.size = Pt(font_size)
# except Exception as e :
# print("**"*30)
# print(e)
# else:
# except Exception as e:
# print('table', e)
return slide
def data_import(data, bin_dict):
import_df = pd.DataFrame(columns=['Category', 'Value'])
import_df.at[0, 'Category'] = 'Date Range'
date_start = data['date'].min().date()
date_end = data['date'].max().date()
import_df.at[0, 'Value'] = str(date_start) + ' - ' + str(date_end)
import_df.at[1, 'Category'] = 'Response Metrics'
import_df.at[1, 'Value'] = ', '.join(bin_dict['Response Metrics'])
import_df.at[2, 'Category'] = 'Media Variables'
import_df.at[2, 'Value'] = ', '.join(bin_dict['Media'])
import_df.at[3, 'Category'] = 'Spend Variables'
import_df.at[3, 'Value'] = ', '.join(bin_dict['Spends'])
if bin_dict['Exogenous'] != []:
import_df.at[4, 'Category'] = 'Exogenous Variables'
import_df.at[4, 'Value'] = ', '.join(bin_dict['Exogenous'])
return import_df
def channel_groups_df(channel_groups_dct={}, bin_dict={}):
df = pd.DataFrame(columns=['Channel', 'Media Variables', 'Spend Variables'])
i = 0
for channel, vars in channel_groups_dct.items():
media_vars = ", ".join(list(set(vars).intersection(set(bin_dict["Media"]))))
spend_vars = ", ".join(list(set(vars).intersection(set(bin_dict["Spends"]))))
df.at[i, "Channel"] = channel
df.at[i, 'Media Variables'] = media_vars
df.at[i, 'Spend Variables'] = spend_vars
i += 1
return df
def transformations(transform_dict):
transform_df = pd.DataFrame(columns=['Category', 'Transformation', 'Value'])
i = 0
for category in ['Media', 'Exogenous']:
transformations = f'transformation_{category}'
category_dict = transform_dict[category]
if transformations in category_dict.keys():
for transformation in category_dict[transformations]:
transform_df.at[i, 'Category'] = category
transform_df.at[i, 'Transformation'] = transformation
transform_df.at[i, 'Value'] = str(category_dict[transformation][0]) + ' - ' + str(
category_dict[transformation][1])
i += 1
return transform_df
def model_metrics(model_dict, is_panel):
metrics_df = pd.DataFrame(
columns=[
"Response Metric",
"Model",
"R2",
"ADJR2",
"Train MAPE",
"Test MAPE"
]
)
i = 0
for key in model_dict.keys():
target = key.split("__")[1]
metrics_df.at[i, "Response Metric"] = format_response_metric(target)
metrics_df.at[i, "Model"] = key.split("__")[0]
y = model_dict[key]["X_train_tuned"][target]
feature_set = model_dict[key]["feature_set"]
if is_panel:
random_df = get_random_effects(
media_data, panel_col, model_dict[key]["Model_object"]
)
pred = mdf_predict(
model_dict[key]["X_train_tuned"],
model_dict[key]["Model_object"],
random_df,
)["pred"]
else:
pred = model_dict[key]["Model_object"].predict(model_dict[key]["X_train_tuned"][feature_set])
ytest = model_dict[key]["X_test_tuned"][target]
if is_panel:
predtest = mdf_predict(
model_dict[key]["X_test_tuned"],
model_dict[key]["Model_object"],
random_df,
)["pred"]
else:
predtest = model_dict[key]["Model_object"].predict(model_dict[key]["X_test_tuned"][feature_set])
metrics_df.at[i, "R2"] = np.round(r2_score(y, pred), 2)
adjr2 = 1 - (1 - metrics_df.loc[i, "R2"]) * (
len(y) - 1
) / (len(y) - len(model_dict[key]["feature_set"]) - 1)
metrics_df.at[i, "ADJR2"] = np.round(adjr2, 2)
# y = np.where(np.abs(y) < 0.00001, 0.00001, y)
metrics_df.at[i, "Train MAPE"] = np.round(smape(y, pred), 2)
metrics_df.at[i, "Test MAPE"] = np.round(smape(ytest, predtest), 2)
i += 1
metrics_df = np.round(metrics_df, 2)
return metrics_df
def model_result(slide, model_key, model_dict, model_metrics_df, date_col):
placeholders = slide.placeholders
ph_idx = [ph.placeholder_format.idx for ph in placeholders]
title_ph = slide.placeholders[ph_idx[0]]
title_ph.text = model_key.split('__')[0]
title_ph.text_frame.paragraphs[0].font.size = Pt(TITLE_FONT_SIZE)
target = model_key.split('__')[1]
metrics_table_placeholder = slide.placeholders[ph_idx[1]]
metrics_df = model_metrics_df[model_metrics_df['Model'] == model_key.split('__')[0]].reset_index(drop=True)
# Accuracy = 1-mape
metrics_df['Accuracy'] = 100 * (1 - metrics_df['Train MAPE'])
metrics_df['Accuracy'] = metrics_df['Accuracy'].apply(lambda x: f'{np.round(x, 0)}%')
## Removing metrics as requested by Ioannis
metrics_df = metrics_df.drop(columns=['R2', 'ADJR2', 'Train MAPE', 'Test MAPE'])
fill_table_placeholder(metrics_table_placeholder, slide, metrics_df)
# coeff_table_placeholder = slide.placeholders[ph_idx[2]]
# coeff_df = pd.DataFrame(model_dict['Model_object'].params)
# coeff_df.reset_index(inplace=True)
# coeff_df.columns = ['Feature', 'Coefficent']
# fill_table_placeholder(coeff_table_placeholder, slide, coeff_df)
chart_placeholder = slide.placeholders[ph_idx[2]]
full_df = pd.concat([model_dict['X_train_tuned'], model_dict['X_test_tuned']])
full_df['Predicted'] = model_dict['Model_object'].predict(full_df[model_dict['feature_set']])
pred_df = full_df[[date_col, target, 'Predicted']]
pred_df.rename(columns={target: 'Actual'}, inplace=True)
# Create chart data
chart_data = CategoryChartData()
chart_data.categories = pred_df[date_col]
chart_data.add_series('Actual', pred_df['Actual'])
chart_data.add_series('Predicted', pred_df['Predicted'])
# Set range for y axis
min_y = np.floor(min(pred_df['Actual'].min(), pred_df['Predicted'].min()))
max_y = np.ceil(max(pred_df['Actual'].max(), pred_df['Predicted'].max()))
# Create the chart
line_chart(chart_placeholder=chart_placeholder,
slide=slide,
chart_data=chart_data,
titles={'chart_title': 'Actual VS Predicted',
'x_axis': 'Date',
'y_axis': target.title().replace('_', ' ')
},
min_y=min_y,
max_y=max_y
)
return slide
def metrics_contributions(slide, contributions_excels_dict, panel_col):
# Create data for metrics contributions
all_contribution_df = pd.DataFrame(columns=['Channel'])
target_sum_dict = {}
sort_support_dct = {}
for target in contributions_excels_dict.keys():
contribution_df = contributions_excels_dict[target]['CONTRIBUTION MMM'].copy()
if 'Date' in contribution_df.columns:
contribution_df.drop(columns=['Date'], inplace=True)
if panel_col in contribution_df.columns:
contribution_df.drop(columns=[panel_col], inplace=True)
contribution_df = pd.DataFrame(np.sum(contribution_df, axis=0)).reset_index()
contribution_df.columns = ['Channel', target]
target_sum = contribution_df[target].sum()
target_sum_dict[target] = target_sum
contribution_df[target] = 100 * contribution_df[target] / target_sum
all_contribution_df = pd.merge(all_contribution_df, contribution_df, on='Channel', how='outer')
sorted_target_sum_dict = sorted(target_sum_dict.items(), key=lambda kv: kv[1], reverse=True)
sorted_target_sum_keys = [kv[0] for kv in sorted_target_sum_dict]
if len([metric for metric in sorted_target_sum_keys if metric.lower() == 'revenue']) == 1:
rev_metric = [metric for metric in sorted_target_sum_keys if metric.lower() == 'revenue'][0]
sorted_target_sum_keys.remove(rev_metric)
sorted_target_sum_keys.append(rev_metric)
all_contribution_df = all_contribution_df[['Channel'] + sorted_target_sum_keys]
# for col in all_contribution_df.columns:
# all_contribution_df[col]=all_contribution_df[col].apply(lambda x: round_off(x,1))
# Sort Data by Average contribution of the channels keeping base first <Removed>
# all_contribution_df['avg'] = np.mean(all_contribution_df[list(contributions_excels_dict.keys())],axis=1)
# all_contribution_df['rank'] = all_contribution_df['avg'].rank(ascending=False)
# Sort data by contribution of bottom funnel metric
bottom_funnel_metric = sorted_target_sum_keys[-1]
all_contribution_df['rank'] = all_contribution_df[bottom_funnel_metric].rank(ascending=False)
all_contribution_df.loc[all_contribution_df[all_contribution_df['Channel'] == 'base'].index, 'rank'] = 0
all_contribution_df = all_contribution_df.sort_values(by='rank')
all_contribution_df.drop(columns=['rank'], inplace=True)
# Add title
placeholders = slide.placeholders
ph_idx = [ph.placeholder_format.idx for ph in placeholders]
title_ph = slide.placeholders[ph_idx[0]]
title_ph.text = "Response Metrics Contributions"
title_ph.text_frame.paragraphs[0].font.size = Pt(TITLE_FONT_SIZE)
for target in contributions_excels_dict.keys():
all_contribution_df[target] = all_contribution_df[target].astype(float)
# Create chart data
chart_data = CategoryChartData()
chart_data.categories = all_contribution_df['Channel']
for target in sorted_target_sum_keys:
chart_data.add_series(format_response_metric(target), all_contribution_df[target])
chart_placeholder = slide.placeholders[ph_idx[1]]
if isinstance(np.min(all_contribution_df.select_dtypes(exclude=['object', 'datetime'])), float):
# Add the chart to the slide
bar_chart(chart_placeholder=chart_placeholder,
slide=slide,
chart_data=chart_data,
titles={'chart_title': 'Response Metrics Contributions',
# 'x_axis':'Channels',
'y_axis': 'Contributions'},
min_y=np.floor(np.min(all_contribution_df.select_dtypes(exclude=['object', 'datetime']))),
max_y=np.ceil(np.max(all_contribution_df.select_dtypes(exclude=['object', 'datetime']))),
type='V',
label_type='per'
)
else:
bar_chart(chart_placeholder=chart_placeholder,
slide=slide,
chart_data=chart_data,
titles={'chart_title': 'Response Metrics Contributions',
# 'x_axis':'Channels',
'y_axis': 'Contributions'},
min_y=np.floor(np.min(all_contribution_df.select_dtypes(exclude=['object', 'datetime'])).values[0]),
max_y=np.ceil(np.max(all_contribution_df.select_dtypes(exclude=['object', 'datetime'])).values[0]),
type='V',
label_type='per'
)
return slide
def model_media_performance(slide, target, contributions_excels_dict, date_col='Date', is_panel=False,
panel_col='panel'):
# Add title
placeholders = slide.placeholders
ph_idx = [ph.placeholder_format.idx for ph in placeholders]
title_ph = slide.placeholders[ph_idx[0]]
title_ph.text = "Media Performance - " + target.title().replace("_", " ")
title_ph.text_frame.paragraphs[0].font.size = Pt(TITLE_FONT_SIZE)
# CONTRIBUTION CHART
# Create contribution data
contribution_df = contributions_excels_dict[target]['CONTRIBUTION MMM']
if panel_col in contribution_df.columns:
contribution_df.drop(columns=[panel_col], inplace=True)
# contribution_df.drop(columns=[date_col], inplace=True)
contribution_df = pd.DataFrame(np.sum(contribution_df, axis=0)).reset_index()
contribution_df.columns = ['Channel', format_response_metric(target)]
contribution_df['Channel'] = contribution_df['Channel'].apply(lambda x: x.title())
target_sum = contribution_df[format_response_metric(target)].sum()
contribution_df[format_response_metric(target)] = contribution_df[format_response_metric(target)] / target_sum
contribution_df.sort_values(by=['Channel'], ascending=False, inplace=True)
# for col in contribution_df.columns:
# contribution_df[col] = contribution_df[col].apply(lambda x : round_off(x))
# Create Chart Data
chart_data = ChartData()
chart_data.categories = contribution_df['Channel']
chart_data.add_series('Contribution', contribution_df[format_response_metric(target)])
chart_placeholder = slide.placeholders[ph_idx[2]]
pie_chart(chart_placeholder=chart_placeholder,
slide=slide,
chart_data=chart_data,
title='Contribution')
# SPENDS CHART
initialize_data(panel='aggregated', metrics=target)
scenario = st.session_state["scenario"]
spends_values = {
channel_name: round(
scenario.channels[channel_name].actual_total_spends
* scenario.channels[channel_name].conversion_rate,
1,
)
for channel_name in st.session_state["channels_list"]
}
spends_df = pd.DataFrame(columns=['Channel', 'Media Spend'])
spends_df['Channel'] = list(spends_values.keys())
spends_df['Media Spend'] = list(spends_values.values())
spends_sum = spends_df['Media Spend'].sum()
spends_df['Media Spend'] = spends_df['Media Spend'] / spends_sum
spends_df['Channel'] = spends_df['Channel'].apply(lambda x: x.title())
spends_df.sort_values(by='Channel', ascending=False, inplace=True)
# for col in spends_df.columns:
# spends_df[col] = spends_df[col].apply(lambda x : round_off(x))
# Create Chart Data
spends_chart_data = ChartData()
spends_chart_data = ChartData()
spends_chart_data.categories = spends_df['Channel']
spends_chart_data.add_series('Media Spend', spends_df['Media Spend'])
spends_chart_placeholder = slide.placeholders[ph_idx[1]]
pie_chart(chart_placeholder=spends_chart_placeholder,
slide=slide,
chart_data=spends_chart_data,
title='Media Spend')
# spends_values.append(0)
return contribution_df, spends_df
# def get_saved_scenarios_dict(project_path):
# # Path to the saved scenarios file
# saved_scenarios_dict_path = os.path.join(
# project_path, "saved_scenarios.pkl"
# )
#
# # Load existing scenarios if the file exists
# if os.path.exists(saved_scenarios_dict_path):
# with open(saved_scenarios_dict_path, "rb") as f:
# saved_scenarios_dict = pickle.load(f)
# else:
# saved_scenarios_dict = OrderedDict()
#
# return saved_scenarios_dict
def optimization_summary(slide, scenario, scenario_name):
placeholders = slide.placeholders
ph_idx = [ph.placeholder_format.idx for ph in placeholders]
title_ph = slide.placeholders[ph_idx[0]]
title_ph.text = 'Optimization Summary' # + ' (Scenario: ' + scenario_name + ')'
title_ph.text_frame.paragraphs[0].font.size = Pt(TITLE_FONT_SIZE)
multiplier = 1 / float(scenario['multiplier'])
# st.write(scenario['multiplier'], multiplier)
## Multiplier is an indicator of selected time fram
## Doesn't effect CPA
opt_on = scenario['optimization']
if opt_on.lower() == 'spends':
opt_on = 'Media Spend'
details_ph = slide.placeholders[ph_idx[3]]
details_ph.text = 'Scenario Name: ' + scenario_name + \
'\nResponse Metric: ' + str(scenario['metrics_selected']).replace("_", " ").title() + \
'\nOptimized on: ' + str(opt_on).replace("_", " ").title()
scenario_df = pd.DataFrame(columns=['Category', 'Actual', 'Simulated', 'Change'])
scenario_df.at[0, 'Category'] = 'Media Spend'
scenario_df.at[0, 'Actual'] = scenario['actual_total_spends'] * multiplier
scenario_df.at[0, 'Simulated'] = scenario['modified_total_spends'] * multiplier
scenario_df.at[0, 'Change'] = (scenario['modified_total_spends'] - scenario['actual_total_spends']) * multiplier
scenario_df.at[1, 'Category'] = scenario['metrics_selected'].replace("_", " ").title()
scenario_df.at[1, 'Actual'] = scenario['actual_total_sales'] * multiplier
scenario_df.at[1, 'Simulated'] = (scenario['modified_total_sales']) * multiplier
scenario_df.at[1, 'Change'] = (scenario['modified_total_sales'] - scenario['actual_total_sales']) * multiplier
scenario_df.at[2, 'Category'] = 'CPA'
actual_cpa = scenario['actual_total_spends'] / scenario['actual_total_sales']
modified_cpa = scenario['modified_total_spends'] / scenario['modified_total_sales']
scenario_df.at[2, 'Actual'] = actual_cpa
scenario_df.at[2, 'Simulated'] = modified_cpa
scenario_df.at[2, 'Change'] = modified_cpa - actual_cpa
scenario_df.at[3, 'Category'] = 'ROI'
act_roi = scenario['actual_total_sales'] / scenario['actual_total_spends']
opt_roi = scenario['modified_total_sales'] / scenario['modified_total_spends']
scenario_df.at[3, 'Actual'] = act_roi
scenario_df.at[3, 'Simulated'] = opt_roi
scenario_df.at[3, 'Change'] = opt_roi - act_roi
for col in scenario_df.columns:
scenario_df[col] = scenario_df[col].apply(lambda x: round_off(x, 1))
scenario_df[col] = scenario_df[col].apply(lambda x: convert_number_to_abbreviation(x))
table_placeholder = slide.placeholders[ph_idx[1]]
fill_table_placeholder(table_placeholder, slide, scenario_df)
channel_spends_df = pd.DataFrame(columns=['Channel', 'Actual Spends', 'Optimized Spends'])
for i, channel in enumerate(scenario['channels'].values()):
channel_spends_df.at[i, 'Channel'] = channel['name']
channel_conversion_rate = channel[
"conversion_rate"
]
channel_spends_df.at[i, 'Actual Spends'] = (
channel["actual_total_spends"]
* channel_conversion_rate
) * multiplier
channel_spends_df.at[i, 'Optimized Spends'] = (
channel["modified_total_spends"]
* channel_conversion_rate
) * multiplier
channel_spends_df['Actual Spends'] = channel_spends_df['Actual Spends'].astype('float')
channel_spends_df['Optimized Spends'] = channel_spends_df['Optimized Spends'].astype('float')
for col in channel_spends_df.columns:
channel_spends_df[col] = channel_spends_df[col].apply(lambda x: round_off(x, 0))
# Sort data on Actual Spends
channel_spends_df.sort_values(by='Actual Spends', inplace=True, ascending=False)
# Create chart data
chart_data = CategoryChartData()
chart_data.categories = channel_spends_df['Channel']
for col in ['Actual Spends', 'Optimized Spends']:
chart_data.add_series(col, channel_spends_df[col])
chart_placeholder = slide.placeholders[ph_idx[2]]
# Add the chart to the slide
if isinstance(np.max(channel_spends_df.select_dtypes(exclude=['object', 'datetime'])),float):
bar_chart(chart_placeholder=chart_placeholder,
slide=slide,
chart_data=chart_data,
titles={'chart_title': 'Channel Wise Spends',
# 'x_axis':'Channels',
'y_axis': 'Spends'},
# min_y=np.floor(np.min(channel_spends_df.select_dtypes(exclude=['object', 'datetime']))),
min_y=0,
max_y=np.ceil(np.max(channel_spends_df.select_dtypes(exclude=['object', 'datetime']))),
label_type='$'
)
else:
# Add the chart to the slide
bar_chart(chart_placeholder=chart_placeholder,
slide=slide,
chart_data=chart_data,
titles={'chart_title': 'Channel Wise Spends',
# 'x_axis':'Channels',
'y_axis': 'Spends'},
# min_y=np.floor(np.min(channel_spends_df.select_dtypes(exclude=['object', 'datetime']))),
min_y=0,
max_y=np.ceil(np.max(channel_spends_df.select_dtypes(exclude=['object', 'datetime'])).values[0]),
label_type='$'
)
def channel_wise_spends(slide, scenario):
placeholders = slide.placeholders
ph_idx = [ph.placeholder_format.idx for ph in placeholders]
title_ph = slide.placeholders[ph_idx[0]]
title_ph.text = 'Channel Spends and Impact'
title_ph.text_frame.paragraphs[0].font.size = Pt(TITLE_FONT_SIZE)
# print(scenario.keys())
multiplier = 1 / float(scenario['multiplier'])
channel_spends_df = pd.DataFrame(columns=['Channel', 'Actual Spends', 'Optimized Spends'])
for i, channel in enumerate(scenario['channels'].values()):
channel_spends_df.at[i, 'Channel'] = channel['name']
channel_conversion_rate = channel["conversion_rate"]
channel_spends_df.at[i, 'Actual Spends'] = (channel[
"actual_total_spends"] * channel_conversion_rate) * multiplier
channel_spends_df.at[i, 'Optimized Spends'] = (channel[
"modified_total_spends"] * channel_conversion_rate) * multiplier
channel_spends_df['Actual Spends'] = channel_spends_df['Actual Spends'].astype('float')
channel_spends_df['Optimized Spends'] = channel_spends_df['Optimized Spends'].astype('float')
actual_sum = channel_spends_df['Actual Spends'].sum()
opt_sum = channel_spends_df['Optimized Spends'].sum()
for col in channel_spends_df.columns:
channel_spends_df[col] = channel_spends_df[col].apply(lambda x: round_off(x, 0))
channel_spends_df['Actual Spends %'] = 100 * (channel_spends_df['Actual Spends'] / actual_sum)
channel_spends_df['Optimized Spends %'] = 100 * (channel_spends_df['Optimized Spends'] / opt_sum)
channel_spends_df['Actual Spends %'] = np.round(channel_spends_df['Actual Spends %'])
channel_spends_df['Optimized Spends %'] = np.round(channel_spends_df['Optimized Spends %'])
# Sort Data based on Actual Spends %
channel_spends_df.sort_values(by='Actual Spends %', inplace=True)
# Create chart data
chart_data = CategoryChartData()
chart_data.categories = channel_spends_df['Channel']
for col in ['Actual Spends %', 'Optimized Spends %']:
# for col in ['Actual Spends %']:
chart_data.add_series(col, channel_spends_df[col])
chart_placeholder = slide.placeholders[ph_idx[1]]
# Add the chart to the slide
if isinstance(np.max(channel_spends_df[['Actual Spends %', 'Optimized Spends %']]), float):
bar_chart(chart_placeholder=chart_placeholder,
slide=slide,
chart_data=chart_data,
titles={'chart_title': 'Spend Split %',
# 'x_axis':'Channels',
'y_axis': 'Spend %'},
min_y=0,
max_y=np.ceil(np.max(channel_spends_df[['Actual Spends %', 'Optimized Spends %']])),
type='H',
legend=True,
label_type='per',
xaxis_pos='low'
)
else:
bar_chart(chart_placeholder=chart_placeholder,
slide=slide,
chart_data=chart_data,
titles={'chart_title': 'Spend Split %',
# 'x_axis':'Channels',
'y_axis': 'Spend %'},
min_y=0,
max_y=np.ceil(np.max(channel_spends_df[['Actual Spends %', 'Optimized Spends %']]).values[0]),
type='H',
legend=True,
label_type='per',
xaxis_pos='low'
)
#
# # Create chart data
# chart_data_1 = CategoryChartData()
# chart_data_1.categories = channel_spends_df['Channel']
# # for col in ['Actual Spends %', 'Optimized Spends %']:
# for col in ['Optimized Spends %']:
# chart_data_1.add_series(col, channel_spends_df[col])
# chart_placeholder_1 = slide.placeholders[ph_idx[3]]
#
# # Add the chart to the slide
# bar_chart(chart_placeholder=chart_placeholder_1,
# slide=slide,
# chart_data=chart_data_1,
# titles={'chart_title': 'Optimized Spends Split %',
# # 'x_axis':'Channels',
# 'y_axis': 'Spends %'},
# min_y=0,
# max_y=np.ceil(np.max(channel_spends_df[['Actual Spends %', 'Optimized Spends %']])),
# type='H',
# legend=False,
# label_type='per'
# )
channel_spends_df['Delta %'] = 100 * (channel_spends_df['Optimized Spends'] - channel_spends_df['Actual Spends']) / \
channel_spends_df['Actual Spends']
channel_spends_df['Delta %'] = channel_spends_df['Delta %'].apply(lambda x: round_off(x, 0))
# Create chart data
delta_chart_data = CategoryChartData()
delta_chart_data.categories = channel_spends_df['Channel']
col = 'Delta %'
delta_chart_data.add_series(col, channel_spends_df[col])
delta_chart_placeholder = slide.placeholders[ph_idx[3]]
# Add the chart to the slide
if isinstance(np.min(channel_spends_df['Delta %']), float):
bar_chart(chart_placeholder=delta_chart_placeholder,
slide=slide,
chart_data=delta_chart_data,
titles={'chart_title': 'Spend Delta %',
'y_axis': 'Spend Delta %'},
min_y=np.floor(np.min(channel_spends_df['Delta %'])),
max_y=np.ceil(np.max(channel_spends_df['Delta %'])),
type='H',
legend=False,
label_type='per',
xaxis_pos='low'
)
else:
bar_chart(chart_placeholder=delta_chart_placeholder,
slide=slide,
chart_data=delta_chart_data,
titles={'chart_title': 'Spend Delta %',
'y_axis': 'Spend Delta %'},
min_y=np.floor(np.min(channel_spends_df['Delta %']).values[0]),
max_y=np.ceil(np.max(channel_spends_df['Delta %']).values[0]),
type='H',
legend=False,
label_type='per',
xaxis_pos='low'
)
# Incremental Impact
channel_inc_df = pd.DataFrame(columns=['Channel', 'Increment'])
for i, channel in enumerate(scenario['channels'].values()):
channel_inc_df.at[i, 'Channel'] = channel['name']
act_impact = channel['actual_total_sales']
opt_impact = channel['modified_total_sales']
impact = opt_impact - act_impact
impact = round_off(impact, 0)
impact = impact if abs(impact) > 0.0001 else 0
channel_inc_df.at[i, 'Increment'] = impact
channel_inc_df_1 = pd.merge(channel_spends_df, channel_inc_df, how='left', on='Channel')
# Create chart data
delta_chart_data = CategoryChartData()
delta_chart_data.categories = channel_inc_df_1['Channel']
col = 'Increment'
delta_chart_data.add_series(col, channel_inc_df_1[col])
delta_chart_placeholder = slide.placeholders[ph_idx[2]]
label_req = True
if min(np.abs(channel_inc_df_1[col])) > 100000: # 0.1M
label_type = 'M'
elif min(np.abs(channel_inc_df_1[col])) > 10000 and max(np.abs(channel_inc_df_1[col])) > 1000000:
label_type = 'M1'
elif min(np.abs(channel_inc_df_1[col])) > 100 and max(np.abs(channel_inc_df_1[col])) > 1000:
label_type = 'K'
else:
label_req = False
# Add the chart to the slide
if label_req:
bar_chart(chart_placeholder=delta_chart_placeholder,
slide=slide,
chart_data=delta_chart_data,
titles={'chart_title': 'Incremental Impact',
'y_axis': format_response_metric(scenario['metrics_selected'])},
# min_y=np.floor(np.min(channel_inc_df_1['Delta %'])),
# max_y=np.ceil(np.max(channel_inc_df_1['Delta %'])),
type='H',
label_type=label_type,
legend=False,
xaxis_pos='low'
)
else:
bar_chart(chart_placeholder=delta_chart_placeholder,
slide=slide,
chart_data=delta_chart_data,
titles={'chart_title': 'Increment',
'y_axis': scenario['metrics_selected']},
# min_y=np.floor(np.min(channel_inc_df_1['Delta %'])),
# max_y=np.ceil(np.max(channel_inc_df_1['Delta %'])),
type='H',
legend=False,
xaxis_pos='low'
)
def channel_wise_roi(slide, scenario):
channel_roi_mroi = scenario['channel_roi_mroi']
# Add title
placeholders = slide.placeholders
ph_idx = [ph.placeholder_format.idx for ph in placeholders]
title_ph = slide.placeholders[ph_idx[0]]
title_ph.text = 'Channel ROIs'
title_ph.text_frame.paragraphs[0].font.size = Pt(TITLE_FONT_SIZE)
channel_roi_df = pd.DataFrame(columns=['Channel', 'Actual ROI', 'Optimized ROI'])
for i, channel in enumerate(channel_roi_mroi.keys()):
channel_roi_df.at[i, 'Channel'] = channel
channel_roi_df.at[i, 'Actual ROI'] = channel_roi_mroi[channel]['actual_roi']
channel_roi_df.at[i, 'Optimized ROI'] = channel_roi_mroi[channel]['optimized_roi']
channel_roi_df['Actual ROI'] = channel_roi_df['Actual ROI'].astype('float')
channel_roi_df['Optimized ROI'] = channel_roi_df['Optimized ROI'].astype('float')
for col in channel_roi_df.columns:
channel_roi_df[col] = channel_roi_df[col].apply(lambda x: round_off(x, 2))
# Create chart data
chart_data = CategoryChartData()
chart_data.categories = channel_roi_df['Channel']
for col in ['Actual ROI', 'Optimized ROI']:
chart_data.add_series(col, channel_roi_df[col])
chart_placeholder = slide.placeholders[ph_idx[1]]
# Add the chart to the slide
if isinstance(channel_roi_df.select_dtypes(exclude=['object', 'datetime']), float):
bar_chart(chart_placeholder=chart_placeholder,
slide=slide,
chart_data=chart_data,
titles={'chart_title': 'Channel Wise ROI',
# 'x_axis':'Channels',
'y_axis': 'ROI'},
# min_y=np.floor(np.min(channel_spends_df.select_dtypes(exclude=['object', 'datetime']))),
min_y=0,
max_y=np.max(channel_roi_df.select_dtypes(exclude=['object', 'datetime']))
)
else:
bar_chart(chart_placeholder=chart_placeholder,
slide=slide,
chart_data=chart_data,
titles={'chart_title': 'Channel Wise ROI',
# 'x_axis':'Channels',
'y_axis': 'ROI'},
# min_y=np.floor(np.min(channel_spends_df.select_dtypes(exclude=['object', 'datetime']))),
min_y=0,
max_y=np.max(channel_roi_df.select_dtypes(exclude=['object', 'datetime'])).values[0]
)
# act_roi = scenario['actual_total_sales']/scenario['actual_total_spends']
# opt_roi = scenario['modified_total_sales']/scenario['modified_total_spends']
#
# act_roi_ph = slide.placeholders[ph_idx[2]]
# act_roi_ph.text = 'Actual ROI: ' + str(round_off(act_roi,2))
# opt_roi_ph = slide.placeholders[ph_idx[3]]
# opt_roi_ph.text = 'Optimized ROI: ' + str(round_off(opt_roi, 2))
## Removing mroi chart as per Ioannis' feedback
# channel_mroi_df = pd.DataFrame(columns=['Channel', 'Actual mROI', 'Optimized mROI'])
# for i, channel in enumerate(channel_roi_mroi.keys()):
# channel_mroi_df.at[i, 'Channel'] = channel
# channel_mroi_df.at[i, 'Actual mROI'] = channel_roi_mroi[channel]['actual_mroi']
# channel_mroi_df.at[i, 'Optimized mROI'] = channel_roi_mroi[channel]['optimized_mroi']
# channel_mroi_df['Actual mROI']=channel_mroi_df['Actual mROI'].astype('float')
# channel_mroi_df['Optimized mROI']=channel_mroi_df['Optimized mROI'].astype('float')
#
# for col in channel_mroi_df.columns:
# channel_mroi_df[col]=channel_mroi_df[col].apply(lambda x: round_off(x))
#
# # Create chart data
# mroi_chart_data = CategoryChartData()
# mroi_chart_data.categories = channel_mroi_df['Channel']
# for col in ['Actual mROI', 'Optimized mROI']:
# mroi_chart_data.add_series(col, channel_mroi_df[col])
#
# mroi_chart_placeholder=slide.placeholders[ph_idx[2]]
#
# # Add the chart to the slide
# bar_chart(chart_placeholder=mroi_chart_placeholder,
# slide=slide,
# chart_data=mroi_chart_data,
# titles={'chart_title':'Channel Wise mROI',
# # 'x_axis':'Channels',
# 'y_axis':'mROI'},
# # min_y=np.floor(np.min(channel_mroi_df.select_dtypes(exclude=['object', 'datetime']))),
# min_y=0,
# max_y=np.ceil(np.max(channel_mroi_df.select_dtypes(exclude=['object', 'datetime'])))
# )
def effictiveness_efficiency(slide, final_data, bin_dct, scenario):
# Add title
placeholders = slide.placeholders
ph_idx = [ph.placeholder_format.idx for ph in placeholders]
title_ph = slide.placeholders[ph_idx[0]]
title_ph.text = 'Effectiveness and Efficiency'
title_ph.text_frame.paragraphs[0].font.size = Pt(TITLE_FONT_SIZE)
response_metrics = bin_dct['Response Metrics']
kpi_df = final_data[response_metrics].sum(axis=0).reset_index()
kpi_df.columns = ['Response Metric', 'Effectiveness']
kpi_df['Efficiency'] = kpi_df['Effectiveness'] / scenario['modified_total_spends']
kpi_df['Efficiency'] = kpi_df['Efficiency'].apply(lambda x: round_off(x, 1))
kpi_df.sort_values(by='Effectiveness', inplace=True)
kpi_df['Response Metric'] = kpi_df['Response Metric'].apply(lambda x: format_response_metric(x))
# Create chart data for effectiveness
chart_data = CategoryChartData()
chart_data.categories = kpi_df['Response Metric']
chart_data.add_series('Effectiveness', kpi_df['Effectiveness'])
chart_placeholder = slide.placeholders[ph_idx[1]]
# Add the chart to the slide
bar_chart(chart_placeholder=chart_placeholder,
slide=slide,
chart_data=chart_data,
titles={'chart_title': 'Effectiveness',
# 'x_axis':'Channels',
# 'y_axis': 'ROI'
},
# min_y=np.floor(np.min(channel_spends_df.select_dtypes(exclude=['object', 'datetime']))),
min_y=0,
# max_y=np.max(channel_roi_df.select_dtypes(exclude=['object', 'datetime'])),
type='H',
label_type='M'
)
# Create chart data for efficiency
chart_data_1 = CategoryChartData()
chart_data_1.categories = kpi_df['Response Metric']
chart_data_1.add_series('Efficiency', kpi_df['Efficiency'])
chart_placeholder_1 = slide.placeholders[ph_idx[2]]
# Add the chart to the slide
bar_chart(chart_placeholder=chart_placeholder_1,
slide=slide,
chart_data=chart_data_1,
titles={'chart_title': 'Efficiency',
# 'x_axis':'Channels',
# 'y_axis': 'ROI'
},
# min_y=np.floor(np.min(channel_spends_df.select_dtypes(exclude=['object', 'datetime']))),
min_y=0,
# max_y=np.max(channel_roi_df.select_dtypes(exclude=['object', 'datetime'])),
type='H'
)
definition_ph_1 = slide.placeholders[ph_idx[3]]
definition_ph_1.text = 'Effectiveness is measured as the total sum of the Response Metric'
definition_ph_2 = slide.placeholders[ph_idx[4]]
definition_ph_2.text = 'Efficiency is measured as the ratio of sum of the Response Metric and sum of Media Spend'
def load_pickle(path):
with open(path, "rb") as f:
file_data = pickle.load(f)
return file_data
def read_all_files():
files=[]
# Read data and bin dictionary
if st.session_state["project_dct"]["data_import"]["imputed_tool_df"] is not None:
final_df_loaded = st.session_state["project_dct"]["data_import"]["imputed_tool_df"].copy()
bin_dict_loaded = st.session_state["project_dct"]["data_import"]["category_dict"].copy()
files.append(final_df_loaded)
files.append(bin_dict_loaded)
if "group_dict" in st.session_state["project_dct"]["data_import"].keys():
channels = st.session_state["project_dct"]["data_import"]["group_dict"]
files.append(channels)
if st.session_state["project_dct"]["transformations"]["final_df"] is not None:
transform_dict = st.session_state["project_dct"]["transformations"]
files.append(transform_dict)
if retrieve_pkl_object_without_warning(st.session_state['project_number'], "Model_Tuning", "tuned_model", schema) is not None:
tuned_model_dict = retrieve_pkl_object_without_warning(st.session_state['project_number'], "Model_Tuning",
"tuned_model", schema) # db
files.append(tuned_model_dict)
else:
files.append(None)
else:
files.append(None)
if len(list(st.session_state["project_dct"]["current_media_performance"]["model_outputs"].keys()))>0: # check if there are model outputs for at least one metric
metrics_list = list(st.session_state["project_dct"]["current_media_performance"]["model_outputs"].keys())
contributions_excels_dict = {}
for metrics in metrics_list:
# raw_df = st.session_state["project_dct"]["current_media_performance"]["model_outputs"][metrics]["raw_data"]
# spend_df = st.session_state["project_dct"]["current_media_performance"]["model_outputs"][metrics]["spends_data"]
contribution_df = st.session_state["project_dct"]["current_media_performance"]["model_outputs"][metrics]["contribution_data"]
contributions_excels_dict[metrics] = {'CONTRIBUTION MMM':contribution_df}
files.append(contributions_excels_dict)
# Get Saved Scenarios
if len(list(st.session_state["project_dct"]["saved_scenarios"]["saved_scenarios_dict"].keys()))>0:
files.append(st.session_state["project_dct"]["saved_scenarios"]["saved_scenarios_dict"])
# saved_scenarios_loaded = get_saved_scenarios_dict(project_path)
return files
'''
Template Layout
0 : Title
1 : Data Details Section {no changes required}
2 : Data Import
3 : Data Import - Channel Groups
4 : Model Results {Duplicate for each model}
5 : Metrics Contribution
6 : Media performance {Duplicate for each model}
7 : Media performance Tabular View {Duplicate for each model}
8 : Optimization Section {no changes}
9 : Optimization Summary {Duplicate for each section}
10 : Channel Spends {Duplicate for each model}
11 : Channel Wise ROI {Duplicate for each model}
12 : Efficiency & Efficacy
13 : Appendix
14 : Transformations
15 : Model Summary
16 : Thank You Slide
'''
def create_ppt(project_name, username, panel_col):
# Read saved files
files = read_all_files()
transform_dict, tuned_model_dict, contributions_excels_dict, saved_scenarios_loaded = None, None, None, None
if len(files)>0:
# saved_data = files[0]
data = files[0]
bin_dict = files[1]
channel_groups_dct = files[2]
try:
transform_dict = files[3]
tuned_model_dict = files[4]
contributions_excels_dict = files[5]
saved_scenarios_loaded = files[6]
except Exception as e:
print(e)
else:
return False
is_panel = True if data[panel_col].nunique()>1 else False
template_path = 'ppt/template.pptx'
# ppt_path = os.path.join('ProjectSummary.pptx')
prs = Presentation(template_path)
num_slides = len(prs.slides)
slides = prs.slides
# Title Slide
title_slide_layout = slides[0].slide_layout
title_slide = prs.slides.add_slide(title_slide_layout)
# Add title & project name
placeholders = title_slide.placeholders
ph_idx = [ph.placeholder_format.idx for ph in placeholders]
title_ph = title_slide.placeholders[ph_idx[0]]
title_ph.text = 'Media Mix Optimization Summary'
txt_ph = title_slide.placeholders[ph_idx[1]]
txt_ph.text = 'Project Name: ' + project_name + '\nCreated By: ' + username
# Model Details Section
model_section_slide_layout = slides[1].slide_layout
model_section_slide = prs.slides.add_slide(model_section_slide_layout)
## Add title
placeholders = model_section_slide.placeholders
ph_idx = [ph.placeholder_format.idx for ph in placeholders]
title_ph = model_section_slide.placeholders[ph_idx[0]]
title_ph.text = 'Model Details'
section_ph = model_section_slide.placeholders[ph_idx[1]]
section_ph.text = 'Section 1'
# Data Import
data_import_slide_layout = slides[2].slide_layout
data_import_slide = prs.slides.add_slide(data_import_slide_layout)
data_import_slide = title_and_table(slide=data_import_slide,
title='Data Import',
df=data_import(data, bin_dict),
column_width={0: 2, 1: 7}
)
# Channel Groups
channel_group_slide_layout = slides[3].slide_layout
channel_group_slide = prs.slides.add_slide(channel_group_slide_layout)
channel_group_slide = title_and_table(slide=channel_group_slide,
title='Channels - Media and Spend',
df=channel_groups_df(channel_groups_dct, bin_dict),
column_width={0: 2, 1: 5, 2: 2}
)
if tuned_model_dict is not None:
model_metrics_df = model_metrics(tuned_model_dict, False)
# Model Results
for model_key, model_dict in tuned_model_dict.items():
model_result_slide_layout = slides[4].slide_layout
model_result_slide = prs.slides.add_slide(model_result_slide_layout)
model_result_slide = model_result(slide=model_result_slide,
model_key=model_key,
model_dict=model_dict,
model_metrics_df=model_metrics_df,
date_col='date')
if contributions_excels_dict is not None:
# Metrics Contributions
metrics_contributions_slide_layout = slides[5].slide_layout
metrics_contributions_slide = prs.slides.add_slide(metrics_contributions_slide_layout)
metrics_contributions_slide = metrics_contributions(slide=metrics_contributions_slide,
contributions_excels_dict=contributions_excels_dict,
panel_col=panel_col
)
# Media Performance
for target in contributions_excels_dict.keys():
# Chart
model_media_perf_slide_layout = slides[6].slide_layout
model_media_perf_slide = prs.slides.add_slide(model_media_perf_slide_layout)
contribution_df, spends_df = model_media_performance(slide=model_media_perf_slide,
target=target,
contributions_excels_dict=contributions_excels_dict
)
# Tabular View
contri_spends_df = pd.merge(spends_df, contribution_df, on='Channel', how='outer')
contri_spends_df.fillna(0, inplace=True)
for col in [c for c in contri_spends_df.columns if c != 'Channel']:
contri_spends_df[col] = contri_spends_df[col].apply(lambda x: safe_num_to_per(x))
media_performance_table_slide_layout = slides[7].slide_layout
media_performance_table_slide = prs.slides.add_slide(media_performance_table_slide_layout)
media_performance_table_slide = title_and_table(slide=media_performance_table_slide,
title='Media and Spends Channels Tabular View',
df=contri_spends_df,
# column_width={0:2, 1:5, 2:2}
)
if saved_scenarios_loaded is not None:
# Optimization Details
opt_section_slide_layout = slides[8].slide_layout
opt_section_slide = prs.slides.add_slide(opt_section_slide_layout)
## Add title
placeholders = opt_section_slide.placeholders
ph_idx = [ph.placeholder_format.idx for ph in placeholders]
title_ph = opt_section_slide.placeholders[ph_idx[0]]
title_ph.text = 'Optimizations Details'
section_ph = opt_section_slide.placeholders[ph_idx[1]]
section_ph.text = 'Section 2'
# Optimization
for scenario_name, scenario in saved_scenarios_loaded.items():
opt_summary_slide_layout = slides[9].slide_layout
opt_summary_slide = prs.slides.add_slide(opt_summary_slide_layout)
optimization_summary(opt_summary_slide, scenario, scenario_name)
channel_spends_slide_layout = slides[10].slide_layout
channel_spends_slide = prs.slides.add_slide(channel_spends_slide_layout)
channel_wise_spends(channel_spends_slide, scenario)
channel_roi_slide_layout = slides[11].slide_layout
channel_roi_slide = prs.slides.add_slide(channel_roi_slide_layout)
channel_wise_roi(channel_roi_slide, scenario)
effictiveness_efficiency_slide_layout = slides[12].slide_layout
effictiveness_efficiency_slide = prs.slides.add_slide(effictiveness_efficiency_slide_layout)
effictiveness_efficiency(effictiveness_efficiency_slide,
data,
bin_dict,
scenario)
# Appendix Section
appendix_section_slide_layout = slides[13].slide_layout
appendix_section_slide = prs.slides.add_slide(appendix_section_slide_layout)
if tuned_model_dict is not None:
## Add title
placeholders = appendix_section_slide.placeholders
ph_idx = [ph.placeholder_format.idx for ph in placeholders]
title_ph = appendix_section_slide.placeholders[ph_idx[0]]
title_ph.text = 'Appendix'
section_ph = appendix_section_slide.placeholders[ph_idx[1]]
section_ph.text = 'Section 3'
# Add transformations
# if transform_dict is not None:
# # Transformations
# transformation_slide_layout = slides[14].slide_layout
# transformation_slide = prs.slides.add_slide(transformation_slide_layout)
# transformation_slide = title_and_table(slide=transformation_slide,
# title='Transformations',
# df=transformations(transform_dict),
# custom_table_height=True
# )
# Add model summary
# Model Summary
model_metrics_df = model_metrics(tuned_model_dict, False)
model_summary_slide_layout = slides[15].slide_layout
model_summary_slide = prs.slides.add_slide(model_summary_slide_layout)
model_summary_slide = title_and_table(slide=model_summary_slide,
title='Model Summary',
df=model_metrics_df,
custom_table_height=True
)
# Last Slide
last_slide_layout = slides[num_slides - 1].slide_layout
last_slide = prs.slides.add_slide(last_slide_layout)
# Add title
placeholders = last_slide.placeholders
ph_idx = [ph.placeholder_format.idx for ph in placeholders]
title_ph = last_slide.placeholders[ph_idx[0]]
title_ph.text = 'Thank You'
# Remove template slides
xml_slides = prs.slides._sldIdLst
slides = list(xml_slides)
for index in range(num_slides):
xml_slides.remove(slides[index])
# prs.save(ppt_path)
# save the output into binary form
binary_output = BytesIO()
prs.save(binary_output)
return binary_output