Samkeet-Blend360
committed on
Commit
·
53c950b
1
Parent(s):
ea88218
Fix
Browse files- pages/5_Glossary.py → 5_Glossary.py +35 -35
- __pycache__/utilities.cpython-310.pyc +0 -0
- pages/2_Scenario_Planner.py +0 -0
- pages/3_Saved_Scenarios.py +40 -23
- response_curves_input_file.xlsx +0 -0
- summary_df.pkl +2 -2
- utilities.py +17 -5
pages/5_Glossary.py → 5_Glossary.py
RENAMED
@@ -1,36 +1,36 @@
|
|
1 |
-
import streamlit as st
|
2 |
-
|
3 |
-
# st.set_page_config(
|
4 |
-
# layout="wide"
|
5 |
-
# )
|
6 |
-
|
7 |
-
def glossary_run():
|
8 |
-
st.header("Glossary")
|
9 |
-
with st.expander("Model MMM Terminology"):
|
10 |
-
st.subheader("Glossary of MMM Terminology")
|
11 |
-
st.write("**• Model R-squared \(R\)\:** This is a statistical measure used to determine the percentage of variation in the dependent variable that the independent variables explain collectively. It ranges between 0 and 1, where 1 indicates a perfect fit and 0 indicates no linear relationship. An R2 greater than 0.8 usually indicates a great model fit.")
|
12 |
-
|
13 |
-
st.write("**• Mean Absolute Percentage Error \(MAPE\):** This is a measure used to determine the accuracy of a predictive model. It calculates the average absolute percentage difference between the actual and predicted values, expressing the result as a percentage to provide a sense of scale for the error.")
|
14 |
-
|
15 |
-
st.write("**• Media & Baseline Elasticity:** It refers to the percentage change in the number of prospects in response to a percentage change in a marketing input \(media channel spends\) or a baseline factor \(like seasonality. macro factors, competitors spending, etc.\). It is a measure of the responsiveness of the number of prospects to changes in the marketing input or the baseline factor")
|
16 |
-
|
17 |
-
st.write("**• Media Half-Life:** This represents the time it takes for a media spend's impact to reduce to half of its initial impact. It is a key aspect of media decay rates, which represent how the effect of advertising diminishes over time \(in weeks\). This term refers to a curve that illustrates the relationship between media spend and the resulting number of prospects.")
|
18 |
-
|
19 |
-
st.write("**• Support:** Equivalent to Impression or Click depending on the media channel.")
|
20 |
-
|
21 |
-
st.write("**• Contribution Share:** Unit is %. It refers to the percentage contribution of a specific marketing channel to the number of prospects. It is calculated by dividing the contribution from a particular channel by the total number of prospects from all media channels \(not including base contributions\).")
|
22 |
-
|
23 |
-
st.write("**• Spend Share:** Unit is %. It refers to the percentage of the total marketing budget that is allocated to a specific marketing channel. It is calculated by dividing the amount spent on a particular channel by the total marketing spend")
|
24 |
-
|
25 |
-
st.write("**• Support Share:** Unit is %. It refers to the percentage of the total media impression that is allocated to a specific marketing channel. It is calculated by dividing support on a particular channel by the total marketing spend")
|
26 |
-
|
27 |
-
st.write("**• Efficiency Index:** it is a metric that measures the cost-effectiveness of a campaign. It is calculated by dividing Contribution Share by Spend Share. An efficiency index above 1 suggests that a channel is more cost-effective than the benchmark, while an efficiency index below 1 suggests it is less cost-effective. The higher the efficiency index, the more cost-effective its channel is")
|
28 |
-
|
29 |
-
st.write("**• Effectiveness Index:** It is a metric that measures how well a particular marketing channel is performing relative to its support/impression. It is calculated by dividing the Contribution Share by the Spend Share for each channel")
|
30 |
-
|
31 |
-
st.write("**• Estimated CPM \(Cost Per Thousand Impressions\):** This is an estimation of the cost for every thousand impressions \(or views\) of its advertisement via that media channel. The default values are generated from historical averages.")
|
32 |
-
|
33 |
-
st.write("**• Estimated CPC \(Cost Per Click\):** This is an estimation of the cost for each time someone clicks on its advertisement via that media channel. The default values are generated from historical averages.")
|
34 |
-
with st.expander("Deployment Plan"):
|
35 |
-
st.image(r"image (2).png")
|
36 |
glossary_run()
|
|
|
1 |
+
import streamlit as st
|
2 |
+
|
3 |
+
# st.set_page_config(
|
4 |
+
# layout="wide"
|
5 |
+
# )
|
6 |
+
|
7 |
+
def glossary_run():
    """Render the Glossary page.

    Draws a "Glossary" header followed by two expanders: one listing the
    MMM term definitions as markdown bullets, and one showing the
    deployment-plan image loaded from ``image (2).png``.
    """
    st.header("Glossary")

    with st.expander("Model MMM Terminology"):
        st.subheader("Glossary of MMM Terminology")

        # Raw strings keep the markdown escapes (\( \) \:) exactly as before while
        # avoiding Python's "invalid escape sequence" warning for non-raw literals.
        st.write(r"**• Model R-squared \(R\)\:** This is a statistical measure used to determine the percentage of variation in the dependent variable that the independent variables explain collectively. It ranges between 0 and 1, where 1 indicates a perfect fit and 0 indicates no linear relationship. An R2 greater than 0.8 usually indicates a great model fit.")

        st.write(r"**• Mean Absolute Percentage Error \(MAPE\):** This is a measure used to determine the accuracy of a predictive model. It calculates the average absolute percentage difference between the actual and predicted values, expressing the result as a percentage to provide a sense of scale for the error.")

        st.write(r"**• Media & Baseline Elasticity:** It refers to the percentage change in the number of prospects in response to a percentage change in a marketing input \(media channel spends\) or a baseline factor \(like seasonality. macro factors, competitors spending, etc.\). It is a measure of the responsiveness of the number of prospects to changes in the marketing input or the baseline factor")

        st.write(r"**• Media Half-Life:** This represents the time it takes for a media spend's impact to reduce to half of its initial impact. It is a key aspect of media decay rates, which represent how the effect of advertising diminishes over time \(in weeks\). This term refers to a curve that illustrates the relationship between media spend and the resulting number of prospects.")

        st.write(r"**• Support:** Equivalent to Impression or Click depending on the media channel.")

        st.write(r"**• Contribution Share:** Unit is %. It refers to the percentage contribution of a specific marketing channel to the number of prospects. It is calculated by dividing the contribution from a particular channel by the total number of prospects from all media channels \(not including base contributions\).")

        st.write(r"**• Spend Share:** Unit is %. It refers to the percentage of the total marketing budget that is allocated to a specific marketing channel. It is calculated by dividing the amount spent on a particular channel by the total marketing spend")

        # NOTE(review): the denominator reads "total marketing spend", the same as
        # Spend Share above — presumably "total support" was intended; confirm.
        st.write(r"**• Support Share:** Unit is %. It refers to the percentage of the total media impression that is allocated to a specific marketing channel. It is calculated by dividing support on a particular channel by the total marketing spend")

        st.write(r"**• Efficiency Index:** it is a metric that measures the cost-effectiveness of a campaign. It is calculated by dividing Contribution Share by Spend Share. An efficiency index above 1 suggests that a channel is more cost-effective than the benchmark, while an efficiency index below 1 suggests it is less cost-effective. The higher the efficiency index, the more cost-effective its channel is")

        # NOTE(review): the formula text matches Efficiency Index (Contribution
        # Share / Spend Share) although the definition is about support — confirm
        # whether "Support Share" was intended.
        st.write(r"**• Effectiveness Index:** It is a metric that measures how well a particular marketing channel is performing relative to its support/impression. It is calculated by dividing the Contribution Share by the Spend Share for each channel")

        st.write(r"**• Estimated CPM \(Cost Per Thousand Impressions\):** This is an estimation of the cost for every thousand impressions \(or views\) of its advertisement via that media channel. The default values are generated from historical averages.")

        st.write(r"**• Estimated CPC \(Cost Per Click\):** This is an estimation of the cost for each time someone clicks on its advertisement via that media channel. The default values are generated from historical averages.")

    # Separate top-level expander: Streamlit does not allow expanders to be nested.
    with st.expander("Deployment Plan"):
        st.image(r"image (2).png")
|
36 |
glossary_run()
|
__pycache__/utilities.cpython-310.pyc
CHANGED
Binary files a/__pycache__/utilities.cpython-310.pyc and b/__pycache__/utilities.cpython-310.pyc differ
|
|
pages/2_Scenario_Planner.py
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
pages/3_Saved_Scenarios.py
CHANGED
@@ -100,10 +100,11 @@ def comparison_scenarios_df():
|
|
100 |
efficiency_df = pd.DataFrame(index = summary_df_prospect.index)
|
101 |
|
102 |
for c in summary_df_spend.columns:
|
103 |
-
efficiency_df[c] = (summary_df_prospect[c]
|
104 |
efficiency_df[c] = efficiency_df[c].round(2)
|
105 |
-
|
106 |
-
return summary_df_spend,summary_df_prospect,efficiency_df
|
|
|
107 |
import matplotlib.colors as mcolors
|
108 |
import plotly.colors as pc
|
109 |
|
@@ -175,7 +176,7 @@ def plot_comparison_chart(df,metric,custom_colors):
|
|
175 |
|
176 |
# Create the layout
|
177 |
layout = go.Layout(
|
178 |
-
title=
|
179 |
xaxis_title="Channels",
|
180 |
yaxis_title=metric,
|
181 |
barmode='group'
|
@@ -240,22 +241,23 @@ def create_comparison_plots():
|
|
240 |
|
241 |
custom_colors = generate_color_gradient(blue, red, spends_df.shape[1])
|
242 |
st.plotly_chart(plot_comparison_chart(spends_df,"Spends",custom_colors),use_container_width=True)
|
243 |
-
st.plotly_chart(plot_comparison_chart(prospects_df,"
|
244 |
-
st.plotly_chart(plot_comparison_chart(efficiency_df,"
|
245 |
|
246 |
fig1 = plot_comparison_chart(spends_df,"Spends",custom_colors)
|
247 |
-
fig2 = plot_comparison_chart(prospects_df,"
|
248 |
-
fig3 = plot_comparison_chart(efficiency_df,"
|
249 |
-
|
250 |
-
ppt_file = save_ppt_file(fig1,fig2,fig3)
|
251 |
-
# Add a download button
|
252 |
-
st.download_button(
|
253 |
-
label="Download Comparision Analysis",
|
254 |
-
data=ppt_file,
|
255 |
-
file_name="MMM_Scenario_Comparision.pptx",
|
256 |
-
mime="application/vnd.openxmlformats-officedocument.presentationml.presentation"
|
257 |
-
)
|
258 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
259 |
|
260 |
def create_scenario_summary(scenario_dict):
|
261 |
summary_rows = []
|
@@ -268,8 +270,19 @@ def create_scenario_summary(scenario_dict):
|
|
268 |
# st.write(modified_total_sales)
|
269 |
# st.write(actual_total_sales[0])
|
270 |
# st.write(modified_total_spends[0])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
271 |
for channel_dict in scenario_dict['channels']:
|
272 |
-
# st.write(channel_dict['name'])
|
273 |
name_mod = channel_name_formating(channel_dict['name'])
|
274 |
summary_rows.append([name_mod,
|
275 |
channel_dict.get('actual_total_spends') * channel_dict.get('conversion_rate'),
|
@@ -304,14 +317,16 @@ def create_scenario_summary(scenario_dict):
|
|
304 |
])
|
305 |
|
306 |
adf = pd.DataFrame(summary_rows)
|
307 |
-
|
308 |
-
|
|
|
|
|
309 |
adf.columns = ["1","2","3","4","5","6","7"]
|
310 |
adf.index = adf["1"].to_list() #["1","2","3","4","5","6","7","8","9","10","11","12","13","14"]
|
311 |
adf.drop(columns= ["1"],inplace= True)
|
312 |
# columns_index = pd.MultiIndex.from_product([[''],['Channel']], names=["",""])
|
313 |
# columns_index = columns_index.append(pd.MultiIndex.from_product([['Spends','Prospects',"Efficiency"],['Actual','Simulated']], names=["",""]))
|
314 |
-
columns_index = pd.MultiIndex.from_product([['Spends','
|
315 |
adf.columns = columns_index
|
316 |
return adf # pd.DataFrame(summary_rows, columns=columns_index)
|
317 |
|
@@ -460,7 +475,7 @@ def download_scenarios():
|
|
460 |
# scenario_dict.get('modified_total_spends') / scenario_dict.get('modified_total_sales')
|
461 |
])
|
462 |
columns_index = pd.MultiIndex.from_product([[''],['Channel']], names=["first", "second"])
|
463 |
-
columns_index = columns_index.append(pd.MultiIndex.from_product([[scenario_name],['Spends','
|
464 |
# 'ROI','MROI','Spends per NRPU'
|
465 |
]], names=["first", "second"]))
|
466 |
if summary_df is None:
|
@@ -470,6 +485,8 @@ def download_scenarios():
|
|
470 |
_df = pd.DataFrame(summary_rows, columns = columns_index)
|
471 |
_df = _df.set_index(('','Channel'))
|
472 |
summary_df = summary_df.merge(_df, left_index=True, right_index=True)
|
|
|
|
|
473 |
ws = wb.create_sheet('Summary',0)
|
474 |
summary_df_to_worksheet(summary_df.reset_index(), ws)
|
475 |
wb.save(st.session_state['xlsx_buffer'])
|
@@ -560,7 +577,7 @@ if auth_status == True:
|
|
560 |
# with column_3:
|
561 |
# st.button('Load Scenario', on_click=load_scenario)
|
562 |
|
563 |
-
selected_scenario_details = saved_scenarios[selected_scenario]
|
564 |
|
565 |
pd.set_option('display.max_colwidth', 100)
|
566 |
# st.table(create_scenario_summary(selected_scenario_details))
|
|
|
100 |
efficiency_df = pd.DataFrame(index = summary_df_prospect.index)
|
101 |
|
102 |
for c in summary_df_spend.columns:
|
103 |
+
efficiency_df[c] = (summary_df_prospect[c])/(summary_df_spend[c])
|
104 |
efficiency_df[c] = efficiency_df[c].round(2)
|
105 |
+
|
106 |
+
return summary_df_spend, summary_df_prospect, efficiency_df
|
107 |
+
|
108 |
import matplotlib.colors as mcolors
|
109 |
import plotly.colors as pc
|
110 |
|
|
|
176 |
|
177 |
# Create the layout
|
178 |
layout = go.Layout(
|
179 |
+
title=metric,
|
180 |
xaxis_title="Channels",
|
181 |
yaxis_title=metric,
|
182 |
barmode='group'
|
|
|
241 |
|
242 |
custom_colors = generate_color_gradient(blue, red, spends_df.shape[1])
|
243 |
st.plotly_chart(plot_comparison_chart(spends_df,"Spends",custom_colors),use_container_width=True)
|
244 |
+
st.plotly_chart(plot_comparison_chart(prospects_df,"Revenue",custom_colors),use_container_width=True)
|
245 |
+
st.plotly_chart(plot_comparison_chart(efficiency_df,"ROI",custom_colors),use_container_width=True)
|
246 |
|
247 |
fig1 = plot_comparison_chart(spends_df,"Spends",custom_colors)
|
248 |
+
fig2 = plot_comparison_chart(prospects_df,"Revenue",custom_colors)
|
249 |
+
fig3 = plot_comparison_chart(efficiency_df,"ROI",custom_colors)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
250 |
|
251 |
+
# ppt_file = save_ppt_file(fig1,fig2,fig3)
|
252 |
+
# # Add a download button
|
253 |
+
# st.download_button(
|
254 |
+
# label="Download Comparision Analysis",
|
255 |
+
# data=ppt_file,
|
256 |
+
# file_name="MMM_Scenario_Comparision.pptx",
|
257 |
+
# mime="application/vnd.openxmlformats-officedocument.presentationml.presentation"
|
258 |
+
# )
|
259 |
+
|
260 |
+
import numpy as np
|
261 |
|
262 |
def create_scenario_summary(scenario_dict):
|
263 |
summary_rows = []
|
|
|
270 |
# st.write(modified_total_sales)
|
271 |
# st.write(actual_total_sales[0])
|
272 |
# st.write(modified_total_spends[0])
|
273 |
+
# st.write(scenario_dict)
|
274 |
+
|
275 |
+
if scenario_dict == {} or 'channels' not in scenario_dict.keys():
|
276 |
+
st.warning("Save scenario properly again!")
|
277 |
+
st.stop()
|
278 |
+
|
279 |
+
# Ensure 'channels' exists and is a list
|
280 |
+
if "channels" in scenario_dict and isinstance(scenario_dict['channels'], list):
|
281 |
+
for channel in scenario_dict['channels']:
|
282 |
+
if channel.get("name") == "Connected & OTTTV": # Check if the key "name" has the incorrect value
|
283 |
+
channel["name"] = "Connected & OTT TV" # Replace it
|
284 |
+
|
285 |
for channel_dict in scenario_dict['channels']:
|
|
|
286 |
name_mod = channel_name_formating(channel_dict['name'])
|
287 |
summary_rows.append([name_mod,
|
288 |
channel_dict.get('actual_total_spends') * channel_dict.get('conversion_rate'),
|
|
|
317 |
])
|
318 |
|
319 |
adf = pd.DataFrame(summary_rows)
|
320 |
+
|
321 |
+
adf[5] = adf[3] / adf[1].replace(0, np.nan)
|
322 |
+
adf[6] = adf[4] / adf[2].replace(0, np.nan)
|
323 |
+
|
324 |
adf.columns = ["1","2","3","4","5","6","7"]
|
325 |
adf.index = adf["1"].to_list() #["1","2","3","4","5","6","7","8","9","10","11","12","13","14"]
|
326 |
adf.drop(columns= ["1"],inplace= True)
|
327 |
# columns_index = pd.MultiIndex.from_product([[''],['Channel']], names=["",""])
|
328 |
# columns_index = columns_index.append(pd.MultiIndex.from_product([['Spends','Prospects',"Efficiency"],['Actual','Simulated']], names=["",""]))
|
329 |
+
columns_index = pd.MultiIndex.from_product([['Spends','Revenue',"ROI"],['Actual','Simulated']], names=["",""])
|
330 |
adf.columns = columns_index
|
331 |
return adf # pd.DataFrame(summary_rows, columns=columns_index)
|
332 |
|
|
|
475 |
# scenario_dict.get('modified_total_spends') / scenario_dict.get('modified_total_sales')
|
476 |
])
|
477 |
columns_index = pd.MultiIndex.from_product([[''],['Channel']], names=["first", "second"])
|
478 |
+
columns_index = columns_index.append(pd.MultiIndex.from_product([[scenario_name],['Spends','Revenue',
|
479 |
# 'ROI','MROI','Spends per NRPU'
|
480 |
]], names=["first", "second"]))
|
481 |
if summary_df is None:
|
|
|
485 |
_df = pd.DataFrame(summary_rows, columns = columns_index)
|
486 |
_df = _df.set_index(('','Channel'))
|
487 |
summary_df = summary_df.merge(_df, left_index=True, right_index=True)
|
488 |
+
|
489 |
+
st.write(summary_df)
|
490 |
ws = wb.create_sheet('Summary',0)
|
491 |
summary_df_to_worksheet(summary_df.reset_index(), ws)
|
492 |
wb.save(st.session_state['xlsx_buffer'])
|
|
|
577 |
# with column_3:
|
578 |
# st.button('Load Scenario', on_click=load_scenario)
|
579 |
|
580 |
+
selected_scenario_details = saved_scenarios[selected_scenario].copy()
|
581 |
|
582 |
pd.set_option('display.max_colwidth', 100)
|
583 |
# st.table(create_scenario_summary(selected_scenario_details))
|
response_curves_input_file.xlsx
CHANGED
Binary files a/response_curves_input_file.xlsx and b/response_curves_input_file.xlsx differ
|
|
summary_df.pkl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4279102a6e3c64422e780dad800d1de417ff74112d57e124b790a1f6af5376ea
|
3 |
+
size 1820
|
utilities.py
CHANGED
@@ -216,9 +216,19 @@ def initialize_data(
|
|
216 |
excel = pd.read_excel(target_file, sheet_name=None)
|
217 |
|
218 |
# Extract dataframes for raw data, spend input, and contribution MMM
|
219 |
-
raw_df = excel["RAW DATA MMM"]
|
220 |
-
spend_df = excel["SPEND INPUT"]
|
221 |
-
contri_df = excel["CONTRIBUTION MMM"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
222 |
|
223 |
# Check if the panel is not None
|
224 |
|
@@ -292,6 +302,7 @@ def initialize_data(
|
|
292 |
|
293 |
params = pd.read_excel("response_curves_parameters.xlsx",index_col = "channel")
|
294 |
param_dicts = {col: params[col].to_dict() for col in params.columns}
|
|
|
295 |
response_curves[inp_col] = {
|
296 |
"Kd": param_dicts["Kd"][inp_col],
|
297 |
"n": param_dicts["n"][inp_col],
|
@@ -760,6 +771,7 @@ def decimal_formater(num_string, n_decimals=1):
|
|
760 |
|
761 |
|
762 |
def channel_name_formating(channel_name):
|
|
|
763 |
name_mod = channel_name.replace("_", " ")
|
764 |
if name_mod.lower().endswith(" imp"):
|
765 |
name_mod = name_mod.replace("Imp", "Spend")
|
@@ -779,9 +791,9 @@ def channel_name_formating(channel_name):
|
|
779 |
"Email" :"Email" ,
|
780 |
"SearchBrand": "Search Brand",
|
781 |
"DisplayRetargeting" : "Display Retargeting" ,
|
782 |
-
"
|
783 |
}
|
784 |
-
return key_dict[channel_name]
|
785 |
|
786 |
|
787 |
def send_email(email, message):
|
|
|
216 |
excel = pd.read_excel(target_file, sheet_name=None)
|
217 |
|
218 |
# Extract dataframes for raw data, spend input, and contribution MMM
|
219 |
+
# raw_df = excel["RAW DATA MMM"]
|
220 |
+
# spend_df = excel["SPEND INPUT"]
|
221 |
+
# contri_df = excel["CONTRIBUTION MMM"]
|
222 |
+
|
223 |
+
# Function to strip spaces from column names and string values in a DataFrame
|
224 |
+
def clean_dataframe(df):
    """Strip surrounding whitespace from column labels and string cells.

    Args:
        df: DataFrame to clean. Its column labels are updated in place.

    Returns:
        A new DataFrame in which every ``str`` cell has been stripped; cells of
        any other type (numbers, ``None``, timestamps, ...) are left untouched.
    """
    # Strip only labels that are actually strings: the ``.str`` accessor would
    # turn numeric/date headers into NaN (or raise), corrupting the header row.
    df.columns = df.columns.map(
        lambda label: label.strip() if isinstance(label, str) else label
    )

    # DataFrame.applymap is deprecated since pandas 2.1 in favour of
    # DataFrame.map; fall back to applymap on older pandas versions.
    elementwise = df.map if hasattr(pd.DataFrame, "map") else df.applymap
    return elementwise(lambda cell: cell.strip() if isinstance(cell, str) else cell)
|
227 |
+
|
228 |
+
# Apply the function to all DataFrames
|
229 |
+
raw_df = clean_dataframe(excel["RAW DATA MMM"])
|
230 |
+
spend_df = clean_dataframe(excel["SPEND INPUT"])
|
231 |
+
contri_df = clean_dataframe(excel["CONTRIBUTION MMM"])
|
232 |
|
233 |
# Check if the panel is not None
|
234 |
|
|
|
302 |
|
303 |
params = pd.read_excel("response_curves_parameters.xlsx",index_col = "channel")
|
304 |
param_dicts = {col: params[col].to_dict() for col in params.columns}
|
305 |
+
|
306 |
response_curves[inp_col] = {
|
307 |
"Kd": param_dicts["Kd"][inp_col],
|
308 |
"n": param_dicts["n"][inp_col],
|
|
|
771 |
|
772 |
|
773 |
def channel_name_formating(channel_name):
|
774 |
+
channel_name = channel_name.strip()
|
775 |
name_mod = channel_name.replace("_", " ")
|
776 |
if name_mod.lower().endswith(" imp"):
|
777 |
name_mod = name_mod.replace("Imp", "Spend")
|
|
|
791 |
"Email" :"Email" ,
|
792 |
"SearchBrand": "Search Brand",
|
793 |
"DisplayRetargeting" : "Display Retargeting" ,
|
794 |
+
"Video":"Video"
|
795 |
}
|
796 |
+
return key_dict[channel_name].replace("Connected & OTTTV", "Connected & OTT TV")
|
797 |
|
798 |
|
799 |
def send_email(email, message):
|