Samkeet-Blend360 committed on
Commit
53c950b
·
1 Parent(s): ea88218
pages/5_Glossary.py → 5_Glossary.py RENAMED
@@ -1,36 +1,36 @@
1
- import streamlit as st
2
-
3
- # st.set_page_config(
4
- # layout="wide"
5
- # )
6
-
7
- def glossary_run():
8
- st.header("Glossary")
9
- with st.expander("Model MMM Terminology"):
10
- st.subheader("Glossary of MMM Terminology")
11
- st.write("**• Model R-squared \(R\)\:** This is a statistical measure used to determine the percentage of variation in the dependent variable that the independent variables explain collectively. It ranges between 0 and 1, where 1 indicates a perfect fit and 0 indicates no linear relationship. An R2 greater than 0.8 usually indicates a great model fit.")
12
-
13
- st.write("**• Mean Absolute Percentage Error \(MAPE\):** This is a measure used to determine the accuracy of a predictive model. It calculates the average absolute percentage difference between the actual and predicted values, expressing the result as a percentage to provide a sense of scale for the error.")
14
-
15
- st.write("**• Media & Baseline Elasticity:** It refers to the percentage change in the number of prospects in response to a percentage change in a marketing input \(media channel spends\) or a baseline factor \(like seasonality. macro factors, competitors spending, etc.\). It is a measure of the responsiveness of the number of prospects to changes in the marketing input or the baseline factor")
16
-
17
- st.write("**• Media Half-Life:** This represents the time it takes for a media spend's impact to reduce to half of its initial impact. It is a key aspect of media decay rates, which represent how the effect of advertising diminishes over time \(in weeks\). This term refers to a curve that illustrates the relationship between media spend and the resulting number of prospects.")
18
-
19
- st.write("**• Support:** Equivalent to Impression or Click depending on the media channel.")
20
-
21
- st.write("**• Contribution Share:** Unit is %. It refers to the percentage contribution of a specific marketing channel to the number of prospects. It is calculated by dividing the contribution from a particular channel by the total number of prospects from all media channels \(not including base contributions\).")
22
-
23
- st.write("**• Spend Share:** Unit is %. It refers to the percentage of the total marketing budget that is allocated to a specific marketing channel. It is calculated by dividing the amount spent on a particular channel by the total marketing spend")
24
-
25
- st.write("**• Support Share:** Unit is %. It refers to the percentage of the total media impression that is allocated to a specific marketing channel. It is calculated by dividing support on a particular channel by the total marketing spend")
26
-
27
- st.write("**• Efficiency Index:** it is a metric that measures the cost-effectiveness of a campaign. It is calculated by dividing Contribution Share by Spend Share. An efficiency index above 1 suggests that a channel is more cost-effective than the benchmark, while an efficiency index below 1 suggests it is less cost-effective. The higher the efficiency index, the more cost-effective its channel is")
28
-
29
- st.write("**• Effectiveness Index:** It is a metric that measures how well a particular marketing channel is performing relative to its support/impression. It is calculated by dividing the Contribution Share by the Spend Share for each channel")
30
-
31
- st.write("**• Estimated CPM \(Cost Per Thousand Impressions\):** This is an estimation of the cost for every thousand impressions \(or views\) of its advertisement via that media channel. The default values are generated from historical averages.")
32
-
33
- st.write("**• Estimated CPC \(Cost Per Click\):** This is an estimation of the cost for each time someone clicks on its advertisement via that media channel. The default values are generated from historical averages.")
34
- with st.expander("Deployment Plan"):
35
- st.image(r"image (2).png")
36
  glossary_run()
 
1
+ import streamlit as st
2
+
3
+ # st.set_page_config(
4
+ # layout="wide"
5
+ # )
6
+
7
# Markdown entries rendered, in order, inside the "Model MMM Terminology" expander.
# Raw strings are used because the text contains the Markdown escapes \( \) and \:,
# which are invalid escape sequences in ordinary Python string literals (a
# SyntaxWarning on Python 3.12+). The runtime text is unchanged.
#
# NOTE(review): a few definitions look inconsistent with their own wording and
# should be confirmed with the content owner before the text is changed:
#   - "Support Share" divides by "total marketing spend" although it describes a
#     share of total impressions.
#   - "Effectiveness Index" repeats the Efficiency Index formula (Contribution
#     Share / Spend Share) although it is described as relative to support.
#   - "Media Half-Life" ends with a sentence that describes a spend/prospects curve.
_MMM_GLOSSARY_ENTRIES = (
    r"**• Model R-squared \(R\)\:** This is a statistical measure used to determine the percentage of variation in the dependent variable that the independent variables explain collectively. It ranges between 0 and 1, where 1 indicates a perfect fit and 0 indicates no linear relationship. An R2 greater than 0.8 usually indicates a great model fit.",
    r"**• Mean Absolute Percentage Error \(MAPE\):** This is a measure used to determine the accuracy of a predictive model. It calculates the average absolute percentage difference between the actual and predicted values, expressing the result as a percentage to provide a sense of scale for the error.",
    r"**• Media & Baseline Elasticity:** It refers to the percentage change in the number of prospects in response to a percentage change in a marketing input \(media channel spends\) or a baseline factor \(like seasonality. macro factors, competitors spending, etc.\). It is a measure of the responsiveness of the number of prospects to changes in the marketing input or the baseline factor",
    r"**• Media Half-Life:** This represents the time it takes for a media spend's impact to reduce to half of its initial impact. It is a key aspect of media decay rates, which represent how the effect of advertising diminishes over time \(in weeks\). This term refers to a curve that illustrates the relationship between media spend and the resulting number of prospects.",
    r"**• Support:** Equivalent to Impression or Click depending on the media channel.",
    r"**• Contribution Share:** Unit is %. It refers to the percentage contribution of a specific marketing channel to the number of prospects. It is calculated by dividing the contribution from a particular channel by the total number of prospects from all media channels \(not including base contributions\).",
    r"**• Spend Share:** Unit is %. It refers to the percentage of the total marketing budget that is allocated to a specific marketing channel. It is calculated by dividing the amount spent on a particular channel by the total marketing spend",
    r"**• Support Share:** Unit is %. It refers to the percentage of the total media impression that is allocated to a specific marketing channel. It is calculated by dividing support on a particular channel by the total marketing spend",
    r"**• Efficiency Index:** it is a metric that measures the cost-effectiveness of a campaign. It is calculated by dividing Contribution Share by Spend Share. An efficiency index above 1 suggests that a channel is more cost-effective than the benchmark, while an efficiency index below 1 suggests it is less cost-effective. The higher the efficiency index, the more cost-effective its channel is",
    r"**• Effectiveness Index:** It is a metric that measures how well a particular marketing channel is performing relative to its support/impression. It is calculated by dividing the Contribution Share by the Spend Share for each channel",
    r"**• Estimated CPM \(Cost Per Thousand Impressions\):** This is an estimation of the cost for every thousand impressions \(or views\) of its advertisement via that media channel. The default values are generated from historical averages.",
    r"**• Estimated CPC \(Cost Per Click\):** This is an estimation of the cost for each time someone clicks on its advertisement via that media channel. The default values are generated from historical averages.",
)


def glossary_run():
    """Render the Glossary page.

    Writes a "Glossary" header followed by two expanders: one listing the MMM
    terminology entries as Markdown, and one showing the deployment-plan image.
    Returns nothing; all output goes through Streamlit calls.
    """
    st.header("Glossary")

    # NOTE(review): the nesting under each expander is reconstructed from the
    # statement order; the original indentation was not visible - confirm.
    with st.expander("Model MMM Terminology"):
        st.subheader("Glossary of MMM Terminology")
        for entry in _MMM_GLOSSARY_ENTRIES:
            st.write(entry)

    with st.expander("Deployment Plan"):
        # The path is relative, so it is resolved against the process working directory.
        st.image(r"image (2).png")
36
  glossary_run()
__pycache__/utilities.cpython-310.pyc CHANGED
Binary files a/__pycache__/utilities.cpython-310.pyc and b/__pycache__/utilities.cpython-310.pyc differ
 
pages/2_Scenario_Planner.py CHANGED
The diff for this file is too large to render. See raw diff
 
pages/3_Saved_Scenarios.py CHANGED
@@ -100,10 +100,11 @@ def comparison_scenarios_df():
100
  efficiency_df = pd.DataFrame(index = summary_df_prospect.index)
101
 
102
  for c in summary_df_spend.columns:
103
- efficiency_df[c] = (summary_df_prospect[c]/summary_df_prospect[c].sum())/(summary_df_spend[c]/summary_df_spend[c].sum())
104
  efficiency_df[c] = efficiency_df[c].round(2)
105
-
106
- return summary_df_spend,summary_df_prospect,efficiency_df
 
107
  import matplotlib.colors as mcolors
108
  import plotly.colors as pc
109
 
@@ -175,7 +176,7 @@ def plot_comparison_chart(df,metric,custom_colors):
175
 
176
  # Create the layout
177
  layout = go.Layout(
178
- title='Comparing '+ metric,
179
  xaxis_title="Channels",
180
  yaxis_title=metric,
181
  barmode='group'
@@ -240,22 +241,23 @@ def create_comparison_plots():
240
 
241
  custom_colors = generate_color_gradient(blue, red, spends_df.shape[1])
242
  st.plotly_chart(plot_comparison_chart(spends_df,"Spends",custom_colors),use_container_width=True)
243
- st.plotly_chart(plot_comparison_chart(prospects_df,"Contributions",custom_colors),use_container_width=True)
244
- st.plotly_chart(plot_comparison_chart(efficiency_df,"Efficiency",custom_colors),use_container_width=True)
245
 
246
  fig1 = plot_comparison_chart(spends_df,"Spends",custom_colors)
247
- fig2 = plot_comparison_chart(prospects_df,"Contributions",custom_colors)
248
- fig3 = plot_comparison_chart(efficiency_df,"Efficiency",custom_colors)
249
-
250
- ppt_file = save_ppt_file(fig1,fig2,fig3)
251
- # Add a download button
252
- st.download_button(
253
- label="Download Comparision Analysis",
254
- data=ppt_file,
255
- file_name="MMM_Scenario_Comparision.pptx",
256
- mime="application/vnd.openxmlformats-officedocument.presentationml.presentation"
257
- )
258
 
 
 
 
 
 
 
 
 
 
 
259
 
260
  def create_scenario_summary(scenario_dict):
261
  summary_rows = []
@@ -268,8 +270,19 @@ def create_scenario_summary(scenario_dict):
268
  # st.write(modified_total_sales)
269
  # st.write(actual_total_sales[0])
270
  # st.write(modified_total_spends[0])
 
 
 
 
 
 
 
 
 
 
 
 
271
  for channel_dict in scenario_dict['channels']:
272
- # st.write(channel_dict['name'])
273
  name_mod = channel_name_formating(channel_dict['name'])
274
  summary_rows.append([name_mod,
275
  channel_dict.get('actual_total_spends') * channel_dict.get('conversion_rate'),
@@ -304,14 +317,16 @@ def create_scenario_summary(scenario_dict):
304
  ])
305
 
306
  adf = pd.DataFrame(summary_rows)
307
- # st.write(adf.columns)
308
-
 
 
309
  adf.columns = ["1","2","3","4","5","6","7"]
310
  adf.index = adf["1"].to_list() #["1","2","3","4","5","6","7","8","9","10","11","12","13","14"]
311
  adf.drop(columns= ["1"],inplace= True)
312
  # columns_index = pd.MultiIndex.from_product([[''],['Channel']], names=["",""])
313
  # columns_index = columns_index.append(pd.MultiIndex.from_product([['Spends','Prospects',"Efficiency"],['Actual','Simulated']], names=["",""]))
314
- columns_index = pd.MultiIndex.from_product([['Spends','Prospects',"Efficiency"],['Actual','Simulated']], names=["",""])
315
  adf.columns = columns_index
316
  return adf # pd.DataFrame(summary_rows, columns=columns_index)
317
 
@@ -460,7 +475,7 @@ def download_scenarios():
460
  # scenario_dict.get('modified_total_spends') / scenario_dict.get('modified_total_sales')
461
  ])
462
  columns_index = pd.MultiIndex.from_product([[''],['Channel']], names=["first", "second"])
463
- columns_index = columns_index.append(pd.MultiIndex.from_product([[scenario_name],['Spends','Prospects',
464
  # 'ROI','MROI','Spends per NRPU'
465
  ]], names=["first", "second"]))
466
  if summary_df is None:
@@ -470,6 +485,8 @@ def download_scenarios():
470
  _df = pd.DataFrame(summary_rows, columns = columns_index)
471
  _df = _df.set_index(('','Channel'))
472
  summary_df = summary_df.merge(_df, left_index=True, right_index=True)
 
 
473
  ws = wb.create_sheet('Summary',0)
474
  summary_df_to_worksheet(summary_df.reset_index(), ws)
475
  wb.save(st.session_state['xlsx_buffer'])
@@ -560,7 +577,7 @@ if auth_status == True:
560
  # with column_3:
561
  # st.button('Load Scenario', on_click=load_scenario)
562
 
563
- selected_scenario_details = saved_scenarios[selected_scenario]
564
 
565
  pd.set_option('display.max_colwidth', 100)
566
  # st.table(create_scenario_summary(selected_scenario_details))
 
100
  efficiency_df = pd.DataFrame(index = summary_df_prospect.index)
101
 
102
  for c in summary_df_spend.columns:
103
+ efficiency_df[c] = (summary_df_prospect[c])/(summary_df_spend[c])
104
  efficiency_df[c] = efficiency_df[c].round(2)
105
+
106
+ return summary_df_spend, summary_df_prospect, efficiency_df
107
+
108
  import matplotlib.colors as mcolors
109
  import plotly.colors as pc
110
 
 
176
 
177
  # Create the layout
178
  layout = go.Layout(
179
+ title=metric,
180
  xaxis_title="Channels",
181
  yaxis_title=metric,
182
  barmode='group'
 
241
 
242
  custom_colors = generate_color_gradient(blue, red, spends_df.shape[1])
243
  st.plotly_chart(plot_comparison_chart(spends_df,"Spends",custom_colors),use_container_width=True)
244
+ st.plotly_chart(plot_comparison_chart(prospects_df,"Revenue",custom_colors),use_container_width=True)
245
+ st.plotly_chart(plot_comparison_chart(efficiency_df,"ROI",custom_colors),use_container_width=True)
246
 
247
  fig1 = plot_comparison_chart(spends_df,"Spends",custom_colors)
248
+ fig2 = plot_comparison_chart(prospects_df,"Revenue",custom_colors)
249
+ fig3 = plot_comparison_chart(efficiency_df,"ROI",custom_colors)
 
 
 
 
 
 
 
 
 
250
 
251
+ # ppt_file = save_ppt_file(fig1,fig2,fig3)
252
+ # # Add a download button
253
+ # st.download_button(
254
+ # label="Download Comparision Analysis",
255
+ # data=ppt_file,
256
+ # file_name="MMM_Scenario_Comparision.pptx",
257
+ # mime="application/vnd.openxmlformats-officedocument.presentationml.presentation"
258
+ # )
259
+
260
+ import numpy as np
261
 
262
  def create_scenario_summary(scenario_dict):
263
  summary_rows = []
 
270
  # st.write(modified_total_sales)
271
  # st.write(actual_total_sales[0])
272
  # st.write(modified_total_spends[0])
273
+ # st.write(scenario_dict)
274
+
275
+ if scenario_dict == {} or 'channels' not in scenario_dict.keys():
276
+ st.warning("Save scenario properly again!")
277
+ st.stop()
278
+
279
+ # Ensure 'channels' exists and is a list
280
+ if "channels" in scenario_dict and isinstance(scenario_dict['channels'], list):
281
+ for channel in scenario_dict['channels']:
282
+ if channel.get("name") == "Connected & OTTTV": # Check if the key "name" has the incorrect value
283
+ channel["name"] = "Connected & OTT TV" # Replace it
284
+
285
  for channel_dict in scenario_dict['channels']:
 
286
  name_mod = channel_name_formating(channel_dict['name'])
287
  summary_rows.append([name_mod,
288
  channel_dict.get('actual_total_spends') * channel_dict.get('conversion_rate'),
 
317
  ])
318
 
319
  adf = pd.DataFrame(summary_rows)
320
+
321
+ adf[5] = adf[3] / adf[1].replace(0, np.nan)
322
+ adf[6] = adf[4] / adf[2].replace(0, np.nan)
323
+
324
  adf.columns = ["1","2","3","4","5","6","7"]
325
  adf.index = adf["1"].to_list() #["1","2","3","4","5","6","7","8","9","10","11","12","13","14"]
326
  adf.drop(columns= ["1"],inplace= True)
327
  # columns_index = pd.MultiIndex.from_product([[''],['Channel']], names=["",""])
328
  # columns_index = columns_index.append(pd.MultiIndex.from_product([['Spends','Prospects',"Efficiency"],['Actual','Simulated']], names=["",""]))
329
+ columns_index = pd.MultiIndex.from_product([['Spends','Revenue',"ROI"],['Actual','Simulated']], names=["",""])
330
  adf.columns = columns_index
331
  return adf # pd.DataFrame(summary_rows, columns=columns_index)
332
 
 
475
  # scenario_dict.get('modified_total_spends') / scenario_dict.get('modified_total_sales')
476
  ])
477
  columns_index = pd.MultiIndex.from_product([[''],['Channel']], names=["first", "second"])
478
+ columns_index = columns_index.append(pd.MultiIndex.from_product([[scenario_name],['Spends','Revenue',
479
  # 'ROI','MROI','Spends per NRPU'
480
  ]], names=["first", "second"]))
481
  if summary_df is None:
 
485
  _df = pd.DataFrame(summary_rows, columns = columns_index)
486
  _df = _df.set_index(('','Channel'))
487
  summary_df = summary_df.merge(_df, left_index=True, right_index=True)
488
+
489
+ st.write(summary_df)
490
  ws = wb.create_sheet('Summary',0)
491
  summary_df_to_worksheet(summary_df.reset_index(), ws)
492
  wb.save(st.session_state['xlsx_buffer'])
 
577
  # with column_3:
578
  # st.button('Load Scenario', on_click=load_scenario)
579
 
580
+ selected_scenario_details = saved_scenarios[selected_scenario].copy()
581
 
582
  pd.set_option('display.max_colwidth', 100)
583
  # st.table(create_scenario_summary(selected_scenario_details))
response_curves_input_file.xlsx CHANGED
Binary files a/response_curves_input_file.xlsx and b/response_curves_input_file.xlsx differ
 
summary_df.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3903995f3ab1eb9b34db90db9d8177955cff0a37af45969c97543cc82909a170
3
- size 1822
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4279102a6e3c64422e780dad800d1de417ff74112d57e124b790a1f6af5376ea
3
+ size 1820
utilities.py CHANGED
@@ -216,9 +216,19 @@ def initialize_data(
216
  excel = pd.read_excel(target_file, sheet_name=None)
217
 
218
  # Extract dataframes for raw data, spend input, and contribution MMM
219
- raw_df = excel["RAW DATA MMM"]
220
- spend_df = excel["SPEND INPUT"]
221
- contri_df = excel["CONTRIBUTION MMM"]
 
 
 
 
 
 
 
 
 
 
222
 
223
  # Check if the panel is not None
224
 
@@ -292,6 +302,7 @@ def initialize_data(
292
 
293
  params = pd.read_excel("response_curves_parameters.xlsx",index_col = "channel")
294
  param_dicts = {col: params[col].to_dict() for col in params.columns}
 
295
  response_curves[inp_col] = {
296
  "Kd": param_dicts["Kd"][inp_col],
297
  "n": param_dicts["n"][inp_col],
@@ -760,6 +771,7 @@ def decimal_formater(num_string, n_decimals=1):
760
 
761
 
762
  def channel_name_formating(channel_name):
 
763
  name_mod = channel_name.replace("_", " ")
764
  if name_mod.lower().endswith(" imp"):
765
  name_mod = name_mod.replace("Imp", "Spend")
@@ -779,9 +791,9 @@ def channel_name_formating(channel_name):
779
  "Email" :"Email" ,
780
  "SearchBrand": "Search Brand",
781
  "DisplayRetargeting" : "Display Retargeting" ,
782
- "\xa0Video":"Video"
783
  }
784
- return key_dict[channel_name]
785
 
786
 
787
  def send_email(email, message):
 
216
  excel = pd.read_excel(target_file, sheet_name=None)
217
 
218
  # Extract dataframes for raw data, spend input, and contribution MMM
219
+ # raw_df = excel["RAW DATA MMM"]
220
+ # spend_df = excel["SPEND INPUT"]
221
+ # contri_df = excel["CONTRIBUTION MMM"]
222
+
223
+ # Function to strip spaces from column names and string values in a DataFrame
224
def clean_dataframe(df):
    """Strip surrounding whitespace from column labels and string cell values.

    Column labels are relabeled on ``df`` itself (the caller's frame is
    modified in place, as before); the cell values are cleaned in a new
    DataFrame, which is returned. Labels and cells that are not ``str``
    (numbers, dates, NaN, None) are left untouched.

    Args:
        df: DataFrame to clean.

    Returns:
        A new DataFrame with the stripped column labels and stripped string cells.
    """
    def _strip_if_str(value):
        return value.strip() if isinstance(value, str) else value

    # Index.map keeps non-string labels as they are; the .str accessor would turn
    # them into NaN (mixed labels) or raise (all-numeric labels).
    df.columns = df.columns.map(_strip_if_str)

    # DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map.
    # Look the method up on the class so a column that happens to be named "map"
    # can never shadow it on older pandas versions.
    frame_cls = type(df)
    elementwise = getattr(frame_cls, "map", None) or frame_cls.applymap
    return elementwise(df, _strip_if_str)
227
+
228
+ # Apply the function to all DataFrames
229
+ raw_df = clean_dataframe(excel["RAW DATA MMM"])
230
+ spend_df = clean_dataframe(excel["SPEND INPUT"])
231
+ contri_df = clean_dataframe(excel["CONTRIBUTION MMM"])
232
 
233
  # Check if the panel is not None
234
 
 
302
 
303
  params = pd.read_excel("response_curves_parameters.xlsx",index_col = "channel")
304
  param_dicts = {col: params[col].to_dict() for col in params.columns}
305
+
306
  response_curves[inp_col] = {
307
  "Kd": param_dicts["Kd"][inp_col],
308
  "n": param_dicts["n"][inp_col],
 
771
 
772
 
773
  def channel_name_formating(channel_name):
774
+ channel_name = channel_name.strip()
775
  name_mod = channel_name.replace("_", " ")
776
  if name_mod.lower().endswith(" imp"):
777
  name_mod = name_mod.replace("Imp", "Spend")
 
791
  "Email" :"Email" ,
792
  "SearchBrand": "Search Brand",
793
  "DisplayRetargeting" : "Display Retargeting" ,
794
+ "Video":"Video"
795
  }
796
+ return key_dict[channel_name].replace("Connected & OTTTV", "Connected & OTT TV")
797
 
798
 
799
  def send_email(email, message):