Spaces:

samkeet
/

MMO_Demo

Running

App Files Community

Samkeet-Blend360 committed on Jan 25

Commit

53c950b

1 Parent(s): ea88218

Fix

Browse files

Files changed (7) hide show

pages/5_Glossary.py → 5_Glossary.py +35 -35
__pycache__/utilities.cpython-310.pyc +0 -0
pages/2_Scenario_Planner.py +0 -0
pages/3_Saved_Scenarios.py +40 -23
response_curves_input_file.xlsx +0 -0
summary_df.pkl +2 -2
utilities.py +17 -5

pages/5_Glossary.py → 5_Glossary.py RENAMED Viewed

@@ -1,36 +1,36 @@
-import streamlit as st
-# st.set_page_config(
-#     layout="wide"
-# )
-def glossary_run():
-    st.header("Glossary")
-    with st.expander("Model MMM Terminology"):
-        st.subheader("Glossary of MMM Terminology")
-        st.write("**• Model R-squared \(R\)\:** This is a statistical measure used to determine the percentage of variation in the dependent variable that the independent variables explain collectively. It ranges between 0 and 1, where 1 indicates a perfect fit and 0 indicates no linear relationship. An R2 greater than 0.8 usually indicates a great model fit.")
-        st.write("**• Mean Absolute Percentage Error \(MAPE\):** This is a measure used to determine the accuracy of a predictive model. It calculates the average absolute percentage difference between the actual and predicted values, expressing the result as a percentage to provide a sense of scale for the error.")
-        st.write("**• Media & Baseline Elasticity:** It refers to the percentage change in the number of prospects in response to a percentage change in a marketing input \(media channel spends\) or a baseline factor \(like seasonality. macro factors, competitors spending, etc.\). It is a measure of the responsiveness of the number of prospects to changes in the marketing input or the baseline factor")
-        st.write("**• Media Half-Life:** This represents the time it takes for a media spend's impact to reduce to half of its initial impact. It is a key aspect of media decay rates, which represent how the effect of advertising diminishes over time \(in weeks\). This term refers to a curve that illustrates the relationship between media spend and the resulting number of prospects.")
-        st.write("**• Support:** Equivalent to Impression or Click depending on the media channel.")
-        st.write("**• Contribution Share:** Unit is %. It refers to the percentage contribution of a specific marketing channel to the number of prospects. It is calculated by dividing the contribution from a particular channel by the total number of prospects from all media channels \(not including base contributions\).")
-        st.write("**• Spend Share:** Unit is %. It refers to the percentage of the total marketing budget that is allocated to a specific marketing channel. It is calculated by dividing the amount spent on a particular channel by the total marketing spend")
-        st.write("**• Support Share:** Unit is %. It refers to the percentage of the total media impression that is allocated to a specific marketing channel. It is calculated by dividing support on a particular channel by the total marketing spend")
-        st.write("**• Efficiency Index:** it is a metric that measures the cost-effectiveness of a campaign. It is calculated by dividing Contribution Share by Spend Share. An efficiency index above 1 suggests that a channel is more cost-effective than the benchmark, while an efficiency index below 1 suggests it is less cost-effective. The higher the efficiency index, the more cost-effective its channel is")
-        st.write("**• Effectiveness Index:** It is a metric that measures how well a particular marketing channel is performing relative to its support/impression. It is calculated by dividing the Contribution Share by the Spend Share for each channel")
-        st.write("**• Estimated CPM \(Cost Per Thousand Impressions\):** This is an estimation of the cost for every thousand impressions \(or views\) of its advertisement via that media channel. The default values are generated from historical averages.")
-        st.write("**• Estimated CPC \(Cost Per Click\):** This is an estimation of the cost for each time someone clicks on its advertisement via that media channel. The default values are generated from historical averages.")
-    with st.expander("Deployment Plan"):
-        st.image(r"image (2).png")
 glossary_run()

+import streamlit as st
+# st.set_page_config(
+#     layout="wide"
+# )
+def glossary_run():
+    st.header("Glossary")
+    with st.expander("Model MMM Terminology"):
+        st.subheader("Glossary of MMM Terminology")
+        st.write("**• Model R-squared \(R\)\:** This is a statistical measure used to determine the percentage of variation in the dependent variable that the independent variables explain collectively. It ranges between 0 and 1, where 1 indicates a perfect fit and 0 indicates no linear relationship. An R2 greater than 0.8 usually indicates a great model fit.")
+        st.write("**• Mean Absolute Percentage Error \(MAPE\):** This is a measure used to determine the accuracy of a predictive model. It calculates the average absolute percentage difference between the actual and predicted values, expressing the result as a percentage to provide a sense of scale for the error.")
+        st.write("**• Media & Baseline Elasticity:** It refers to the percentage change in the number of prospects in response to a percentage change in a marketing input \(media channel spends\) or a baseline factor \(like seasonality. macro factors, competitors spending, etc.\). It is a measure of the responsiveness of the number of prospects to changes in the marketing input or the baseline factor")
+        st.write("**• Media Half-Life:** This represents the time it takes for a media spend's impact to reduce to half of its initial impact. It is a key aspect of media decay rates, which represent how the effect of advertising diminishes over time \(in weeks\). This term refers to a curve that illustrates the relationship between media spend and the resulting number of prospects.")
+        st.write("**• Support:** Equivalent to Impression or Click depending on the media channel.")
+        st.write("**• Contribution Share:** Unit is %. It refers to the percentage contribution of a specific marketing channel to the number of prospects. It is calculated by dividing the contribution from a particular channel by the total number of prospects from all media channels \(not including base contributions\).")
+        st.write("**• Spend Share:** Unit is %. It refers to the percentage of the total marketing budget that is allocated to a specific marketing channel. It is calculated by dividing the amount spent on a particular channel by the total marketing spend")
+        st.write("**• Support Share:** Unit is %. It refers to the percentage of the total media impression that is allocated to a specific marketing channel. It is calculated by dividing support on a particular channel by the total marketing spend")
+        st.write("**• Efficiency Index:** it is a metric that measures the cost-effectiveness of a campaign. It is calculated by dividing Contribution Share by Spend Share. An efficiency index above 1 suggests that a channel is more cost-effective than the benchmark, while an efficiency index below 1 suggests it is less cost-effective. The higher the efficiency index, the more cost-effective its channel is")
+        st.write("**• Effectiveness Index:** It is a metric that measures how well a particular marketing channel is performing relative to its support/impression. It is calculated by dividing the Contribution Share by the Spend Share for each channel")
+        st.write("**• Estimated CPM \(Cost Per Thousand Impressions\):** This is an estimation of the cost for every thousand impressions \(or views\) of its advertisement via that media channel. The default values are generated from historical averages.")
+        st.write("**• Estimated CPC \(Cost Per Click\):** This is an estimation of the cost for each time someone clicks on its advertisement via that media channel. The default values are generated from historical averages.")
+    with st.expander("Deployment Plan"):
+        st.image(r"image (2).png")
 glossary_run()

__pycache__/utilities.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/utilities.cpython-310.pyc and b/__pycache__/utilities.cpython-310.pyc differ

pages/2_Scenario_Planner.py CHANGED Viewed

The diff for this file is too large to render. See raw diff

pages/3_Saved_Scenarios.py CHANGED Viewed

@@ -100,10 +100,11 @@ def comparison_scenarios_df():
     efficiency_df = pd.DataFrame(index = summary_df_prospect.index)
     for c in summary_df_spend.columns:
-        efficiency_df[c] = (summary_df_prospect[c]/summary_df_prospect[c].sum())/(summary_df_spend[c]/summary_df_spend[c].sum())
         efficiency_df[c] = efficiency_df[c].round(2)
-    return summary_df_spend,summary_df_prospect,efficiency_df
 import matplotlib.colors as mcolors
 import plotly.colors as pc
@@ -175,7 +176,7 @@ def plot_comparison_chart(df,metric,custom_colors):
     # Create the layout
     layout = go.Layout(
-        title='Comparing '+ metric,
         xaxis_title="Channels",
         yaxis_title=metric,
         barmode='group'
@@ -240,22 +241,23 @@ def create_comparison_plots():
     custom_colors = generate_color_gradient(blue, red, spends_df.shape[1])
     st.plotly_chart(plot_comparison_chart(spends_df,"Spends",custom_colors),use_container_width=True)
-    st.plotly_chart(plot_comparison_chart(prospects_df,"Contributions",custom_colors),use_container_width=True)
-    st.plotly_chart(plot_comparison_chart(efficiency_df,"Efficiency",custom_colors),use_container_width=True)
     fig1 = plot_comparison_chart(spends_df,"Spends",custom_colors)
-    fig2 = plot_comparison_chart(prospects_df,"Contributions",custom_colors)
-    fig3 = plot_comparison_chart(efficiency_df,"Efficiency",custom_colors)
-    ppt_file = save_ppt_file(fig1,fig2,fig3)
-    # Add a download button
-    st.download_button(
-        label="Download Comparision Analysis",
-        data=ppt_file,
-        file_name="MMM_Scenario_Comparision.pptx",
-        mime="application/vnd.openxmlformats-officedocument.presentationml.presentation"
-    )
 def create_scenario_summary(scenario_dict):
     summary_rows = []
@@ -268,8 +270,19 @@ def create_scenario_summary(scenario_dict):
     # st.write(modified_total_sales)
     # st.write(actual_total_sales[0])
     # st.write(modified_total_spends[0])
     for channel_dict in scenario_dict['channels']:
-        # st.write(channel_dict['name'])
         name_mod = channel_name_formating(channel_dict['name'])
         summary_rows.append([name_mod,
                              channel_dict.get('actual_total_spends') * channel_dict.get('conversion_rate'),
@@ -304,14 +317,16 @@ def create_scenario_summary(scenario_dict):
                         ])
     adf = pd.DataFrame(summary_rows)
-    # st.write(adf.columns)
     adf.columns = ["1","2","3","4","5","6","7"]
     adf.index = adf["1"].to_list() #["1","2","3","4","5","6","7","8","9","10","11","12","13","14"]
     adf.drop(columns= ["1"],inplace= True)
     # columns_index = pd.MultiIndex.from_product([[''],['Channel']], names=["",""])
     # columns_index = columns_index.append(pd.MultiIndex.from_product([['Spends','Prospects',"Efficiency"],['Actual','Simulated']], names=["",""]))
-    columns_index = pd.MultiIndex.from_product([['Spends','Prospects',"Efficiency"],['Actual','Simulated']], names=["",""])
     adf.columns = columns_index
     return  adf # pd.DataFrame(summary_rows, columns=columns_index)
@@ -460,7 +475,7 @@ def download_scenarios():
                         # scenario_dict.get('modified_total_spends') / scenario_dict.get('modified_total_sales')
                         ])
         columns_index = pd.MultiIndex.from_product([[''],['Channel']], names=["first", "second"])
-        columns_index = columns_index.append(pd.MultiIndex.from_product([[scenario_name],['Spends','Prospects',
                                                                                         #   'ROI','MROI','Spends per NRPU'
                                                                                         ]], names=["first", "second"]))
         if summary_df is None:
@@ -470,6 +485,8 @@ def download_scenarios():
             _df = pd.DataFrame(summary_rows, columns = columns_index)
             _df = _df.set_index(('','Channel'))
             summary_df = summary_df.merge(_df, left_index=True, right_index=True)
     ws = wb.create_sheet('Summary',0)
     summary_df_to_worksheet(summary_df.reset_index(), ws)
     wb.save(st.session_state['xlsx_buffer'])
@@ -560,7 +577,7 @@ if auth_status == True:
         # with column_3:
             # st.button('Load Scenario', on_click=load_scenario)
-        selected_scenario_details = saved_scenarios[selected_scenario]
         pd.set_option('display.max_colwidth', 100)
         # st.table(create_scenario_summary(selected_scenario_details))

     efficiency_df = pd.DataFrame(index = summary_df_prospect.index)
     for c in summary_df_spend.columns:
+        efficiency_df[c] = (summary_df_prospect[c])/(summary_df_spend[c])
         efficiency_df[c] = efficiency_df[c].round(2)
+    return summary_df_spend, summary_df_prospect, efficiency_df
 import matplotlib.colors as mcolors
 import plotly.colors as pc
     # Create the layout
     layout = go.Layout(
+        title=metric,
         xaxis_title="Channels",
         yaxis_title=metric,
         barmode='group'
     custom_colors = generate_color_gradient(blue, red, spends_df.shape[1])
     st.plotly_chart(plot_comparison_chart(spends_df,"Spends",custom_colors),use_container_width=True)
+    st.plotly_chart(plot_comparison_chart(prospects_df,"Revenue",custom_colors),use_container_width=True)
+    st.plotly_chart(plot_comparison_chart(efficiency_df,"ROI",custom_colors),use_container_width=True)
     fig1 = plot_comparison_chart(spends_df,"Spends",custom_colors)
+    fig2 = plot_comparison_chart(prospects_df,"Revenue",custom_colors)
+    fig3 = plot_comparison_chart(efficiency_df,"ROI",custom_colors)
+    # ppt_file = save_ppt_file(fig1,fig2,fig3)
+    # # Add a download button
+    # st.download_button(
+    #     label="Download Comparision Analysis",
+    #     data=ppt_file,
+    #     file_name="MMM_Scenario_Comparision.pptx",
+    #     mime="application/vnd.openxmlformats-officedocument.presentationml.presentation"
+    # )
+import numpy as np
 def create_scenario_summary(scenario_dict):
     summary_rows = []
     # st.write(modified_total_sales)
     # st.write(actual_total_sales[0])
     # st.write(modified_total_spends[0])
+    # st.write(scenario_dict)
+    if scenario_dict == {} or 'channels' not in scenario_dict.keys():
+        st.warning("Save scenario properly again!")
+        st.stop()
+    # Ensure 'channels' exists and is a list
+    if "channels" in scenario_dict and isinstance(scenario_dict['channels'], list):
+        for channel in scenario_dict['channels']:
+            if channel.get("name") == "Connected & OTTTV":  # Check if the key "name" has the incorrect value
+                channel["name"] = "Connected & OTT TV"  # Replace it
     for channel_dict in scenario_dict['channels']:
         name_mod = channel_name_formating(channel_dict['name'])
         summary_rows.append([name_mod,
                              channel_dict.get('actual_total_spends') * channel_dict.get('conversion_rate'),
                         ])
     adf = pd.DataFrame(summary_rows)
+    adf[5] = adf[3] / adf[1].replace(0, np.nan)
+    adf[6] = adf[4] / adf[2].replace(0, np.nan)
     adf.columns = ["1","2","3","4","5","6","7"]
     adf.index = adf["1"].to_list() #["1","2","3","4","5","6","7","8","9","10","11","12","13","14"]
     adf.drop(columns= ["1"],inplace= True)
     # columns_index = pd.MultiIndex.from_product([[''],['Channel']], names=["",""])
     # columns_index = columns_index.append(pd.MultiIndex.from_product([['Spends','Prospects',"Efficiency"],['Actual','Simulated']], names=["",""]))
+    columns_index = pd.MultiIndex.from_product([['Spends','Revenue',"ROI"],['Actual','Simulated']], names=["",""])
     adf.columns = columns_index
     return  adf # pd.DataFrame(summary_rows, columns=columns_index)
                         # scenario_dict.get('modified_total_spends') / scenario_dict.get('modified_total_sales')
                         ])
         columns_index = pd.MultiIndex.from_product([[''],['Channel']], names=["first", "second"])
+        columns_index = columns_index.append(pd.MultiIndex.from_product([[scenario_name],['Spends','Revenue',
                                                                                         #   'ROI','MROI','Spends per NRPU'
                                                                                         ]], names=["first", "second"]))
         if summary_df is None:
             _df = pd.DataFrame(summary_rows, columns = columns_index)
             _df = _df.set_index(('','Channel'))
             summary_df = summary_df.merge(_df, left_index=True, right_index=True)
+    st.write(summary_df)
     ws = wb.create_sheet('Summary',0)
     summary_df_to_worksheet(summary_df.reset_index(), ws)
     wb.save(st.session_state['xlsx_buffer'])
         # with column_3:
             # st.button('Load Scenario', on_click=load_scenario)
+        selected_scenario_details = saved_scenarios[selected_scenario].copy()
         pd.set_option('display.max_colwidth', 100)
         # st.table(create_scenario_summary(selected_scenario_details))

response_curves_input_file.xlsx CHANGED Viewed

Binary files a/response_curves_input_file.xlsx and b/response_curves_input_file.xlsx differ

summary_df.pkl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3903995f3ab1eb9b34db90db9d8177955cff0a37af45969c97543cc82909a170
-size 1822

 version https://git-lfs.github.com/spec/v1
+oid sha256:4279102a6e3c64422e780dad800d1de417ff74112d57e124b790a1f6af5376ea
+size 1820

utilities.py CHANGED Viewed

@@ -216,9 +216,19 @@ def initialize_data(
     excel = pd.read_excel(target_file, sheet_name=None)
     # Extract dataframes for raw data, spend input, and contribution MMM
-    raw_df = excel["RAW DATA MMM"]
-    spend_df = excel["SPEND INPUT"]
-    contri_df = excel["CONTRIBUTION MMM"]
     # Check if the panel is not None
@@ -292,6 +302,7 @@ def initialize_data(
         params = pd.read_excel("response_curves_parameters.xlsx",index_col = "channel")
         param_dicts = {col: params[col].to_dict() for col in params.columns}
         response_curves[inp_col] = {
             "Kd": param_dicts["Kd"][inp_col],
             "n": param_dicts["n"][inp_col],
@@ -760,6 +771,7 @@ def decimal_formater(num_string, n_decimals=1):
 def channel_name_formating(channel_name):
     name_mod = channel_name.replace("_", " ")
     if name_mod.lower().endswith(" imp"):
         name_mod = name_mod.replace("Imp", "Spend")
@@ -779,9 +791,9 @@ def channel_name_formating(channel_name):
         "Email" :"Email" ,
         "SearchBrand": "Search Brand",
         "DisplayRetargeting" :  "Display Retargeting" ,
-        "\xa0Video":"Video"
     }
-    return key_dict[channel_name]
 def send_email(email, message):

     excel = pd.read_excel(target_file, sheet_name=None)
     # Extract dataframes for raw data, spend input, and contribution MMM
+    # raw_df = excel["RAW DATA MMM"]
+    # spend_df = excel["SPEND INPUT"]
+    # contri_df = excel["CONTRIBUTION MMM"]
+    # Function to strip spaces from column names and string values in a DataFrame
+    def clean_dataframe(df):
+        df.columns = df.columns.str.strip()  # Remove spaces from column names
+        return df.applymap(lambda x: x.strip() if isinstance(x, str) else x)  # Remove spaces from string values
+    # Apply the function to all DataFrames
+    raw_df = clean_dataframe(excel["RAW DATA MMM"])
+    spend_df = clean_dataframe(excel["SPEND INPUT"])
+    contri_df = clean_dataframe(excel["CONTRIBUTION MMM"])
     # Check if the panel is not None
         params = pd.read_excel("response_curves_parameters.xlsx",index_col = "channel")
         param_dicts = {col: params[col].to_dict() for col in params.columns}
         response_curves[inp_col] = {
             "Kd": param_dicts["Kd"][inp_col],
             "n": param_dicts["n"][inp_col],
 def channel_name_formating(channel_name):
+    channel_name = channel_name.strip()
     name_mod = channel_name.replace("_", " ")
     if name_mod.lower().endswith(" imp"):
         name_mod = name_mod.replace("Imp", "Spend")
         "Email" :"Email" ,
         "SearchBrand": "Search Brand",
         "DisplayRetargeting" :  "Display Retargeting" ,
+        "Video":"Video"
     }
+    return key_dict[channel_name].replace("Connected & OTTTV", "Connected & OTT TV")
 def send_email(email, message):