Spaces:

ml6team
/

secret-agent-guardrail-challenge

Running

App Files Files Community

Miro Goettler committed on Jul 18, 2024

Commit

0ddc36e

1 Parent(s): 1455a96

Add info holder

Browse files

Files changed (1) hide show

app.py +22 -13

app.py CHANGED Viewed

@@ -13,8 +13,8 @@ import llm
 from card import card
-grey = "#f0f0f0"
 # init page
 st.set_page_config(
@@ -74,14 +74,14 @@ for idx, level in enumerate(config.LEVELS):
                     btn_submit_prompt = st.button(
                         "Send prompt", key=f"submit_prompt_{level}"
                     )
-                    output= None
                     # Show response
                     if len(txt) > 0 and btn_submit_prompt:
                         st.session_state[f"prompt_try_count_{level}"] += 1
                         with st.container(border=True):
                             st.write("Response:")
                             # special checks for certain levels
                             if level == "llm_judge_input":
                                 invalid, output_raw = llm.run_judge(
                                     level, {"user_input": txt}
@@ -206,7 +206,7 @@ for idx, level in enumerate(config.LEVELS):
                     icon="ℹ️",
                 )
-                hint_1_cont = card(color=grey)
                 hint1 = hint_1_cont.toggle(
                     "Show hint 1 - **Description of security strategy**",
                     key=f"hint1_checkbox_{level}",
@@ -221,7 +221,7 @@ for idx, level in enumerate(config.LEVELS):
                     hint_1_cont.write(config.LEVEL_DESCRIPTIONS[level]["info"])
-                hint_2_cont = card(color=grey)
                 hint2 = hint_2_cont.toggle(
                     "Show hint 2 - **Backend code execution**",
                     key=f"hint2_checkbox_{level}",
@@ -260,8 +260,8 @@ for idx, level in enumerate(config.LEVELS):
                             hint_2_cont.code(val, language=None)
                         hint_2_cont.write("The response of the LLM judge:")
                         intermediate_output = st.session_state[
-                                f"intermediate_output_holder_{level}"
-                            ]
                         if intermediate_output is None:
                             hint_2_cont.warning("Please submit a prompt first.")
@@ -271,7 +271,7 @@ for idx, level in enumerate(config.LEVELS):
                             hint_2_cont.write(
                                 f"The prompt was determined as **{'malicious' if invalid else 'not malicious'}** and therefor step 2 is executed."
                             )
                         hint_2_cont.write(
                             "*Step 2:* If the user input is not classified as malicious, the prompt containing the actual secret is executed and the response is shown."
                         )
@@ -331,8 +331,8 @@ for idx, level in enumerate(config.LEVELS):
                             hint_2_cont.code(val, language=None)
                         hint_2_cont.write("The response of the LLM judge:")
                         intermediate_output = st.session_state[
-                                f"intermediate_output_holder_{level}"
-                            ]
                         if intermediate_output is None:
                             hint_2_cont.warning("Please submit a prompt first.")
                         else:
@@ -417,7 +417,7 @@ for idx, level in enumerate(config.LEVELS):
                         )
                         show_base_prompt()
-                hint_3_cont = card(color=grey)
                 hint3 = hint_3_cont.toggle(
                     "Show hint 3 - **Prompt solution example**",
@@ -438,6 +438,15 @@ for idx, level in enumerate(config.LEVELS):
                         language=None,
                     )
                     hint_3_cont.info("*May not allways work")
 with st.expander("🏆 Record", expanded=True):
@@ -487,4 +496,4 @@ with st.expander("🏆 Record", expanded=True):
 # - storytelling --> new field of study, hard to be 100 percent safe
 # - switch to azure deployment --> currently not working under "GPT-4o"
 # - mark the user input with color in prompt
-# benefits and drawbacks, real world example

 from card import card
+hint_color = "#fce08b"
+info_color = "#bafc8b"
 # init page
 st.set_page_config(
                     btn_submit_prompt = st.button(
                         "Send prompt", key=f"submit_prompt_{level}"
                     )
+                    output = None
                     # Show response
                     if len(txt) > 0 and btn_submit_prompt:
                         st.session_state[f"prompt_try_count_{level}"] += 1
                         with st.container(border=True):
                             st.write("Response:")
                             # special checks for certain levels
                             if level == "llm_judge_input":
                                 invalid, output_raw = llm.run_judge(
                                     level, {"user_input": txt}
                     icon="ℹ️",
                 )
+                hint_1_cont = card(color=hint_color)
                 hint1 = hint_1_cont.toggle(
                     "Show hint 1 - **Description of security strategy**",
                     key=f"hint1_checkbox_{level}",
                     hint_1_cont.write(config.LEVEL_DESCRIPTIONS[level]["info"])
+                hint_2_cont = card(color=hint_color)
                 hint2 = hint_2_cont.toggle(
                     "Show hint 2 - **Backend code execution**",
                     key=f"hint2_checkbox_{level}",
                             hint_2_cont.code(val, language=None)
                         hint_2_cont.write("The response of the LLM judge:")
                         intermediate_output = st.session_state[
+                            f"intermediate_output_holder_{level}"
+                        ]
                         if intermediate_output is None:
                             hint_2_cont.warning("Please submit a prompt first.")
                             hint_2_cont.write(
                                 f"The prompt was determined as **{'malicious' if invalid else 'not malicious'}** and therefor step 2 is executed."
                             )
                         hint_2_cont.write(
                             "*Step 2:* If the user input is not classified as malicious, the prompt containing the actual secret is executed and the response is shown."
                         )
                             hint_2_cont.code(val, language=None)
                         hint_2_cont.write("The response of the LLM judge:")
                         intermediate_output = st.session_state[
+                            f"intermediate_output_holder_{level}"
+                        ]
                         if intermediate_output is None:
                             hint_2_cont.warning("Please submit a prompt first.")
                         else:
                         )
                         show_base_prompt()
+                hint_3_cont = card(color=hint_color)
                 hint3 = hint_3_cont.toggle(
                     "Show hint 3 - **Prompt solution example**",
                         language=None,
                     )
                     hint_3_cont.info("*May not allways work")
+                info_cont = card(color=info_color)
+                info_toogle = info_cont.toggle(
+                    "Show info",
+                    key=f"info_checkbox_{level}",
+                )
+                if info_toogle:
+                    info_cont.write("This is a demo to show the security levels of LLMs.")
 with st.expander("🏆 Record", expanded=True):
 # - storytelling --> new field of study, hard to be 100 percent safe
 # - switch to azure deployment --> currently not working under "GPT-4o"
 # - mark the user input with color in prompt
+# benefits and drawbacks, real world example