lowercase models
Browse files
- app.py +24 -49
- src/display/utils.py +2 -2
app.py
CHANGED
@@ -225,6 +225,11 @@ def init_leaderboard(dataframe, visible_columns=None):
|
|
225 |
dataframe = pd.DataFrame(columns=columns)
|
226 |
logger.warning("Initializing empty leaderboard")
|
227 |
|
|
|
|
|
|
|
|
|
|
|
228 |
# print("\n\n", "dataframe", dataframe, "--------------------------------\n\n")
|
229 |
|
230 |
# Determine which columns to display
|
@@ -588,36 +593,21 @@ def create_performance_plot(
|
|
588 |
if df.empty:
|
589 |
return go.Figure()
|
590 |
|
591 |
-
#
|
|
|
|
|
|
|
592 |
df = df[df["model_name"].isin(selected_models)]
|
593 |
-
|
594 |
-
# Get the relevant metric columns
|
595 |
metric_cols = [col for col in df.columns if metric in col]
|
596 |
-
|
597 |
-
# Create figure
|
598 |
fig = go.Figure()
|
599 |
-
|
600 |
-
# Custom colors for different models
|
601 |
-
colors = [
|
602 |
-
"#8FCCCC",
|
603 |
-
"#C2A4B6",
|
604 |
-
"#98B4A6",
|
605 |
-
"#B68F7C",
|
606 |
-
] # Pale Cyan, Pale Pink, Pale Green, Pale Orange
|
607 |
-
|
608 |
-
# Add traces for each model
|
609 |
for idx, model in enumerate(selected_models):
|
610 |
model_data = df[df["model_name"] == model]
|
611 |
if not model_data.empty:
|
612 |
values = model_data[metric_cols].values[0].tolist()
|
613 |
-
# Add the first value again at the end to complete the polygon
|
614 |
values = values + [values[0]]
|
615 |
-
|
616 |
-
# Clean up test type names
|
617 |
categories = [col.replace(f"_{metric}", "") for col in metric_cols]
|
618 |
-
# Add the first category again at the end to complete the polygon
|
619 |
categories = categories + [categories[0]]
|
620 |
-
|
621 |
fig.add_trace(
|
622 |
go.Scatterpolar(
|
623 |
r=values,
|
@@ -627,8 +617,6 @@ def create_performance_plot(
|
|
627 |
fill="toself",
|
628 |
)
|
629 |
)
|
630 |
-
|
631 |
-
# Update layout with all settings at once
|
632 |
fig.update_layout(
|
633 |
paper_bgcolor="#000000",
|
634 |
plot_bgcolor="#000000",
|
@@ -663,7 +651,6 @@ def create_performance_plot(
|
|
663 |
font={"color": "#ffffff"},
|
664 |
),
|
665 |
)
|
666 |
-
|
667 |
return fig
|
668 |
|
669 |
|
@@ -674,7 +661,7 @@ def update_model_choices(version):
|
|
674 |
df = get_leaderboard_df(version=version)
|
675 |
if df.empty:
|
676 |
return []
|
677 |
-
return sorted(df["model_name"].unique().tolist())
|
678 |
|
679 |
|
680 |
def update_visualization(selected_models, selected_category, selected_metric, version):
|
@@ -744,7 +731,7 @@ with demo:
|
|
744 |
)
|
745 |
model_type_filter = gr.Dropdown(
|
746 |
choices=[
|
747 |
-
t.to_str("
|
748 |
],
|
749 |
label="Access Type",
|
750 |
multiselect=True,
|
@@ -981,15 +968,10 @@ with demo:
|
|
981 |
df = get_leaderboard_df(version=version)
|
982 |
if df.empty:
|
983 |
return []
|
984 |
-
|
985 |
-
|
986 |
-
[
|
987 |
-
|
988 |
-
for _, row in df.drop_duplicates(
|
989 |
-
subset=["model_name", "mode"]
|
990 |
-
).iterrows()
|
991 |
-
]
|
992 |
-
)
|
993 |
|
994 |
model_mode_selector = gr.Dropdown(
|
995 |
choices=get_model_mode_choices(CURRENT_VERSION),
|
@@ -1032,27 +1014,23 @@ with demo:
|
|
1032 |
df = (
|
1033 |
get_leaderboard_df(version=version)
|
1034 |
if selected_category == "All Results"
|
1035 |
-
else get_category_leaderboard_df(
|
1036 |
-
selected_category, version=version
|
1037 |
-
)
|
1038 |
)
|
1039 |
if df.empty:
|
1040 |
return go.Figure()
|
1041 |
-
|
|
|
1042 |
selected_pairs = [s.rsplit(" [", 1) for s in selected_model_modes]
|
1043 |
selected_pairs = [
|
1044 |
-
(name.strip(), mode.strip("] "))
|
1045 |
for name, mode in selected_pairs
|
1046 |
]
|
1047 |
mask = df.apply(
|
1048 |
-
lambda row: (row["model_name"], str(row["mode"]))
|
1049 |
-
in selected_pairs,
|
1050 |
axis=1,
|
1051 |
)
|
1052 |
filtered_df = df[mask]
|
1053 |
-
metric_cols = [
|
1054 |
-
col for col in filtered_df.columns if selected_metric in col
|
1055 |
-
]
|
1056 |
fig = go.Figure()
|
1057 |
colors = ["#8FCCCC", "#C2A4B6", "#98B4A6", "#B68F7C"]
|
1058 |
for idx, (model_name, mode) in enumerate(selected_pairs):
|
@@ -1063,10 +1041,7 @@ with demo:
|
|
1063 |
if not model_data.empty:
|
1064 |
values = model_data[metric_cols].values[0].tolist()
|
1065 |
values = values + [values[0]]
|
1066 |
-
categories = [
|
1067 |
-
col.replace(f"_{selected_metric}", "")
|
1068 |
-
for col in metric_cols
|
1069 |
-
]
|
1070 |
categories = categories + [categories[0]]
|
1071 |
fig.add_trace(
|
1072 |
go.Scatterpolar(
|
@@ -1175,7 +1150,7 @@ with demo:
|
|
1175 |
)
|
1176 |
model_type = gr.Dropdown(
|
1177 |
choices=[
|
1178 |
-
t.to_str("
|
1179 |
for t in ModelType
|
1180 |
if t != ModelType.Unknown
|
1181 |
],
|
|
|
225 |
dataframe = pd.DataFrame(columns=columns)
|
226 |
logger.warning("Initializing empty leaderboard")
|
227 |
|
228 |
+
# Lowercase model_name for display
|
229 |
+
if "model_name" in dataframe.columns:
|
230 |
+
dataframe = dataframe.copy()
|
231 |
+
dataframe["model_name"] = dataframe["model_name"].str.lower()
|
232 |
+
|
233 |
# print("\n\n", "dataframe", dataframe, "--------------------------------\n\n")
|
234 |
|
235 |
# Determine which columns to display
|
|
|
593 |
if df.empty:
|
594 |
return go.Figure()
|
595 |
|
596 |
+
# Lowercase model_name in df and selected_models
|
597 |
+
df = df.copy()
|
598 |
+
df["model_name"] = df["model_name"].str.lower()
|
599 |
+
selected_models = [m.lower() for m in selected_models]
|
600 |
df = df[df["model_name"].isin(selected_models)]
|
|
|
|
|
601 |
metric_cols = [col for col in df.columns if metric in col]
|
|
|
|
|
602 |
fig = go.Figure()
|
603 |
+
colors = ["#8FCCCC", "#C2A4B6", "#98B4A6", "#B68F7C"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
604 |
for idx, model in enumerate(selected_models):
|
605 |
model_data = df[df["model_name"] == model]
|
606 |
if not model_data.empty:
|
607 |
values = model_data[metric_cols].values[0].tolist()
|
|
|
608 |
values = values + [values[0]]
|
|
|
|
|
609 |
categories = [col.replace(f"_{metric}", "") for col in metric_cols]
|
|
|
610 |
categories = categories + [categories[0]]
|
|
|
611 |
fig.add_trace(
|
612 |
go.Scatterpolar(
|
613 |
r=values,
|
|
|
617 |
fill="toself",
|
618 |
)
|
619 |
)
|
|
|
|
|
620 |
fig.update_layout(
|
621 |
paper_bgcolor="#000000",
|
622 |
plot_bgcolor="#000000",
|
|
|
651 |
font={"color": "#ffffff"},
|
652 |
),
|
653 |
)
|
|
|
654 |
return fig
|
655 |
|
656 |
|
|
|
661 |
df = get_leaderboard_df(version=version)
|
662 |
if df.empty:
|
663 |
return []
|
664 |
+
return sorted(df["model_name"].str.lower().unique().tolist())
|
665 |
|
666 |
|
667 |
def update_visualization(selected_models, selected_category, selected_metric, version):
|
|
|
731 |
)
|
732 |
model_type_filter = gr.Dropdown(
|
733 |
choices=[
|
734 |
+
t.to_str("-") for t in ModelType if t != ModelType.Unknown
|
735 |
],
|
736 |
label="Access Type",
|
737 |
multiselect=True,
|
|
|
968 |
df = get_leaderboard_df(version=version)
|
969 |
if df.empty:
|
970 |
return []
|
971 |
+
return sorted([
|
972 |
+
f"{str(row['model_name']).lower()} [{row['mode']}]"
|
973 |
+
for _, row in df.drop_duplicates(subset=["model_name", "mode"]).iterrows()
|
974 |
+
])
|
|
|
|
|
|
|
|
|
|
|
975 |
|
976 |
model_mode_selector = gr.Dropdown(
|
977 |
choices=get_model_mode_choices(CURRENT_VERSION),
|
|
|
1014 |
df = (
|
1015 |
get_leaderboard_df(version=version)
|
1016 |
if selected_category == "All Results"
|
1017 |
+
else get_category_leaderboard_df(selected_category, version=version)
|
|
|
|
|
1018 |
)
|
1019 |
if df.empty:
|
1020 |
return go.Figure()
|
1021 |
+
df = df.copy()
|
1022 |
+
df["model_name"] = df["model_name"].str.lower()
|
1023 |
selected_pairs = [s.rsplit(" [", 1) for s in selected_model_modes]
|
1024 |
selected_pairs = [
|
1025 |
+
(name.strip().lower(), mode.strip("] "))
|
1026 |
for name, mode in selected_pairs
|
1027 |
]
|
1028 |
mask = df.apply(
|
1029 |
+
lambda row: (row["model_name"], str(row["mode"])) in selected_pairs,
|
|
|
1030 |
axis=1,
|
1031 |
)
|
1032 |
filtered_df = df[mask]
|
1033 |
+
metric_cols = [col for col in filtered_df.columns if selected_metric in col]
|
|
|
|
|
1034 |
fig = go.Figure()
|
1035 |
colors = ["#8FCCCC", "#C2A4B6", "#98B4A6", "#B68F7C"]
|
1036 |
for idx, (model_name, mode) in enumerate(selected_pairs):
|
|
|
1041 |
if not model_data.empty:
|
1042 |
values = model_data[metric_cols].values[0].tolist()
|
1043 |
values = values + [values[0]]
|
1044 |
+
categories = [col.replace(f"_{selected_metric}", "") for col in metric_cols]
|
|
|
|
|
|
|
1045 |
categories = categories + [categories[0]]
|
1046 |
fig.add_trace(
|
1047 |
go.Scatterpolar(
|
|
|
1150 |
)
|
1151 |
model_type = gr.Dropdown(
|
1152 |
choices=[
|
1153 |
+
t.to_str("-")
|
1154 |
for t in ModelType
|
1155 |
if t != ModelType.Unknown
|
1156 |
],
|
src/display/utils.py
CHANGED
@@ -24,7 +24,7 @@ class ModelType(Enum):
|
|
24 |
ClosedSource = auto()
|
25 |
API = auto()
|
26 |
|
27 |
-
def to_str(self, separator: str = "
|
28 |
"""Convert enum to string with separator."""
|
29 |
if self == ModelType.Unknown:
|
30 |
return "Unknown"
|
@@ -44,7 +44,7 @@ class GuardModelType(str, Enum):
|
|
44 |
OPENAI_MODERATION = "openai_moderation"
|
45 |
LLM_REGEXP = "llm_regexp"
|
46 |
LLM_SO = "llm_so"
|
47 |
-
|
48 |
|
49 |
def __str__(self):
|
50 |
"""String representation of the guard model type."""
|
|
|
24 |
ClosedSource = auto()
|
25 |
API = auto()
|
26 |
|
27 |
+
def to_str(self, separator: str = "-") -> str:
|
28 |
"""Convert enum to string with separator."""
|
29 |
if self == ModelType.Unknown:
|
30 |
return "Unknown"
|
|
|
44 |
OPENAI_MODERATION = "openai_moderation"
|
45 |
LLM_REGEXP = "llm_regexp"
|
46 |
LLM_SO = "llm_so"
|
47 |
+
WHITECIRCLE_GUARD = "whitecircle_guard"
|
48 |
|
49 |
def __str__(self):
|
50 |
"""String representation of the guard model type."""
|