|
import matplotlib |
|
import matplotlib.pyplot as plt |
|
import numpy as np |
|
import plotly.graph_objects as go |
|
|
|
from utils import get_chart_colors |
|
|
|
|
|
def setup_matplotlib():
    """Select matplotlib's "Agg" backend, then close every open pyplot figure."""
    matplotlib.use(backend="Agg")
    plt.close(fig="all")
|
|
|
|
|
def get_performance_chart(df, category_name="Overall"):
    """Build a horizontal bar chart of each model's category score.

    Args:
        df: DataFrame read for its "Category Score", "Model" and
            "Model Type" columns. Every "Model Type" value is used as a key
            into the mapping returned by ``get_chart_colors()``.
        category_name: Text interpolated into the chart title.

    Returns:
        The matplotlib figure. ``plt.close(fig)`` has already been called on
        it by the time it is returned, so pyplot does not keep it open.
    """
    plt.close("all")
    colors = get_chart_colors()
    score_column = "Category Score"
    # Ascending sort puts the highest score at the largest y position.
    df_sorted = df.sort_values(score_column, ascending=True)
    positions = np.arange(len(df_sorted))

    # Grow the figure with the number of rows, but never below 8 inches tall.
    height = max(8, len(df_sorted) * 0.8)
    fig, ax = plt.subplots(figsize=(16, height))

    # Everything after figure creation sits inside the try so the figure is
    # closed even when a styling call or a colour lookup fails.
    try:
        # NOTE: this updates matplotlib's global rcParams, not just this figure.
        plt.rcParams.update({"font.size": 12})

        fig.patch.set_facecolor(colors["background"])
        ax.set_facecolor(colors["background"])

        ax.barh(
            positions,
            df_sorted[score_column],
            height=0.4,
            capstyle="round",
            color=[colors[t] for t in df_sorted["Model Type"]],
        )

        ax.set_title(
            f"Model Performance - {category_name}",
            pad=20,
            fontsize=20,
            fontweight="bold",
            color=colors["text"],
        )
        ax.set_xlabel(
            "Average Score (Tool Selection Quality)",
            fontsize=14,
            fontweight="bold",
            labelpad=10,
            color=colors["text"],
        )
        ax.set_xlim(0.0, 1.0)

        ax.set_yticks(positions)
        ax.set_yticklabels(df_sorted["Model"], fontsize=12, fontweight="bold", color=colors["text"])

        # NOTE(review): tight_layout() below recomputes the subplot margins,
        # so this adjustment likely has no lasting effect - confirm before
        # removing it.
        fig.subplots_adjust(left=0.35)

        # Print the numeric score just to the right of each bar.
        for i, v in enumerate(df_sorted[score_column]):
            ax.text(
                v + 0.01,
                i,
                f"{v:.3f}",
                va="center",
                fontsize=12,
                fontweight="bold",
                color=colors["text"],
            )

        ax.grid(True, axis="x", linestyle="--", alpha=0.2, color=colors["grid"])
        ax.spines[["top", "right"]].set_visible(False)
        ax.spines[["bottom", "left"]].set_color(colors["grid"])
        ax.tick_params(colors=colors["text"])

        # Proxy rectangles: the legend describes model types, not single bars.
        legend_elements = [
            plt.Rectangle((0, 0), 1, 1, facecolor=colors[label], label=label)
            for label in ("Private", "Open source")
        ]
        ax.legend(
            handles=legend_elements,
            title="Model Type",
            loc="lower right",
            fontsize=12,
            title_fontsize=14,
            facecolor=colors["background"],
            labelcolor=colors["text"],
        )

        # Operate on this figure directly instead of pyplot's current figure.
        fig.tight_layout()
        return fig
    finally:
        plt.close(fig)
|
|
|
|
|
def create_radar_plot(df, model_names):
    """Build a plotly radar chart comparing the given models.

    The radar axes are the columns of ``df`` from positional index 7 onward,
    excluding "IO Cost". One ``Scatterpolar`` trace is added per model, using
    the first row whose "Model" value equals the name.

    Args:
        df: DataFrame with a "Model" column and the per-dataset columns
            described above.
        model_names: Iterable of model names to plot. Names that match no row
            of ``df`` are skipped instead of raising ``IndexError``.

    Returns:
        A ``plotly.graph_objects.Figure``.
    """
    datasets = [col for col in df.columns[7:] if col != "IO Cost"]
    # Repeat the first axis at the end so each trace ends where it starts.
    # Slicing (rather than indexing) keeps this safe when there are no
    # dataset columns at all.
    closed_axes = datasets + datasets[:1]

    fig = go.Figure()

    fill_colors = ["rgba(99, 102, 241, 0.3)", "rgba(34, 197, 94, 0.3)"]
    line_colors = ["#4F46E5", "#16A34A"]

    for idx, model_name in enumerate(model_names):
        matches = df[df["Model"] == model_name]
        if matches.empty:
            # Nothing to draw for a model that is not present in df.
            continue
        model_data = matches.iloc[0]

        values = [model_data[m] for m in datasets]
        values += values[:1]

        fig.add_trace(
            go.Scatterpolar(
                r=values,
                theta=closed_axes,
                fill="toself",
                # Colours cycle when more models than colours are supplied.
                fillcolor=fill_colors[idx % len(fill_colors)],
                line=dict(color=line_colors[idx % len(line_colors)], width=2),
                name=model_name,
                text=[f"{val:.3f}" for val in values],
                textposition="middle right",
                mode="lines+markers+text",
            )
        )

    fig.update_layout(
        polar=dict(
            radialaxis=dict(visible=True, range=[0, 1], showline=False, tickfont=dict(size=12)),
            angularaxis=dict(
                tickfont=dict(size=13, family="Arial"),
                rotation=90,
                direction="clockwise",
            ),
        ),
        showlegend=True,
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=-0.2,
            xanchor="center",
            x=0.5,
            font=dict(size=14),
        ),
        title=dict(
            text="Model Comparison",
            x=0.5,
            y=0.95,
            font=dict(size=24, family="Arial", color="#1F2937"),
        ),
        paper_bgcolor="white",
        plot_bgcolor="white",
        height=700,
        width=900,
        margin=dict(t=100, b=100, l=80, r=80),
    )

    return fig
|
|
|
|
|
def get_performance_cost_chart(df, category_name="Overall"):
    """Build a scatter chart of model score against I/O cost.

    Each row of ``df`` becomes one annotated point: x is "IO Cost" on a log
    axis limited to 0.08-1000, y is "Category Score" * 100 on an axis limited
    to 60-100. Shaded horizontal bands cover 85-100, 75-85 and 60-75, coloured
    from ``colors["performance_bands"]`` in that order.

    Args:
        df: DataFrame read for its "Model", "Model Type", "IO Cost" and
            "Category Score" columns. Every "Model Type" value is used as a
            key into the mapping returned by ``get_chart_colors()``.
        category_name: Text interpolated into the chart title.

    Returns:
        The matplotlib figure. ``plt.close(fig)`` has already been called on
        it by the time it is returned, matching ``get_performance_chart``.
    """
    colors = get_chart_colors()
    score_column = "Category Score"
    fig, ax = plt.subplots(figsize=(12, 8), dpi=300)

    # Close the figure on every exit path so repeated calls do not accumulate
    # 300-dpi figures inside pyplot.
    try:
        fig.patch.set_facecolor(colors["background"])
        ax.set_facecolor(colors["background"])
        ax.grid(True, linestyle="--", alpha=0.15, which="both", color=colors["grid"])

        # Identical for every annotation, so build it once.
        bbox_props = dict(boxstyle="round,pad=0.3", fc=colors["background"], ec="none", alpha=0.8)

        for _, row in df.iterrows():
            color = colors[row["Model Type"]]
            # Scores above 0.85 get a slightly larger marker.
            size = 100 if row[score_column] > 0.85 else 80
            edge_color = colors["Private"] if row["Model Type"] == "Private" else colors["Open source"]

            ax.scatter(
                row["IO Cost"],
                row[score_column] * 100,
                c=color,
                s=size,
                alpha=0.9,
                edgecolor=edge_color,
                linewidth=1,
                zorder=5,
            )

            ax.annotate(
                f"{row['Model']}\n(${row['IO Cost']:.2f})",
                (row["IO Cost"], row[score_column] * 100),
                xytext=(5, 5),
                textcoords="offset points",
                fontsize=8,
                fontweight="bold",
                color=colors["text"],
                bbox=bbox_props,
                zorder=6,
            )

        ax.set_xscale("log")
        ax.set_xlim(0.08, 1000)
        ax.set_ylim(60, 100)

        ax.set_xlabel(
            "I/O Cost per Million Tokens ($)",
            fontsize=10,
            fontweight="bold",
            labelpad=10,
            color=colors["text"],
        )
        ax.set_ylabel(
            "Model Performance Score",
            fontsize=10,
            fontweight="bold",
            labelpad=10,
            color=colors["text"],
        )

        # Empty scatters serve only as legend handles. They are created on
        # `ax` itself rather than through pyplot's current axes, so they can
        # never land on a different figure.
        legend_elements = [
            ax.scatter([], [], c=colors[label], label=label, s=80)
            for label in ("Private", "Open source")
        ]
        ax.legend(
            handles=legend_elements,
            loc="upper right",
            frameon=True,
            facecolor=colors["background"],
            edgecolor="none",
            fontsize=9,
            labelcolor=colors["text"],
        )

        ax.set_title(
            f"Performance vs. Cost - {category_name}",
            fontsize=14,
            pad=15,
            fontweight="bold",
            color=colors["text"],
        )

        # Shaded score bands drawn beneath the points (zorder 1 vs 5 and 6).
        for y1, y2, color in zip([85, 75, 60], [100, 85, 75], colors["performance_bands"]):
            ax.axhspan(y1, y2, alpha=0.2, color=color, zorder=1)

        ax.tick_params(axis="both", which="major", labelsize=9, colors=colors["text"])
        ax.tick_params(axis="both", which="minor", labelsize=8, colors=colors["text"])
        ax.xaxis.set_minor_locator(plt.LogLocator(base=10.0, subs=np.arange(2, 10) * 0.1))

        for spine in ax.spines.values():
            spine.set_color(colors["grid"])

        # Operate on this figure directly instead of pyplot's current figure.
        fig.tight_layout()
        return fig
    finally:
        plt.close(fig)
|
|