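# Hosting-page residue captured with this file (not part of the program):
# uploader avatar caption "ajaxzhan's picture", commit message "add_support (#1)",
# commit b8a0ce6 (verified).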
import gradio as gr
import pandas as pd
import pandas as pd
import json
import plotly.express as px
def on_confirm(dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio):
    """Load the ranking table for the selected configuration.

    Args:
        dataset_radio: Dataset name; "HumanEval" selects the HumanEval result
            folder, any other value selects the MBPP folder.
        num_parts_dropdown: Number of subsets; used verbatim as a path segment.
        perspective_radio: Perspective label. It is matched by substring
            ("Tokens", "Lines", "Complexity", "Problem Types").
        division_method_radio: "Equal Frequency Partitioning" maps to the "QS"
            folder; every other value (including no selection) maps to "EI".

    Returns:
        The CSV contents as a DataFrame with an extra "Analysis" column.

    Raises:
        ValueError: If the perspective label matches none of the known views.
    """
    # Build the file path from the user's selections.
    num_parts = num_parts_dropdown
    if dataset_radio == "HumanEval":
        base_path = "./dividing_into_different_subsets"
    else:  # MBPP
        base_path = "./dividing_into_different_subsets_mbpp"
    method = "QS" if division_method_radio == "Equal Frequency Partitioning" else "EI"

    # Pick the CSV that belongs to the chosen perspective. An empty selection
    # (None) is treated like an unknown label instead of failing on `in`.
    perspective = perspective_radio or ""
    if "Tokens" in perspective:
        csv_path = f"{base_path}/{num_parts}/{method}/token_counts_{method}.csv"
    elif "Lines" in perspective:
        csv_path = f"{base_path}/{num_parts}/{method}/line_counts_{method}.csv"
    elif "Complexity" in perspective:
        csv_path = f"{base_path}/{num_parts}/{method}/CC_{method}.csv"
    elif "Problem Types" in perspective:
        csv_path = f"{base_path}/cata_result.csv"
    else:
        # Previously this fell through and crashed later with an unbound `df`.
        raise ValueError(f"Unsupported perspective: {perspective_radio!r}")
    df = pd.read_csv(csv_path)

    # Load the analysis report. On failure load_analysis_report returns an
    # error *string* rather than a dict, so the type must be checked before
    # using it as a lookup table.
    analysis_result, _ = load_analysis_report(dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio)

    # AI analysis column.
    if isinstance(analysis_result, dict):
        df["Analysis"] = df["Model"].map(lambda m: analysis_result.get(m, "No analysis provided."))
    elif isinstance(analysis_result, str):
        # Surface the load error in the table instead of losing the scores.
        df["Analysis"] = analysis_result
    else:
        df["Analysis"] = "No analysis provided."
    return df
# Generate the CSS for the ranking table
def generate_css(line_counts, token_counts, cyclomatic_complexity, problem_type, show_high, show_medium, show_low):
    """Return the CSS that tints the table columns of the enabled categories.

    The three category flags are visited in the order line counts, token
    counts, cyclomatic complexity; each has its own colour taken from a fixed
    palette by position. Every enabled category contributes one rule per
    enabled show_* flag, and each rule targets the next free column, starting
    at ``td:nth-child(2)``. When ``problem_type`` is set, the three columns
    after the last tinted one get a fixed colour. A header rule is always
    emitted first and a rule hiding the container's ``::before`` content last.
    """

    def cell_rule(nth, colour):
        # One background rule for the nth table cell of every row.
        return f"#dataframe td:nth-child({nth}) {{ background-color: {colour}; }}\n"

    palette = ["#e6f7ff", "#ffeecc", "#e6ffe6", "#ffe6e6"]
    pieces = ["\n#dataframe th {\nbackground-color: #f2f2f2\n}\n"]

    next_col = 1
    flags = (line_counts, token_counts, cyclomatic_complexity)
    for colour, enabled in zip(palette, flags):
        if not enabled:
            continue
        for shown in (show_high, show_medium, show_low):
            if shown:
                pieces.append(cell_rule(next_col + 1, colour))
                next_col += 1

    # The three Problem Type sub-columns share one fixed colour.
    if problem_type:
        problem_type_colour = "#d4f0fc"
        for offset in (1, 2, 3):
            pieces.append(cell_rule(next_col + offset, problem_type_colour))

    # Hide the "data" marker.
    pieces.append("\n.gradio-container .dataframe-container::before {\ncontent: none !important;\n}\n")
    return "".join(pieces)
# AI analysis
def load_analysis_report(dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio):
    """Read the analysis report and the model recommendation JSON files.

    Files are looked up under ``./llm_insight/<dataset>``. For the problem-type
    view they sit directly in that folder; for the other views they sit in
    ``<num_parts>/<QS|EI>`` below it.

    Returns:
        A ``(analysis_result, recommendation_result)`` tuple. Each element is
        the parsed JSON, or an "[Error] ..." string if that file could not be
        read or parsed.
    """
    method = "EI"
    if division_method_radio == "Equal Frequency Partitioning":
        method = "QS"

    # The first keyword found in the label decides the file stem; a label with
    # none of them is treated as the problem-type view.
    perspective = "problem_type"
    for keyword, stem in (("Tokens", "token_counts"), ("Lines", "line_counts"), ("Complexity", "CC")):
        if keyword in perspective_radio:
            perspective = stem
            break

    folder = f"./llm_insight/{dataset_radio}"
    if perspective != "problem_type":
        folder = f"{folder}/{num_parts_dropdown}/{method}"

    def read_json(path, failure_prefix):
        # Any failure is reported as a message string instead of raising.
        try:
            with open(path, 'r', encoding='utf-8') as handle:
                return json.load(handle)
        except Exception as e:
            return f"{failure_prefix}{e}"

    analysis_result = read_json(
        f"{folder}/{perspective}_report.json",
        "[Error] error load analysis report: ",
    )
    recommendation_result = read_json(
        f"{folder}/{perspective}_recommendation.json",
        "[Error] error load model recommendation: ",
    )
    return (analysis_result, recommendation_result)
# Visualization
def _radar_figure(df):
    """Build the radar chart: one closed polar line per model (row of ``df``)."""
    # Reshape to long form: one record per (model, subset) pair.
    records = [
        {'Model': model, 'Subset': subset, 'Score': score}
        for model, row in df.iterrows()
        for subset, score in row.items()
    ]
    # Explicit columns keep the frame usable even when there are no rows.
    radar_df = pd.DataFrame(records, columns=['Model', 'Subset', 'Score'])
    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
              '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
    fig = px.line_polar(radar_df,
                        r='Score',
                        theta='Subset',
                        color='Model',
                        line_close=True,
                        color_discrete_sequence=colors,
                        title='Model Performance Radar Chart')
    # Per-model line style: no fill, alternating solid and dashed lines.
    for i, trace in enumerate(fig.data):
        trace.update(
            fill=None,
            line=dict(
                width=2,
                dash='solid' if i % 2 == 0 else 'dash',
            )
        )
    fig.update_layout(
        polar=dict(
            radialaxis=dict(
                visible=True,
                range=[0, 100],  # fixed radial range; presumably scores are 0-100
                showline=True,
                linewidth=1,
                gridcolor='lightgrey'
            ),
            angularaxis=dict(
                showline=True,
                linewidth=1,
                gridcolor='lightgrey'
            )
        ),
        showlegend=True,
        legend=dict(
            yanchor="middle",  # vertically centred
            y=0.5,
            xanchor="left",
            x=1.2,  # place the legend to the right of the chart
            bgcolor="rgba(255, 255, 255, 0.8)",
            bordercolor="lightgrey",
            borderwidth=1
        ),
        margin=dict(r=150),  # extra right margin to make room for the legend
        paper_bgcolor='white'
    )
    return fig


def _heatmap_figure(df_transposed):
    """Build the annotated heatmap (rows are subsets, columns are models)."""
    fig = px.imshow(df_transposed,
                    labels=dict(x="Model", y="Subset", color="Score"),
                    color_continuous_scale="RdYlBu_r",
                    aspect="auto",
                    title="Model Performance Heatmap")
    fig.update_layout(
        title=dict(
            text='Model Performance Distribution Across Subsets',
            x=0.5,
            y=0.95,
            xanchor='center',
            yanchor='top',
            font=dict(size=14)
        ),
        xaxis=dict(
            title="Model",
            tickangle=45,  # slanted model names
            tickfont=dict(size=10),
            side="bottom"
        ),
        yaxis=dict(
            title="Subset",
            tickfont=dict(size=10)
        ),
        coloraxis=dict(
            colorbar=dict(
                # Nested title object instead of the deprecated flat
                # `titleside` / `titlefont` attributes, which newer plotly
                # releases reject as invalid properties.
                title=dict(text="Score", side="right", font=dict(size=12)),
                tickfont=dict(size=10),
                len=0.9,  # colour bar length
            )
        ),
        margin=dict(t=80, r=100, b=80, l=80),
        paper_bgcolor='white',
        plot_bgcolor='white'
    )
    # Print every cell's value on top of the heatmap.
    annotations = [
        dict(
            x=col,
            y=row,
            text=f"{value:.1f}",
            showarrow=False,
            font=dict(size=9, color='black')
        )
        for row, row_values in enumerate(df_transposed.to_numpy())
        for col, value in enumerate(row_values)
    ]
    fig.update_layout(annotations=annotations)
    return fig


def plot_visualization(dataset_radio, perspective_radio, num_parts, plot_type):
    """Plot the per-subset scores of every model for the chosen perspective.

    Args:
        dataset_radio: "HumanEval" selects the HumanEval result folder, any
            other value the MBPP folder.
        perspective_radio: Perspective label, matched by substring ("Tokens",
            "Lines", "Complexity"); anything else uses the problem-type CSV.
        num_parts: Number of subsets; used verbatim as a path segment.
        plot_type: "Line Chart" or "Radar Chart"; any other value produces the
            heatmap.

    Returns:
        The plotly figure.

    NOTE(review): this always reads the "QS" (equal frequency) files; the
    division method chosen in the UI is not a parameter of this function.
    """
    if dataset_radio == "HumanEval":
        base_path = "./dividing_into_different_subsets"
    else:  # MBPP
        base_path = "./dividing_into_different_subsets_mbpp"

    if "Tokens" in perspective_radio:
        file_path = f'{base_path}/{num_parts}/QS/token_counts_QS.csv'
    elif "Lines" in perspective_radio:
        file_path = f'{base_path}/{num_parts}/QS/line_counts_QS.csv'
    elif "Complexity" in perspective_radio:
        file_path = f'{base_path}/{num_parts}/QS/CC_QS.csv'
    else:  # Problem Types
        file_path = f'{base_path}/cata_result.csv'

    # Rows are models; the transposed frame has one row per subset.
    df = pd.read_csv(file_path).set_index('Model')
    df_transposed = df.T

    if plot_type == "Line Chart":
        fig = px.line(df_transposed,
                      x=df_transposed.index,
                      y=df_transposed.columns,
                      title='Model Performance Across Different Subsets',
                      labels={'value': 'Evaluation Score', 'index': 'Subsets'},
                      color_discrete_sequence=px.colors.qualitative.Plotly)
        fig.update_traces(hovertemplate='%{y}')
        return fig
    if plot_type == "Radar Chart":
        return _radar_figure(df)
    return _heatmap_figure(df_transposed)
# Sunburst chart
def _model_reason_pairs(model_list):
    """Normalise one scenario's recommendations to a list of (model, reason) pairs.

    Accepts either a ``{model: reason}`` dict or a list of such dicts; any
    other shape (and non-dict list entries) yields no pairs.
    """
    if isinstance(model_list, dict):
        return list(model_list.items())
    if isinstance(model_list, list):
        return [
            pair
            for entry in model_list
            if isinstance(entry, dict)
            for pair in entry.items()
        ]
    return []


def plot_recommendation_sunburst(dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio):
    """Draw the model recommendations as a sunburst: root -> scenario -> model.

    The recommendation data comes from ``load_analysis_report``. Each model
    leaf has value 1, each scenario the number of its models, and the root the
    overall total (``branchvalues="total"``). Hovering shows the full scenario
    text or the recommendation reason.

    Returns:
        A plotly figure. If the recommendation data is not a dict (the loader
        returns an error string on failure), the figure only shows that
        message instead of raising.
    """
    import plotly.graph_objects as go

    _, recommendation_result = load_analysis_report(dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio)

    if not isinstance(recommendation_result, dict):
        # Loading failed: show the message rather than crashing on .items().
        fig = go.Figure()
        fig.add_annotation(
            text=str(recommendation_result),
            x=0.5,
            y=0.5,
            xref="paper",
            yref="paper",
            showarrow=False,
        )
        fig.update_layout(
            xaxis=dict(visible=False),
            yaxis=dict(visible=False),
            margin=dict(t=10, l=10, r=10, b=10),
            height=500,
        )
        return fig

    # Normalise once so counts and nodes are derived from the same data.
    scenarios = {
        scenario: _model_reason_pairs(model_list)
        for scenario, model_list in recommendation_result.items()
    }

    # Explicit ids keep the hierarchy unambiguous: the same model may appear
    # under several scenarios, and truncated scenario labels may collide.
    root_id = "root"
    ids = [root_id]
    labels = ['Model Recommendation']  # root node
    parents = ['']
    values = [sum(len(pairs) for pairs in scenarios.values())]
    customdata = ['Choose your preference model']

    for scenario, pairs in scenarios.items():
        words = scenario.split()
        # Long scenario names are shortened to their first three words.
        short_label = (" ".join(words[:3]) + "...") if len(words) > 3 else scenario
        scenario_id = f"scenario/{scenario}"
        ids.append(scenario_id)
        labels.append(short_label)
        parents.append(root_id)
        values.append(len(pairs))
        customdata.append(scenario)

        for position, (model, reason) in enumerate(pairs):
            ids.append(f"{scenario_id}/{position}/{model}")
            labels.append(model)
            parents.append(scenario_id)
            values.append(1)
            customdata.append(reason)

    fig = go.Figure(go.Sunburst(
        ids=ids,
        labels=labels,
        parents=parents,
        values=values,
        branchvalues="total",
        hovertemplate='%{customdata}<extra></extra>',
        customdata=customdata
    ))
    fig.update_layout(margin=dict(t=10, l=10, r=10, b=10), height=500)
    return fig
### Gradio UI ###
# Custom CSS for the page. This string is passed to gr.Blocks(css=...), which
# takes raw CSS, so it must not be wrapped in <style> tags: the literal tag
# text would become part of the first selector and invalidate the body rule.
custom_css = """
body {
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
background-color: #f9f9f9;
}
.gr-label {
font-size: 15px;
}
.gr-button-primary {
background-color: #4CAF50;
color: white;
border-radius: 8px;
}
.gr-tabs > .tab-nav {
background-color: #e0e0e0;
border-bottom: 2px solid #ccc;
}
.gr-tabs > .tab-nav button.selected {
background-color: #ffffff !important;
border-bottom: 2px solid #4CAF50;
}
.gr-panel {
padding: 20px;
border-radius: 10px;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
background-color: #fff;
}
.markdown-title {
font-size: 1.5em;
font-weight: bold;
margin-bottom: 10px;
}
.analysis-box {
background-color: #f1f8ff;
padding: 20px;
border-left: 5px solid #4CAF50;
border-radius: 6px;
margin-top: 10px;
}
.recommendation-box {
background-color: #fff3cd;
padding: 20px;
border-left: 5px solid #ff9800;
border-radius: 6px;
margin-top: 10px;
}
"""
# Build the interface
# Perspective labels offered for HumanEval (all four views).
_ALL_PERSPECTIVES = [
    "I - Num of Tokens in Problem Desc",
    "II - Num of Lines in Problem Desc",
    "III - Complexity of Reference Code",
    "IV - Problem Types",
]
# MBPP offers every view except the line-count one.
_MBPP_PERSPECTIVES = [
    "I - Num of Tokens in Problem Desc",
    "III - Complexity of Reference Code",
    "IV - Problem Types",
]

with gr.Blocks(css=custom_css) as iface:
    gr.HTML(
        "\n<div style='text-align:center; padding:15px;'>\n"
        "<h1>Multi-view Code LLM Leaderboard</h1>\n"
        "<p>Multi-view Leaderboard: Evaluating Large Language Models From Multiple Views</p>\n"
        "</div>\n"
    )
    with gr.Row():
        # Configuration panel
        with gr.Column(scale=1):
            dataset_radio = gr.Radio(
                ["HumanEval", "MBPP"],
                label="Select a dataset",
                value="HumanEval"
            )
            num_parts_slider = gr.Slider(
                minimum=3,
                maximum=8,
                step=1,
                label="Choose the Number of Subsets",
                value=3
            )
            # A single radio replaces the earlier set of checkboxes.
            perspective_radio = gr.Radio(
                _ALL_PERSPECTIVES,
                label="Choose Perspective",
                value="I - Num of Tokens in Problem Desc"
            )
            # One division-method radio shared by all perspectives.
            # NOTE(review): no default value is set, so until the user picks
            # one the handlers receive None and fall back to the "EI" files.
            division_method_radio = gr.Radio(
                ["Equal Frequency Partitioning", "Equal Interval Partitioning"],
                label="Choose the Division Method",
                visible=True
            )
            confirm_btn = gr.Button("Confirm", variant="primary")
        # Main display area
        with gr.Column(scale=2):
            with gr.Tabs():
                # Ranking table
                with gr.TabItem("Ranking Table"):
                    data_table = gr.Dataframe(headers=["Model", "Score", "Analysis"], interactive=True)
                # Visualization
                with gr.TabItem("Visualization"):
                    plot_type = gr.Radio(
                        choices=["Line Chart", "Radar Chart", "Heatmap"],
                        label="Select Plot Type",
                        value="Line Chart"
                    )
                    chart = gr.Plot()
                # AI analysis
                with gr.TabItem("Model selection suggestions"):
                    with gr.Column():
                        gr.Markdown("<h2 class='markdown-title'>🎯 Model Recommendation</h2>")
                        recommendation_plot = gr.Plot()
                        scenario_legend = gr.Markdown(value="")  # legend placeholder

    def update_perspective_options(dataset, current=None):
        """Return the perspective choices for ``dataset``.

        The current selection is kept when it is still offered; otherwise the
        first choice is selected, so a perspective that the new dataset does
        not provide cannot stay selected.
        """
        choices = _MBPP_PERSPECTIVES if dataset == "MBPP" else _ALL_PERSPECTIVES
        value = current if current in choices else choices[0]
        return gr.update(choices=choices, value=value)

    dataset_radio.change(
        fn=update_perspective_options,
        inputs=[dataset_radio, perspective_radio],
        outputs=perspective_radio
    )
    # Event wiring: table first, then the chart, then the recommendation plot.
    # on_confirm and plot_recommendation_sunburst load the analysis files
    # themselves, so no separate loading step is chained in between.
    confirm_btn.click(
        fn=on_confirm,
        inputs=[dataset_radio, num_parts_slider, perspective_radio, division_method_radio],
        outputs=data_table
    ).then(
        fn=plot_visualization,
        inputs=[dataset_radio, perspective_radio, num_parts_slider, plot_type],
        outputs=chart
    ).then(
        fn=plot_recommendation_sunburst,
        inputs=[dataset_radio, num_parts_slider, perspective_radio, division_method_radio],
        outputs=[recommendation_plot]  # note: passed as a list
    )
    plot_type.change(
        fn=plot_visualization,
        inputs=[dataset_radio, perspective_radio, num_parts_slider, plot_type],
        outputs=chart
    )
# Launch the interface
iface.launch()