# Spaces: Running  (web-page status text captured together with the source; not part of the program)
import json

import gradio as gr
import pandas as pd
import plotly.express as px
def on_confirm(dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio):
    """Load the ranking table for the selected configuration and add an "Analysis" column.

    Args:
        dataset_radio: Dataset name. "HumanEval" reads from
            ``./dividing_into_different_subsets``; any other value reads from
            ``./dividing_into_different_subsets_mbpp``.
        num_parts_dropdown: Number of subsets; used verbatim as a path component.
        perspective_radio: Perspective label, matched by the substrings
            "Tokens", "Lines" and "Complexity". Any other label is treated as
            the problem-type perspective, as the sibling loaders in this file do.
        division_method_radio: "Equal Frequency Partitioning" selects the "QS"
            files; any other value selects the "EI" files.

    Returns:
        The ``pandas.DataFrame`` read from the matching CSV, with an extra
        "Analysis" column looked up by the "Model" column.
    """
    # Build the CSV path from the user's selection.
    if dataset_radio == "HumanEval":
        base_path = "./dividing_into_different_subsets"
    else:  # MBPP
        base_path = "./dividing_into_different_subsets_mbpp"
    method = "QS" if division_method_radio == "Equal Frequency Partitioning" else "EI"
    subset_dir = f"{base_path}/{num_parts_dropdown}/{method}"

    # Pick the file that belongs to the chosen perspective.
    if "Tokens" in perspective_radio:
        csv_path = f"{subset_dir}/token_counts_{method}.csv"
    elif "Lines" in perspective_radio:
        csv_path = f"{subset_dir}/line_counts_{method}.csv"
    elif "Complexity" in perspective_radio:
        csv_path = f"{subset_dir}/CC_{method}.csv"
    else:  # Problem Types (also the fallback, so `df` is always bound)
        csv_path = f"{base_path}/cata_result.csv"
    df = pd.read_csv(csv_path)

    # Load the per-model analysis report.
    analysis_result, _ = load_analysis_report(
        dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio
    )

    # Analysis column. load_analysis_report hands back an error *string* when
    # the report cannot be read; show that message instead of calling .get on it.
    if isinstance(analysis_result, dict):
        df["Analysis"] = df["Model"].map(
            lambda m: analysis_result.get(m, "No analysis provided.")
        )
    else:
        df["Analysis"] = str(analysis_result)
    return df
# Generate the CSS used to colour the ranking table.
def generate_css(line_counts, token_counts, cyclomatic_complexity, problem_type, show_high, show_medium, show_low):
    """Build a CSS string that colours the ``#dataframe`` table columns.

    Column 1 is left uncoloured. For every enabled category (in the order
    line counts, token counts, cyclomatic complexity) one column is coloured
    per enabled level (high, medium, low), using consecutive ``nth-child``
    positions starting at 2. If ``problem_type`` is truthy, the next three
    columns get a fixed colour.

    Args:
        line_counts, token_counts, cyclomatic_complexity: Truthy to colour the
            columns of that category.
        problem_type: Truthy to colour three problem-type columns.
        show_high, show_medium, show_low: Truthy for each level that has a column.

    Returns:
        The CSS text: header rule, column rules, then a rule that hides the
        dataframe container's ``::before`` content.
    """
    colors = ["#e6f7ff", "#ffeecc", "#e6ffe6", "#ffe6e6"]
    categories = [line_counts, token_counts, cyclomatic_complexity]
    level_flags = (show_high, show_medium, show_low)

    parts = ["""
#dataframe th {
    background-color: #f2f2f2
}
"""]

    # NOTE(review): assumes the colour index advances once per category even
    # when that category is disabled, so each category keeps a fixed colour.
    # The flattened source this was restored from does not show the nesting.
    column_index = 1
    for color, category in zip(colors, categories):
        if not category:
            continue
        for enabled in level_flags:
            if enabled:
                column_index += 1
                parts.append(
                    f"#dataframe td:nth-child({column_index}) {{ background-color: {color}; }}\n"
                )

    # Fixed colour for the three Problem Type sub-columns.
    if problem_type:
        problem_type_color = "#d4f0fc"
        for offset in (1, 2, 3):
            parts.append(
                f"#dataframe td:nth-child({column_index + offset}) {{ background-color: {problem_type_color}; }}\n"
            )

    # Hide the "data" marker.
    parts.append("""
.gradio-container .dataframe-container::before {
    content: none !important;
}
""")
    return "".join(parts)
# AI analysis
def _load_json_or_error(path, error_prefix):
    """Return the parsed JSON at ``path``, or ``"<error_prefix>: <exc>"`` if it cannot be read."""
    try:
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)
    # OSError: missing/unreadable file. ValueError: invalid JSON or bad UTF-8
    # (json.JSONDecodeError and UnicodeDecodeError both derive from it).
    except (OSError, ValueError) as e:
        return f"{error_prefix}: {e}"


def load_analysis_report(dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio):
    """Load the analysis report and the model recommendation for a configuration.

    Files are looked up under ``./llm_insight/<dataset>/``. For the
    problem-type perspective they sit directly in that folder; otherwise
    under ``<num_parts>/<method>/``, where method is "QS" for
    "Equal Frequency Partitioning" and "EI" for anything else.

    Args:
        dataset_radio: Dataset name, used as a path component.
        num_parts_dropdown: Number of subsets, used as a path component.
        perspective_radio: Perspective label; matched by the substrings
            "Tokens", "Lines" and "Complexity", anything else means problem types.
        division_method_radio: Division method label.

    Returns:
        ``(analysis_result, recommendation_result)``. Each element is the
        parsed JSON content, or - when its file could not be read or parsed -
        a string starting with ``"[Error] "``. Callers must check the type
        before treating an element as a mapping.
    """
    method = "QS" if division_method_radio == "Equal Frequency Partitioning" else "EI"

    # Map the perspective label to the file-name stem.
    if "Tokens" in perspective_radio:
        perspective = "token_counts"
    elif "Lines" in perspective_radio:
        perspective = "line_counts"
    elif "Complexity" in perspective_radio:
        perspective = "CC"
    else:
        perspective = "problem_type"

    report_dir = f"./llm_insight/{dataset_radio}"
    if perspective != "problem_type":
        report_dir = f"{report_dir}/{num_parts_dropdown}/{method}"

    analysis_result = _load_json_or_error(
        f"{report_dir}/{perspective}_report.json",
        "[Error] error load analysis report",
    )
    recommendation_result = _load_json_or_error(
        f"{report_dir}/{perspective}_recommendation.json",
        "[Error] error load model recommendation",
    )
    return (analysis_result, recommendation_result)
# Visualization
def _line_chart(df_transposed):
    """Line chart with subsets on the x axis and one line per model."""
    fig = px.line(
        df_transposed,
        x=df_transposed.index,
        y=df_transposed.columns,
        title='Model Performance Across Different Subsets',
        labels={'value': 'Evaluation Score', 'index': 'Subsets'},
        color_discrete_sequence=px.colors.qualitative.Plotly,
    )
    fig.update_traces(hovertemplate='%{y}')
    return fig


def _radar_chart(df):
    """Radar chart with one closed, unfilled outline per model (row of ``df``)."""
    # Reshape to the long format px.line_polar expects.
    radar_df = pd.DataFrame([
        {'Model': model, 'Subset': subset, 'Score': score}
        for model, row in df.iterrows()
        for subset, score in row.items()
    ])
    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
              '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
    fig = px.line_polar(
        radar_df,
        r='Score',
        theta='Subset',
        color='Model',
        line_close=True,
        color_discrete_sequence=colors,
        title='Model Performance Radar Chart',
    )
    # Per-model line style: no fill, alternating solid and dashed lines.
    for i, trace in enumerate(fig.data):
        trace.update(
            fill=None,
            line=dict(
                width=2,
                dash='solid' if i % 2 == 0 else 'dash',
            ),
        )
    fig.update_layout(
        polar=dict(
            radialaxis=dict(
                visible=True,
                range=[0, 100],
                showline=True,
                linewidth=1,
                gridcolor='lightgrey',
            ),
            angularaxis=dict(
                showline=True,
                linewidth=1,
                gridcolor='lightgrey',
            ),
        ),
        showlegend=True,
        legend=dict(
            yanchor="middle",  # vertically centred
            y=0.5,
            xanchor="left",
            x=1.2,  # legend to the right of the chart
            bgcolor="rgba(255, 255, 255, 0.8)",
            bordercolor="lightgrey",
            borderwidth=1,
        ),
        margin=dict(r=150),  # extra right margin leaves room for the legend
        paper_bgcolor='white',
    )
    return fig


def _heatmap(df_transposed):
    """Heatmap of subsets (rows) by models (columns) with each cell's value printed on it."""
    fig = px.imshow(
        df_transposed,
        labels=dict(x="Model", y="Subset", color="Score"),
        color_continuous_scale="RdYlBu_r",
        aspect="auto",
        title="Model Performance Heatmap",
    )
    fig.update_layout(
        title=dict(
            text='Model Performance Distribution Across Subsets',
            x=0.5,
            y=0.95,
            xanchor='center',
            yanchor='top',
            font=dict(size=14),
        ),
        xaxis=dict(
            title="Model",
            tickangle=45,  # slanted model names
            tickfont=dict(size=10),
            side="bottom",
        ),
        yaxis=dict(
            title="Subset",
            tickfont=dict(size=10),
        ),
        coloraxis=dict(
            colorbar=dict(
                # Nested title dict instead of the legacy flat
                # `titleside` / `titlefont` attributes.
                title=dict(text="Score", side="right", font=dict(size=12)),
                tickfont=dict(size=10),
                len=0.9,  # colour bar length
            )
        ),
        margin=dict(t=80, r=100, b=80, l=80),
        paper_bgcolor='white',
        plot_bgcolor='white',
    )
    # Print the numeric value on every cell.
    n_rows, n_cols = df_transposed.shape
    annotations = [
        dict(
            x=col,
            y=row,
            text=f"{df_transposed.iloc[row, col]:.1f}",
            showarrow=False,
            font=dict(size=9, color='black'),
        )
        for row in range(n_rows)
        for col in range(n_cols)
    ]
    fig.update_layout(annotations=annotations)
    return fig


def plot_visualization(dataset_radio, perspective_radio, num_parts, plot_type):
    """Build the chart for the selected dataset, perspective and plot type.

    The CSV is read from the same folders as the ranking table, but always
    from the "QS" variant; this function takes no division-method argument.

    Args:
        dataset_radio: "HumanEval" or anything else (treated as MBPP).
        perspective_radio: Perspective label; matched by the substrings
            "Tokens", "Lines" and "Complexity", anything else means problem types.
        num_parts: Number of subsets, used as a path component.
        plot_type: "Line Chart", "Radar Chart", or anything else for a heatmap.

    Returns:
        The Plotly figure.
    """
    if dataset_radio == "HumanEval":
        base_path = "./dividing_into_different_subsets"
    else:  # MBPP
        base_path = "./dividing_into_different_subsets_mbpp"

    if "Tokens" in perspective_radio:
        file_path = f'{base_path}/{num_parts}/QS/token_counts_QS.csv'
    elif "Lines" in perspective_radio:
        file_path = f'{base_path}/{num_parts}/QS/line_counts_QS.csv'
    elif "Complexity" in perspective_radio:
        file_path = f'{base_path}/{num_parts}/QS/CC_QS.csv'
    else:  # Problem Types
        file_path = f'{base_path}/cata_result.csv'

    df = pd.read_csv(file_path)
    df.set_index('Model', inplace=True)
    df_transposed = df.T

    if plot_type == "Line Chart":
        return _line_chart(df_transposed)
    if plot_type == "Radar Chart":
        return _radar_chart(df)
    return _heatmap(df_transposed)  # Heatmap
# Sunburst chart
def _model_reason_pairs(model_list):
    """Flatten one scenario's entry into a list of ``(model, reason)`` pairs.

    Accepts a dict of model -> reason, or a list of such dicts (non-dict
    list items are skipped). Anything else yields an empty list.
    """
    if isinstance(model_list, dict):
        return list(model_list.items())
    if isinstance(model_list, list):
        return [
            pair
            for entry in model_list
            if isinstance(entry, dict)
            for pair in entry.items()
        ]
    return []


def plot_recommendation_sunburst(dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio):
    """Draw the model recommendation as a sunburst: root -> scenario -> model.

    Scenario sectors are labelled with the first three words of the scenario
    text (full text on hover); model sectors show the recommendation reason
    on hover. Every model counts 1, so sector sizes reflect model counts.

    Args:
        dataset_radio, num_parts_dropdown, perspective_radio,
        division_method_radio: Passed through to ``load_analysis_report``.

    Returns:
        A Plotly figure. If the recommendation could not be loaded as a
        mapping, the figure contains only the loader's message as text.
    """
    import plotly.graph_objects as go

    _, recommendation_result = load_analysis_report(
        dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio
    )

    # The loader returns an error string instead of a dict on failure.
    if not isinstance(recommendation_result, dict):
        fig = go.Figure()
        fig.update_layout(
            margin=dict(t=10, l=10, r=10, b=10),
            height=500,
            xaxis=dict(visible=False),
            yaxis=dict(visible=False),
            annotations=[dict(
                text=str(recommendation_result),
                x=0.5, y=0.5, xref="paper", yref="paper",
                showarrow=False,
            )],
        )
        return fig

    scenarios = [
        (scenario, _model_reason_pairs(model_list))
        for scenario, model_list in recommendation_result.items()
    ]

    # Explicit ids keep sectors distinct even when labels repeat (the same
    # model under several scenarios, or scenarios sharing their first three
    # words). With ids given, `parents` refers to ids rather than labels.
    root_id = "root"
    ids = [root_id]
    labels = ['Model Recommendation']  # root node
    parents = ['']
    # branchvalues="total": a parent's value is the sum of its children.
    values = [sum(len(pairs) for _, pairs in scenarios)]
    customdata = ['Choose your preference model']

    for s_idx, (scenario, pairs) in enumerate(scenarios):
        scenario_words = scenario.split()
        if len(scenario_words) > 3:
            short_label = " ".join(scenario_words[:3]) + "..."
        else:
            short_label = scenario
        scenario_id = f"scenario-{s_idx}"
        ids.append(scenario_id)
        labels.append(short_label)
        parents.append(root_id)
        values.append(len(pairs))
        customdata.append(scenario)

        for m_idx, (model, reason) in enumerate(pairs):
            ids.append(f"{scenario_id}/model-{m_idx}")
            labels.append(model)
            parents.append(scenario_id)
            values.append(1)
            customdata.append(reason)

    fig = go.Figure(go.Sunburst(
        ids=ids,
        labels=labels,
        parents=parents,
        values=values,
        branchvalues="total",
        hovertemplate='%{customdata}<extra></extra>',
        customdata=customdata,
    ))
    fig.update_layout(margin=dict(t=10, l=10, r=10, b=10), height=500)
    return fig
### Gradio section ###
# Custom CSS handed to gr.Blocks(css=...). That parameter takes raw CSS, so the
# string must not be wrapped in <style> tags: the tag text would become part of
# the stylesheet and break the first rule.
custom_css = """
body {
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    background-color: #f9f9f9;
}
.gr-label {
    font-size: 15px;
}
.gr-button-primary {
    background-color: #4CAF50;
    color: white;
    border-radius: 8px;
}
.gr-tabs > .tab-nav {
    background-color: #e0e0e0;
    border-bottom: 2px solid #ccc;
}
.gr-tabs > .tab-nav button.selected {
    background-color: #ffffff !important;
    border-bottom: 2px solid #4CAF50;
}
.gr-panel {
    padding: 20px;
    border-radius: 10px;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    background-color: #fff;
}
.markdown-title {
    font-size: 1.5em;
    font-weight: bold;
    margin-bottom: 10px;
}
.analysis-box {
    background-color: #f1f8ff;
    padding: 20px;
    border-left: 5px solid #4CAF50;
    border-radius: 6px;
    margin-top: 10px;
}
.recommendation-box {
    background-color: #fff3cd;
    padding: 20px;
    border-left: 5px solid #ff9800;
    border-radius: 6px;
    margin-top: 10px;
}
"""
# Build the interface
with gr.Blocks(css=custom_css) as iface:
    gr.HTML("""
    <div style='text-align:center; padding:15px;'>
    <h1>Multi-view Code LLM Leaderboard</h1>
    <p>Multi-view Leaderboard: Evaluating Large Language Models From Multiple Views</p>
    </div>
    """)

    all_perspectives = [
        "I - Num of Tokens in Problem Desc",
        "II - Num of Lines in Problem Desc",
        "III - Complexity of Reference Code",
        "IV - Problem Types",
    ]

    with gr.Row():
        # Configuration column
        with gr.Column(scale=1):
            dataset_radio = gr.Radio(
                ["HumanEval", "MBPP"],
                label="Select a dataset",
                value="HumanEval"
            )
            num_parts_slider = gr.Slider(
                minimum=3,
                maximum=8,
                step=1,
                label="Choose the Number of Subsets",
                value=3
            )
            # A single radio replaces the earlier per-perspective checkboxes.
            perspective_radio = gr.Radio(
                all_perspectives,
                label="Choose Perspective",
                value="I - Num of Tokens in Problem Desc"
            )
            # One division-method radio shared by all perspectives.
            # With nothing selected the handlers fall through to the "EI"
            # files, so that option is made the visible default and the UI
            # shows what is actually loaded.
            division_method_radio = gr.Radio(
                ["Equal Frequency Partitioning", "Equal Interval Partitioning"],
                label="Choose the Division Method",
                value="Equal Interval Partitioning",
                visible=True
            )
            confirm_btn = gr.Button("Confirm", variant="primary")

        # Main display column
        with gr.Column(scale=2):
            with gr.Tabs():
                # Table
                with gr.TabItem("Ranking Table"):
                    data_table = gr.Dataframe(headers=["Model", "Score", "Analysis"], interactive=True)
                # Visualization
                with gr.TabItem("Visualization"):
                    plot_type = gr.Radio(
                        choices=["Line Chart", "Radar Chart", "Heatmap"],
                        label="Select Plot Type",
                        value="Line Chart"
                    )
                    chart = gr.Plot()
                # AI analysis
                with gr.TabItem("Model selection suggestions"):
                    with gr.Column():
                        gr.Markdown("<h2 class='markdown-title'>🎯 Model Recommendation</h2>")
                        recommendation_plot = gr.Plot()
                        scenario_legend = gr.Markdown(value="")  # legend placeholder

    def update_perspective_options(dataset, current_perspective=None):
        """Restrict the perspective choices to those offered for ``dataset``.

        MBPP does not offer the "II - Num of Lines" perspective. If the
        current selection is not among the new choices it is reset to the
        first one, so a stale selection cannot be submitted.
        """
        if dataset == "MBPP":
            choices = [
                "I - Num of Tokens in Problem Desc",
                "III - Complexity of Reference Code",
                "IV - Problem Types"
            ]
        else:
            choices = list(all_perspectives)
        value = current_perspective if current_perspective in choices else choices[0]
        return gr.update(choices=choices, value=value)

    dataset_radio.change(
        fn=update_perspective_options,
        inputs=[dataset_radio, perspective_radio],
        outputs=perspective_radio
    )

    # Event wiring. on_confirm and plot_recommendation_sunburst load the
    # analysis/recommendation files themselves, so there is no separate
    # load_analysis_report step writing into an unused gr.State.
    confirm_btn.click(
        fn=on_confirm,
        inputs=[dataset_radio, num_parts_slider, perspective_radio, division_method_radio],
        outputs=data_table
    ).then(
        fn=plot_visualization,
        inputs=[dataset_radio, perspective_radio, num_parts_slider, plot_type],
        outputs=chart
    ).then(
        fn=plot_recommendation_sunburst,
        inputs=[dataset_radio, num_parts_slider, perspective_radio, division_method_radio],
        outputs=[recommendation_plot]
    )

    plot_type.change(
        fn=plot_visualization,
        inputs=[dataset_radio, perspective_radio, num_parts_slider, plot_type],
        outputs=chart
    )

# Launch the interface
iface.launch()