"""Gradio app for the Multi-view Code LLM Leaderboard.

Reads pre-computed score tables (CSV) and LLM-written insight files (JSON)
from disk and presents them as a ranking table, a set of charts and a
model-recommendation sunburst.
"""
import json

import gradio as gr
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

_EQUAL_FREQUENCY = "Equal Frequency Partitioning"
_EQUAL_INTERVAL = "Equal Interval Partitioning"
_PROBLEM_TYPE = "problem_type"
_INSIGHT_ROOT = "./llm_insight"

_ALL_PERSPECTIVES = [
    "I - Num of Tokens in Problem Desc",
    "II - Num of Lines in Problem Desc",
    "III - Complexity of Reference Code",
    "IV - Problem Types",
]
# The MBPP selector does not offer the "Lines" perspective.
_MBPP_PERSPECTIVES = [
    "I - Num of Tokens in Problem Desc",
    "III - Complexity of Reference Code",
    "IV - Problem Types",
]


def _subset_root(dataset_radio):
    """Return the directory that holds the score CSVs of the chosen dataset."""
    if dataset_radio == "HumanEval":
        return "./dividing_into_different_subsets"
    # Every other value is treated as MBPP.
    return "./dividing_into_different_subsets_mbpp"


def _method_code(division_method_radio):
    """Map the division-method label to its directory code ("QS" or "EI")."""
    return "QS" if division_method_radio == _EQUAL_FREQUENCY else "EI"


def _perspective_key(perspective_radio):
    """Map a perspective label to the file-name stem used on disk.

    Labels that contain none of the known keywords (including ``None``) fall
    back to the problem-type perspective.
    """
    label = perspective_radio or ""
    keyword_to_key = (
        ("Tokens", "token_counts"),
        ("Lines", "line_counts"),
        ("Complexity", "CC"),
    )
    for keyword, key in keyword_to_key:
        if keyword in label:
            return key
    return _PROBLEM_TYPE


def _scores_csv_path(dataset_radio, num_parts, perspective_radio, division_method_radio):
    """Build the path of the score CSV matching the current selection."""
    root = _subset_root(dataset_radio)
    key = _perspective_key(perspective_radio)
    if key == _PROBLEM_TYPE:
        # The problem-type table does not depend on subset count or method.
        return f"{root}/cata_result.csv"
    method = _method_code(division_method_radio)
    return f"{root}/{num_parts}/{method}/{key}_{method}.csv"


def on_confirm(dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio):
    """Load the ranking table for the current selection.

    Args:
        dataset_radio: Dataset name; "HumanEval" or anything else (MBPP).
        num_parts_dropdown: Number of subsets, used as a directory name.
        perspective_radio: Perspective label chosen in the UI.
        division_method_radio: Division-method label; anything other than
            "Equal Frequency Partitioning" selects the "EI" files.

    Returns:
        The score CSV as a DataFrame with an extra "Analysis" column. The CSV
        must contain a "Model" column.
    """
    df = pd.read_csv(
        _scores_csv_path(dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio)
    )

    analysis_result, _ = load_analysis_report(
        dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio
    )

    if isinstance(analysis_result, dict):
        df["Analysis"] = df["Model"].map(
            lambda model: analysis_result.get(model, "No analysis provided.")
        )
    else:
        # load_analysis_report returns an error string when the report cannot
        # be read; show it instead of crashing on a missing ``.get``.
        df["Analysis"] = str(analysis_result)

    return df


def generate_css(line_counts, token_counts, cyclomatic_complexity, problem_type,
                 show_high, show_medium, show_low):
    """Generate the CSS that colours the columns of the ``#dataframe`` table.

    Columns are coloured left to right starting at the second column. Each
    enabled category consumes one column per enabled level flag
    (``show_high``, ``show_medium``, ``show_low``); an enabled
    ``problem_type`` then colours the next three columns.

    Returns:
        The CSS rules as a single string.
    """
    rules = ["""
    #dataframe th {
        background-color: #f2f2f2
    }
    """]

    colors = ["#e6f7ff", "#ffeecc", "#e6ffe6", "#ffe6e6"]
    categories = (line_counts, token_counts, cyclomatic_complexity)
    level_flags = (show_high, show_medium, show_low)

    column_index = 1
    # Each category keeps its own colour no matter which others are enabled.
    for enabled, color in zip(categories, colors):
        if not enabled:
            continue
        for shown in level_flags:
            if shown:
                rules.append(
                    f"#dataframe td:nth-child({column_index + 1}) {{ background-color: {color}; }}\n"
                )
                column_index += 1

    # The three problem-type sub-columns share one fixed colour.
    if problem_type:
        problem_type_color = "#d4f0fc"
        for offset in (1, 2, 3):
            rules.append(
                f"#dataframe td:nth-child({column_index + offset}) "
                f"{{ background-color: {problem_type_color}; }}\n"
            )

    # Hide the "data" marker of the dataframe container.
    rules.append("""
    .gradio-container .dataframe-container::before {
        content: none !important;
    }
    """)

    return "".join(rules)


def _load_json_or_error(path, error_prefix):
    """Return the parsed JSON at *path*, or an error string on failure."""
    try:
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, ValueError) as e:
        # OSError: missing/unreadable file. ValueError: invalid JSON or a
        # decoding failure (JSONDecodeError and UnicodeDecodeError).
        return f"{error_prefix}: {e}"


def load_analysis_report(dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio):
    """Load the analysis report and the model recommendation for a selection.

    Returns:
        A tuple ``(analysis_result, recommendation_result)``. Each element is
        the parsed JSON content, or a string starting with "[Error]" when the
        corresponding file could not be loaded.
    """
    perspective = _perspective_key(perspective_radio)

    if perspective == _PROBLEM_TYPE:
        directory = f"{_INSIGHT_ROOT}/{dataset_radio}"
    else:
        method = _method_code(division_method_radio)
        directory = f"{_INSIGHT_ROOT}/{dataset_radio}/{num_parts_dropdown}/{method}"

    analysis_result = _load_json_or_error(
        f"{directory}/{perspective}_report.json",
        "[Error] error load analysis report",
    )
    recommendation_result = _load_json_or_error(
        f"{directory}/{perspective}_recommendation.json",
        "[Error] error load model recommendation",
    )
    return (analysis_result, recommendation_result)


def _line_chart(df_transposed):
    """Draw one line per model across the subsets."""
    fig = px.line(
        df_transposed,
        x=df_transposed.index,
        y=df_transposed.columns,
        title='Model Performance Across Different Subsets',
        labels={'value': 'Evaluation Score', 'index': 'Subsets'},
        color_discrete_sequence=px.colors.qualitative.Plotly,
    )
    fig.update_traces(hovertemplate='%{y}')
    return fig


def _radar_chart(df):
    """Draw one closed polar line per model; *df* is indexed by model."""
    # line_polar needs long-form data: one row per (model, subset) pair.
    radar_df = pd.DataFrame([
        {'Model': model, 'Subset': subset, 'Score': score}
        for model, row in df.iterrows()
        for subset, score in row.items()
    ])

    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
              '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']

    fig = px.line_polar(
        radar_df,
        r='Score',
        theta='Subset',
        color='Model',
        line_close=True,
        color_discrete_sequence=colors,
        title='Model Performance Radar Chart',
    )

    # No fill; alternate solid and dashed lines between models.
    for i, trace in enumerate(fig.data):
        trace.update(
            fill=None,
            line=dict(width=2, dash='solid' if i % 2 == 0 else 'dash'),
        )

    fig.update_layout(
        polar=dict(
            radialaxis=dict(
                visible=True,
                range=[0, 100],
                showline=True,
                linewidth=1,
                gridcolor='lightgrey',
            ),
            angularaxis=dict(
                showline=True,
                linewidth=1,
                gridcolor='lightgrey',
            ),
        ),
        showlegend=True,
        # Legend to the right of the chart, vertically centred, on a
        # semi-transparent white background with a border.
        legend=dict(
            yanchor="middle",
            y=0.5,
            xanchor="left",
            x=1.2,
            bgcolor="rgba(255, 255, 255, 0.8)",
            bordercolor="lightgrey",
            borderwidth=1,
        ),
        margin=dict(r=150),  # extra right margin leaves room for the legend
        paper_bgcolor='white',
    )
    return fig


def _heatmap(df_transposed):
    """Draw a subsets-by-models heatmap with the value written in each cell."""
    fig = px.imshow(
        df_transposed,
        labels=dict(x="Model", y="Subset", color="Score"),
        color_continuous_scale="RdYlBu_r",
        aspect="auto",
        title="Model Performance Heatmap",
    )

    fig.update_layout(
        title=dict(
            text='Model Performance Distribution Across Subsets',
            x=0.5,
            y=0.95,
            xanchor='center',
            yanchor='top',
            font=dict(size=14),
        ),
        xaxis=dict(
            title="Model",
            tickangle=45,  # slanted model names
            tickfont=dict(size=10),
            side="bottom",
        ),
        yaxis=dict(
            title="Subset",
            tickfont=dict(size=10),
        ),
        coloraxis=dict(
            colorbar=dict(
                # Nested title dict replaces the deprecated
                # ``titleside`` / ``titlefont`` attributes.
                title=dict(text="Score", side="right", font=dict(size=12)),
                tickfont=dict(size=10),
                len=0.9,
            )
        ),
        margin=dict(t=80, r=100, b=80, l=80),
        paper_bgcolor='white',
        plot_bgcolor='white',
    )

    # One text annotation per cell, addressed by its column/row position.
    annotations = [
        dict(
            x=col,
            y=row,
            text=f"{value:.1f}",
            showarrow=False,
            font=dict(size=9, color='black'),
        )
        for row, row_values in enumerate(df_transposed.to_numpy())
        for col, value in enumerate(row_values)
    ]
    fig.update_layout(annotations=annotations)
    return fig


def plot_visualization(dataset_radio, perspective_radio, num_parts, plot_type,
                       division_method_radio=_EQUAL_FREQUENCY):
    """Plot the scores of the current selection.

    Args:
        dataset_radio: Dataset name; "HumanEval" or anything else (MBPP).
        perspective_radio: Perspective label chosen in the UI.
        num_parts: Number of subsets, used as a directory name.
        plot_type: "Line Chart", "Radar Chart"; any other value draws the
            heatmap.
        division_method_radio: Division-method label. Defaults to equal
            frequency partitioning (the "QS" files), which is what this
            function always used before the parameter existed.

    Returns:
        A plotly figure.
    """
    file_path = _scores_csv_path(dataset_radio, num_parts, perspective_radio, division_method_radio)
    df = pd.read_csv(file_path).set_index('Model')

    if plot_type == "Line Chart":
        return _line_chart(df.T)
    if plot_type == "Radar Chart":
        return _radar_chart(df)
    return _heatmap(df.T)


def _model_items(model_list):
    """Flatten one scenario's recommendation into ``(model, reason)`` pairs.

    Accepts either a dict or a list of dicts; anything else yields no pairs.
    """
    if isinstance(model_list, dict):
        return list(model_list.items())
    if isinstance(model_list, list):
        return [
            pair
            for entry in model_list
            if isinstance(entry, dict)
            for pair in entry.items()
        ]
    return []


def plot_recommendation_sunburst(dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio):
    """Draw the model recommendations as a sunburst: root → scenario → model.

    Returns:
        A plotly figure. When the recommendation file could not be loaded as
        a JSON object, the figure only shows the load result as text.
    """
    _, recommendation_result = load_analysis_report(
        dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio
    )

    if not isinstance(recommendation_result, dict):
        # Typically the "[Error] ..." string from load_analysis_report.
        fig = go.Figure()
        fig.add_annotation(
            text=str(recommendation_result),
            xref="paper", yref="paper", x=0.5, y=0.5,
            showarrow=False,
        )
        fig.update_layout(
            xaxis=dict(visible=False),
            yaxis=dict(visible=False),
            margin=dict(t=10, l=10, r=10, b=10),
            height=500,
        )
        return fig

    scenarios = [
        (scenario, _model_items(model_list))
        for scenario, model_list in recommendation_result.items()
    ]

    # Explicit ids keep sectors distinct even when labels repeat, e.g. the
    # same model recommended for several scenarios.
    root_id = "root"
    ids = [root_id]
    labels = ['Model Recommendation']
    parents = ['']
    # branchvalues="total": every parent's value is the sum of its children.
    values = [sum(len(items) for _, items in scenarios)]
    customdata = ['Choose your preference model']

    for scenario_idx, (scenario, items) in enumerate(scenarios):
        scenario_words = scenario.split()
        # Long scenario names are cut to their first three words; the full
        # text remains available on hover through customdata.
        short_label = " ".join(scenario_words[:3]) + "..." if len(scenario_words) > 3 else scenario
        scenario_id = f"scenario-{scenario_idx}"

        ids.append(scenario_id)
        labels.append(short_label)
        parents.append(root_id)
        values.append(len(items))
        customdata.append(scenario)

        for model_idx, (model, reason) in enumerate(items):
            ids.append(f"{scenario_id}/model-{model_idx}")
            labels.append(model)
            parents.append(scenario_id)
            values.append(1)
            customdata.append(reason)

    fig = go.Figure(go.Sunburst(
        ids=ids,
        labels=labels,
        parents=parents,
        values=values,
        branchvalues="total",
        hovertemplate='%{customdata}',
        customdata=customdata,
    ))
    fig.update_layout(margin=dict(t=10, l=10, r=10, b=10), height=500)
    return fig


def update_perspective_options(dataset, current_perspective=None):
    """Return the perspective-radio update matching *dataset*.

    Args:
        dataset: Selected dataset name; "MBPP" gets the reduced choice list.
        current_perspective: Currently selected perspective, if known. When
            it is not among the new choices, the selection is reset to the
            first choice so the radio never keeps a value it no longer
            offers.
    """
    choices = _MBPP_PERSPECTIVES if dataset == "MBPP" else _ALL_PERSPECTIVES
    if current_perspective is None or current_perspective in choices:
        return gr.update(choices=list(choices))
    return gr.update(choices=list(choices), value=choices[0])


### Gradio UI ###

# Custom CSS
custom_css = """
"""

# Build the interface
with gr.Blocks(css=custom_css) as iface:
    # NOTE(review): this header contains no HTML tags; the markup may have
    # been lost - confirm against the deployed app.
    gr.HTML(
        "\n\nMulti-view Code LLM Leaderboard\n\n"
        "Multi-view Leaderboard: Evaluating Large Language Models From Multiple Views\n\n"
    )

    with gr.Row():
        # Configuration
        with gr.Column(scale=1):
            dataset_radio = gr.Radio(
                ["HumanEval", "MBPP"],
                label="Select a dataset",
                value="HumanEval",
            )

            num_parts_slider = gr.Slider(
                minimum=3,
                maximum=8,
                step=1,
                label="Choose the Number of Subsets",
                value=3,
            )

            perspective_radio = gr.Radio(
                list(_ALL_PERSPECTIVES),
                label="Choose Perspective",
                value="I - Num of Tokens in Problem Desc",
            )

            division_method_radio = gr.Radio(
                [_EQUAL_FREQUENCY, _EQUAL_INTERVAL],
                label="Choose the Division Method",
                visible=True,
            )

            confirm_btn = gr.Button("Confirm", variant="primary")

        # Main display
        with gr.Column(scale=2):
            with gr.Tabs():
                # Table
                with gr.TabItem("Ranking Table"):
                    data_table = gr.Dataframe(headers=["Model", "Score", "Analysis"], interactive=True)

                # Charts
                with gr.TabItem("Visualization"):
                    plot_type = gr.Radio(
                        choices=["Line Chart", "Radar Chart", "Heatmap"],
                        label="Select Plot Type",
                        value="Line Chart",
                    )
                    chart = gr.Plot()

                # Recommendations
                with gr.TabItem("Model selection suggestions"):
                    with gr.Column():
                        gr.Markdown("\n\n🎯 Model Recommendation\n\n")
                        recommendation_plot = gr.Plot()
                        scenario_legend = gr.Markdown(value="")  # legend placeholder

    dataset_radio.change(
        fn=update_perspective_options,
        inputs=[dataset_radio, perspective_radio],
        outputs=perspective_radio,
    )

    # Confirm: refresh the table, then the chart, then the sunburst.
    confirm_btn.click(
        fn=on_confirm,
        inputs=[dataset_radio, num_parts_slider, perspective_radio, division_method_radio],
        outputs=data_table,
    ).then(
        fn=plot_visualization,
        inputs=[dataset_radio, perspective_radio, num_parts_slider, plot_type, division_method_radio],
        outputs=chart,
    ).then(
        fn=plot_recommendation_sunburst,
        inputs=[dataset_radio, num_parts_slider, perspective_radio, division_method_radio],
        outputs=[recommendation_plot],
    )

    plot_type.change(
        fn=plot_visualization,
        inputs=[dataset_radio, perspective_radio, num_parts_slider, plot_type, division_method_radio],
        outputs=chart,
    )

# Launch the interface
iface.launch()