Spaces:

MVLLL
/

Multi-view-leaderboard

Running

File size: 17,755 Bytes

import gradio as gr
import pandas as pd
import pandas as pd
import json
import plotly.express as px

def on_confirm(dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio):
    """Build the ranking table for the current sidebar selection.

    Args:
        dataset_radio: Dataset name; "HumanEval" selects the HumanEval result
            directory, any other value selects the MBPP directory.
        num_parts_dropdown: Number of subsets; used as a path component.
        perspective_radio: Perspective label; matched by the substrings
            "Tokens", "Lines", "Complexity" and "Problem Types".
        division_method_radio: "Equal Frequency Partitioning" maps to the
            "QS" files, any other value maps to the "EI" files.

    Returns:
        The per-model score DataFrame read from CSV, with an extra
        "Analysis" column.

    Raises:
        ValueError: If ``perspective_radio`` matches none of the known
            perspectives.
    """
    if dataset_radio == "HumanEval":
        base_path = "./dividing_into_different_subsets"
    else:  # MBPP
        base_path = "./dividing_into_different_subsets_mbpp"

    method = "QS" if division_method_radio == "Equal Frequency Partitioning" else "EI"
    subset_dir = f"{base_path}/{num_parts_dropdown}/{method}"

    # Pick the CSV that belongs to the chosen perspective.
    perspective = perspective_radio or ""
    if "Tokens" in perspective:
        csv_path = f"{subset_dir}/token_counts_{method}.csv"
    elif "Lines" in perspective:
        csv_path = f"{subset_dir}/line_counts_{method}.csv"
    elif "Complexity" in perspective:
        csv_path = f"{subset_dir}/CC_{method}.csv"
    elif "Problem Types" in perspective:
        # Problem-type results do not depend on subset count or method.
        csv_path = f"{base_path}/cata_result.csv"
    else:
        # Fail with a clear message instead of an unbound-variable error.
        raise ValueError(f"Unsupported perspective: {perspective_radio!r}")

    df = pd.read_csv(csv_path)

    # Load the per-model analysis report (the recommendation part is unused here).
    analysis_result, _ = load_analysis_report(
        dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio
    )

    if isinstance(analysis_result, dict):
        df["Analysis"] = df["Model"].map(
            lambda m: analysis_result.get(m, "No analysis provided.")
        )
    else:
        # load_analysis_report returns an "[Error] ..." string when the report
        # cannot be read; show it in the column instead of crashing on .get().
        df["Analysis"] = (
            analysis_result if isinstance(analysis_result, str) else "No analysis provided."
        )
    return df

# Generate the CSS rules for the table
def generate_css(line_counts, token_counts, cyclomatic_complexity, problem_type, show_high, show_medium, show_low):
    """Return a CSS string that colours the columns of ``#dataframe``.

    Columns are addressed with ``td:nth-child(k)``, starting at ``k = 2``.
    Each enabled category (``line_counts``, ``token_counts``,
    ``cyclomatic_complexity``, in that order) takes one column per enabled
    level flag (``show_high``, ``show_medium``, ``show_low``) and has its own
    background colour. When ``problem_type`` is truthy, the next three
    columns get a fixed colour. The header rule and the rule hiding the
    dataframe container's ``::before`` content are always emitted.
    """
    palette = ["#e6f7ff", "#ffeecc", "#e6ffe6", "#ffe6e6"]
    level_flags = (show_high, show_medium, show_low)

    # Header rule, always present.
    pieces = [
        "\n"
        "    #dataframe th {\n"
        "        background-color: #f2f2f2\n"
        "        \n"
        "    }\n"
        "    "
    ]

    # nth-child index of the next column to colour.
    next_child = 2
    for enabled, shade in zip((line_counts, token_counts, cyclomatic_complexity), palette):
        if not enabled:
            continue
        for level_on in level_flags:
            if level_on:
                pieces.append(
                    f"#dataframe td:nth-child({next_child}) {{ background-color: {shade}; }}\n"
                )
                next_child += 1

    # The three problem-type sub-columns share one fixed colour.
    if problem_type:
        pieces.extend(
            f"#dataframe td:nth-child({next_child + offset}) {{ background-color: #d4f0fc; }}\n"
            for offset in range(3)
        )

    # Hide the "data" marker.
    pieces.append(
        "\n"
        "    .gradio-container .dataframe-container::before {\n"
        "        content: none !important;\n"
        "    }\n"
        "    "
    )

    return "".join(pieces)

# AI analysis
def load_analysis_report(dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio):
    """Load the analysis report and the model recommendation for a selection.

    Both are JSON files under ``./llm_insight/<dataset>/``. For the
    problem-type perspective they sit directly in the dataset directory;
    otherwise they sit under ``<num_parts>/<method>/``, where the method is
    "QS" for "Equal Frequency Partitioning" and "EI" for anything else.

    Returns:
        A tuple ``(analysis_result, recommendation_result)``. Each element is
        the parsed JSON content, or an ``"[Error] ..."`` string if that file
        could not be loaded.
    """

    def _read_json(path, error_prefix):
        # Best effort: any failure becomes an error string, not an exception.
        try:
            with open(path, 'r', encoding='utf-8') as handle:
                return json.load(handle)
        except Exception as exc:
            return f"{error_prefix}{exc}"

    # Map the perspective label to its file-name stem; the fallback is problem types.
    perspective = "problem_type"
    for keyword, stem in (("Tokens", "token_counts"), ("Lines", "line_counts"), ("Complexity", "CC")):
        if keyword in perspective_radio:
            perspective = stem
            break

    if perspective == "problem_type":
        report_dir = f"./llm_insight/{dataset_radio}"
    else:
        method = "QS" if division_method_radio == "Equal Frequency Partitioning" else "EI"
        report_dir = f"./llm_insight/{dataset_radio}/{num_parts_dropdown}/{method}"

    analysis_result = _read_json(
        f"{report_dir}/{perspective}_report.json",
        "[Error] error load analysis report: ",
    )
    recommendation_result = _read_json(
        f"{report_dir}/{perspective}_recommendation.json",
        "[Error] error load model recommendation: ",
    )

    return (analysis_result, recommendation_result)

# Visualization
def _build_line_chart(df_transposed):
    """Return a line chart with subsets on the x axis and one trace per model."""
    fig = px.line(df_transposed,
                  x=df_transposed.index,
                  y=df_transposed.columns,
                  title='Model Performance Across Different Subsets',
                  labels={'value': 'Evaluation Score', 'index': 'Subsets'},
                  color_discrete_sequence=px.colors.qualitative.Plotly)
    fig.update_traces(hovertemplate='%{y}')
    return fig


def _build_radar_chart(df):
    """Return a radar (polar line) chart with one closed trace per model."""
    # Reshape the wide table into the long (Model, Subset, Score) form px.line_polar needs.
    radar_df = pd.DataFrame([
        {'Model': model, 'Subset': subset, 'Score': score}
        for model in df.index
        for subset, score in df.loc[model].items()
    ])

    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
              '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']

    fig = px.line_polar(radar_df,
                        r='Score',
                        theta='Subset',
                        color='Model',
                        line_close=True,
                        color_discrete_sequence=colors,
                        title='Model Performance Radar Chart')

    # No fill; alternate solid and dashed lines so neighbouring models stay distinguishable.
    for i, trace in enumerate(fig.data):
        trace.update(
            fill=None,
            line=dict(
                width=2,
                dash='solid' if i % 2 == 0 else 'dash',
            )
        )

    fig.update_layout(
        polar=dict(
            radialaxis=dict(
                visible=True,
                range=[0, 100],  # fixed radial range
                showline=True,
                linewidth=1,
                gridcolor='lightgrey'
            ),
            angularaxis=dict(
                showline=True,
                linewidth=1,
                gridcolor='lightgrey'
            )
        ),
        showlegend=True,
        legend=dict(
            yanchor="middle",  # vertically centred
            y=0.5,
            xanchor="left",
            x=1.2,  # place the legend to the right of the radar
            bgcolor="rgba(255, 255, 255, 0.8)",  # semi-transparent white
            bordercolor="lightgrey",
            borderwidth=1
        ),
        margin=dict(r=150),  # extra right margin to make room for the legend
        paper_bgcolor='white'
    )
    return fig


def _build_heatmap(df_transposed):
    """Return an annotated heatmap with models on x and subsets on y."""
    fig = px.imshow(df_transposed,
                    labels=dict(x="Model", y="Subset", color="Score"),
                    color_continuous_scale="RdYlBu_r",
                    aspect="auto",
                    title="Model Performance Heatmap")

    fig.update_layout(
        title=dict(
            text='Model Performance Distribution Across Subsets',
            x=0.5,
            y=0.95,
            xanchor='center',
            yanchor='top',
            font=dict(size=14)
        ),
        xaxis=dict(
            title="Model",
            tickangle=45,  # slant the model names
            tickfont=dict(size=10),
            side="bottom"
        ),
        yaxis=dict(
            title="Subset",
            tickfont=dict(size=10)
        ),
        coloraxis=dict(
            colorbar=dict(
                # Nested title object; the flat `titleside` / `titlefont`
                # attributes are deprecated and rejected by newer Plotly.
                title=dict(text="Score", side="right", font=dict(size=12)),
                tickfont=dict(size=10),
                len=0.9,  # colour bar length
            )
        ),
        margin=dict(t=80, r=100, b=80, l=80),
        paper_bgcolor='white',
        plot_bgcolor='white'
    )

    # Print each cell's value (one decimal) on top of the heatmap.
    n_rows, n_cols = df_transposed.shape
    annotations = [
        dict(
            x=col,
            y=row,
            text=f"{df_transposed.iloc[row, col]:.1f}",
            showarrow=False,
            font=dict(size=9, color='black')
        )
        for row in range(n_rows)
        for col in range(n_cols)
    ]
    fig.update_layout(annotations=annotations)
    return fig


def plot_visualization(dataset_radio, perspective_radio, num_parts, plot_type):
    """Plot per-model scores across subsets as a line chart, radar chart or heatmap.

    Args:
        dataset_radio: "HumanEval" selects the HumanEval result directory,
            any other value selects the MBPP directory.
        perspective_radio: Perspective label; matched by the substrings
            "Tokens", "Lines" and "Complexity", with problem types as fallback.
        num_parts: Number of subsets; used as a path component.
        plot_type: "Line Chart" or "Radar Chart"; any other value gives the
            heatmap.

    Returns:
        The Plotly figure.

    Note:
        This function receives no division method, so for the non
        problem-type perspectives it always reads the "QS" CSV files.
    """
    if dataset_radio == "HumanEval":
        base_path = "./dividing_into_different_subsets"
    else:  # MBPP
        base_path = "./dividing_into_different_subsets_mbpp"

    if "Tokens" in perspective_radio:
        file_path = f'{base_path}/{num_parts}/QS/token_counts_QS.csv'
    elif "Lines" in perspective_radio:
        file_path = f'{base_path}/{num_parts}/QS/line_counts_QS.csv'
    elif "Complexity" in perspective_radio:
        file_path = f'{base_path}/{num_parts}/QS/CC_QS.csv'
    else:  # Problem Types
        file_path = f'{base_path}/cata_result.csv'

    # Rows are models, columns are subsets; the transposed view has subsets as rows.
    df = pd.read_csv(file_path).set_index('Model')
    df_transposed = df.T

    if plot_type == "Line Chart":
        return _build_line_chart(df_transposed)
    if plot_type == "Radar Chart":
        return _build_radar_chart(df)
    return _build_heatmap(df_transposed)

# Sunburst chart
def _scenario_models(model_list):
    """Flatten one scenario's entry into a list of (model, reason) pairs.

    Accepts either a ``{model: reason}`` dict or a list of such dicts; any
    other shape, and non-dict list items, contribute nothing.
    """
    if isinstance(model_list, dict):
        return list(model_list.items())
    if isinstance(model_list, list):
        return [
            pair
            for entry in model_list
            if isinstance(entry, dict)
            for pair in entry.items()
        ]
    return []


def plot_recommendation_sunburst(dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio):
    """Draw the model recommendations as a root -> scenario -> model sunburst.

    The recommendation data comes from ``load_analysis_report``. Scenario
    names longer than three words are shortened to their first three words
    plus "..." for the label; hovering shows the full scenario text, or the
    recommendation reason for a model.

    Returns:
        A Plotly figure. If the recommendation could not be loaded (the
        loader returned something other than a dict), the figure contains
        only that message instead of a chart.
    """
    import plotly.graph_objects as go

    _, recommendation_result = load_analysis_report(
        dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio
    )

    if not isinstance(recommendation_result, dict):
        # The loader returns an "[Error] ..." string on failure; show it
        # rather than failing on .items().
        fig = go.Figure()
        fig.add_annotation(
            text=str(recommendation_result),
            xref="paper", yref="paper", x=0.5, y=0.5,
            showarrow=False,
        )
        fig.update_xaxes(visible=False)
        fig.update_yaxes(visible=False)
        fig.update_layout(margin=dict(t=10, l=10, r=10, b=10), height=500)
        return fig

    # Explicit ids keep the hierarchy unambiguous when labels repeat: the
    # same model under several scenarios, or two scenarios whose shortened
    # labels collide.
    root_id = "root"
    ids = [root_id]
    labels = ['Model Recommendation']
    parents = ['']
    values = [0]  # root total, filled in after the loop
    customdata = ['Choose your preference model']

    total_model_count = 0
    for scenario_index, (scenario, model_list) in enumerate(recommendation_result.items()):
        model_items = _scenario_models(model_list)
        total_model_count += len(model_items)

        scenario_words = scenario.split()
        short_label = " ".join(scenario_words[:3]) + "..." if len(scenario_words) > 3 else scenario

        scenario_id = f"scenario-{scenario_index}"
        ids.append(scenario_id)
        labels.append(short_label)
        parents.append(root_id)
        values.append(len(model_items))
        customdata.append(scenario)

        for model_index, (model, reason) in enumerate(model_items):
            ids.append(f"{scenario_id}/model-{model_index}")
            labels.append(model)
            parents.append(scenario_id)
            values.append(1)
            customdata.append(reason)

    # With branchvalues="total" each parent's value must equal the sum of its children.
    values[0] = total_model_count

    fig = go.Figure(go.Sunburst(
        ids=ids,
        labels=labels,
        parents=parents,
        values=values,
        branchvalues="total",
        hovertemplate='%{customdata}<extra></extra>',
        customdata=customdata
    ))
    fig.update_layout(margin=dict(t=10, l=10, r=10, b=10), height=500)
    return fig

### Gradio UI section ###

# Custom CSS for the app. The string is passed to gr.Blocks(css=...), which
# takes raw CSS rules, so it is not wrapped in <style> tags.
custom_css = """
    body {
        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
        background-color: #f9f9f9;
    }
    .gr-label {
        font-size: 15px;
    }
    .gr-button-primary {
        background-color: #4CAF50;
        color: white;
        border-radius: 8px;
    }
    .gr-tabs > .tab-nav {
        background-color: #e0e0e0;
        border-bottom: 2px solid #ccc;
    }
    .gr-tabs > .tab-nav button.selected {
        background-color: #ffffff !important;
        border-bottom: 2px solid #4CAF50;
    }
    .gr-panel {
        padding: 20px;
        border-radius: 10px;
        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
        background-color: #fff;
    }
    .markdown-title {
        font-size: 1.5em;
        font-weight: bold;
        margin-bottom: 10px;
    }
    .analysis-box {
        background-color: #f1f8ff;
        padding: 20px;
        border-left: 5px solid #4CAF50;
        border-radius: 6px;
        margin-top: 10px;
    }
    .recommendation-box {
        background-color: #fff3cd;
        padding: 20px;
        border-left: 5px solid #ff9800;
        border-radius: 6px;
        margin-top: 10px;
    }
"""

# Build the interface
with gr.Blocks(css=custom_css) as iface:
    gr.HTML("""
    <div style='text-align:center; padding:15px;'>
        <h1>Multi-view Code LLM Leaderboard</h1>
        <p>Multi-view Leaderboard: Evaluating Large  Language Models From Multiple Views</p>
    </div>
    """)

    # Every perspective the leaderboard offers; the MBPP list is derived from it.
    perspective_choices = [
        "I - Num of Tokens in Problem Desc",
        "II - Num of Lines in Problem Desc",
        "III - Complexity of Reference Code",
        "IV - Problem Types",
    ]

    with gr.Row():
        # Configuration sidebar
        with gr.Column(scale=1):
            dataset_radio = gr.Radio(
                ["HumanEval", "MBPP"],
                label="Select a dataset",
                value="HumanEval"
            )
            num_parts_slider = gr.Slider(
                minimum=3,
                maximum=8,
                step=1,
                label="Choose the Number of Subsets",
                value=3
            )

            # One radio instead of several checkboxes
            perspective_radio = gr.Radio(
                list(perspective_choices),
                label="Choose Perspective",
                value="I - Num of Tokens in Problem Desc"
            )

            # Shared division-method radio. An explicit default keeps the
            # visible selection in line with what Confirm computes; with
            # nothing selected the handlers fall back to "EI".
            division_method_radio = gr.Radio(
                ["Equal Frequency Partitioning", "Equal Interval Partitioning"],
                label="Choose the Division Method",
                value="Equal Frequency Partitioning",
                visible=True
            )

            confirm_btn = gr.Button("Confirm", variant="primary")

        # Main display area
        with gr.Column(scale=2):
            with gr.Tabs():
                # Table
                with gr.TabItem("Ranking Table"):
                    data_table = gr.Dataframe(headers=["Model", "Score","Analysis"],interactive=True)
                # Visualization
                with gr.TabItem("Visualization"):
                    plot_type = gr.Radio(
                        choices=["Line Chart", "Radar Chart","Heatmap"],
                        label="Select Plot Type",
                        value="Line Chart"
                    )
                    chart = gr.Plot()
                # AI analysis
                with gr.TabItem("Model selection suggestions"):
                    with gr.Column():
                        gr.Markdown("<h2 class='markdown-title'>🎯 Model Recommendation</h2>")
                        recommendation_plot = gr.Plot()
                        scenario_legend = gr.Markdown(value="")  # legend placeholder

    def update_perspective_options(dataset, current_perspective=None):
        """Return the perspective choices for ``dataset``.

        MBPP does not offer the "II - Num of Lines" perspective. If
        ``current_perspective`` is given and is no longer among the choices,
        the selection is reset to the first available choice so the radio
        never holds a value outside its choices.
        """
        if dataset == "MBPP":
            choices = [c for c in perspective_choices if not c.startswith("II - ")]
        else:
            choices = list(perspective_choices)

        if current_perspective is None or current_perspective in choices:
            return gr.update(choices=choices)
        return gr.update(choices=choices, value=choices[0])

    dataset_radio.change(
        fn=update_perspective_options,
        inputs=[dataset_radio, perspective_radio],
        outputs=perspective_radio
    )


    # Wire up events: Confirm refreshes the table, then the chart, then the
    # recommendation sunburst. The table and sunburst handlers load the
    # analysis report themselves, so no separate loading step is needed.
    confirm_btn.click(
        fn=on_confirm,
        inputs=[dataset_radio, num_parts_slider, perspective_radio, division_method_radio],
        outputs=data_table
    ).then(
        fn=plot_visualization,
        inputs=[dataset_radio, perspective_radio, num_parts_slider, plot_type],
        outputs=chart
    ).then(
        fn=plot_recommendation_sunburst,
        inputs=[dataset_radio, num_parts_slider, perspective_radio, division_method_radio],
        outputs=[recommendation_plot]  # note: passed as a list
    )

    # Redraw the chart as soon as the plot type changes.
    plot_type.change(
        fn=plot_visualization,
        inputs=[dataset_radio, perspective_radio, num_parts_slider, plot_type],
        outputs=chart
    )
# Launch the app
iface.launch()