# Spaces: Running
import json
import os
import shutil
import subprocess
import sys

import gradio as gr
import pandas as pd
import plotly.express as px
import requests
def on_confirm(dataset_radio, num_parts_dropdown, token_counts_radio, line_counts_radio,
               cyclomatic_complexity_radio, problem_type_checkbox, base_dir="/home/user/app"):
    """Build the ranking table for the options chosen in the UI.

    One CSV is loaded per perspective whose division method is set, plus the
    problem-type CSV when its checkbox is ticked. The frames are then joined
    side by side on their row index.

    Args:
        dataset_radio: "HumanEval" or "MBPP"; any other value yields an empty table.
        num_parts_dropdown: Number of subsets; used as a directory name.
        token_counts_radio: Division method for the token-count perspective, or None.
        line_counts_radio: Division method for the line-count perspective, or None.
        cyclomatic_complexity_radio: Division method for the complexity perspective, or None.
        problem_type_checkbox: Truthy to include the problem-type results.
        base_dir: Directory that contains the per-dataset result folders.

    Returns:
        The combined DataFrame, or an empty DataFrame when nothing is selected.

    Raises:
        FileNotFoundError: If a selected CSV does not exist (from ``pd.read_csv``).
    """
    dataset_dirs = {
        "HumanEval": "dividing_into_different_subsets",
        "MBPP": "dividing_into_different_subsets_mbpp",
    }
    # QS = equal-frequency split (each subset holds roughly the same number of
    # data points); EI = equal-interval split.
    method_codes = {
        "Equal Frequency Partitioning": "QS",
        "Equal Interval Partitioning": "EI",
    }

    dataset_dir = dataset_dirs.get(dataset_radio)
    if dataset_dir is None:
        return pd.DataFrame()
    root = os.path.join(base_dir, dataset_dir)

    # Each perspective is driven by its OWN radio. The previous code tested
    # token_counts_radio in the equal-interval branches of all three.
    perspectives = (
        ("token_counts", token_counts_radio),
        ("line_counts", line_counts_radio),
        ("CC", cyclomatic_complexity_radio),
    )

    dataframes = []
    for file_prefix, method in perspectives:
        code = method_codes.get(method)
        if code is None:
            continue
        csv_path = os.path.join(root, str(num_parts_dropdown), code, f"{file_prefix}_{code}.csv")
        dataframes.append(pd.read_csv(csv_path))

    # Problem types come from a single pre-computed file per dataset.
    if problem_type_checkbox:
        dataframes.append(pd.read_csv(os.path.join(root, "cata_result.csv")))

    if not dataframes:
        return pd.DataFrame()

    combined_df = dataframes[0]
    for df in dataframes[1:]:
        combined_df = pd.merge(combined_df, df, left_index=True, right_index=True,
                               suffixes=("", "_y"))
        # Drop the duplicated columns right away so the next merge cannot
        # produce clashing "_y" names.
        combined_df = combined_df.loc[:, ~combined_df.columns.str.endswith("_y")]
    return combined_df
def execute_specified_python_files(directory_list, file_list):
    """Run the named Python scripts found in each of the given directories.

    Every directory/file combination is tried. A combination is executed only
    when the path is an existing file whose name ends in ``.py``; otherwise a
    message is printed and it is skipped. A script that exits with a non-zero
    status is reported and the remaining scripts still run.

    Args:
        directory_list: Directories to look in.
        file_list: Script file names to run inside each directory.
    """
    for directory in directory_list:
        for py_file in file_list:
            file_path = os.path.join(directory, py_file)
            if not (os.path.isfile(file_path) and py_file.endswith('.py')):
                print(f"File {file_path} does not exist or is not a Python file.")
                continue

            print(f"Executing {file_path}...")
            try:
                # Use the interpreter running this app: a bare "python" may
                # resolve to a different installation or not exist on PATH.
                subprocess.run([sys.executable, file_path], check=True)
            except subprocess.CalledProcessError as e:
                print(f"Error executing {file_path}: {e}")
            else:
                print(f"{file_path} executed successfully.")
# Build the CSS used to colour the ranking table columns.
def generate_css(line_counts, token_counts, cyclomatic_complexity, problem_type, show_high, show_medium, show_low):
    """Return a CSS string that tints the ``#dataframe`` columns by perspective.

    Column 1 is left untinted. For each enabled perspective (line counts,
    token counts, cyclomatic complexity, in that order) one column rule is
    emitted per enabled level flag, all sharing that perspective's colour.
    When ``problem_type`` is set, the next three columns get a fixed colour.

    NOTE(review): the source indentation was lost; this assumes the palette
    index advances only for enabled perspectives - confirm.
    """
    palette = ["#e6f7ff", "#ffeecc", "#e6ffe6", "#ffe6e6"]
    pieces = ["""
    #dataframe th {
        background-color: #f2f2f2
    }
    """]

    palette_pos = 0
    last_column = 1
    for enabled in (line_counts, token_counts, cyclomatic_complexity):
        if not enabled:
            continue
        for level_shown in (show_high, show_medium, show_low):
            if level_shown:
                pieces.append(
                    f"#dataframe td:nth-child({last_column + 1}) {{ background-color: {palette[palette_pos]}; }}\n"
                )
                last_column += 1
        palette_pos += 1

    # The three problem-type sub-columns always share one fixed colour.
    if problem_type:
        problem_type_color = "#d4f0fc"
        for offset in (1, 2, 3):
            pieces.append(
                f"#dataframe td:nth-child({last_column + offset}) {{ background-color: {problem_type_color}; }}\n"
            )

    # Hide the "data" marker.
    pieces.append("""
    .gradio-container .dataframe-container::before {
        content: none !important;
    }
    """)
    return "".join(pieces)
def update_radio_options(token_counts, line_counts, cyclomatic_complexity, problem_type):
    """Return a Gradio update whose choices are the currently ticked perspectives.

    The labels keep a fixed order: tokens, lines, complexity, problem type.
    """
    labelled_flags = (
        (token_counts, "The Number of Tokens in Problem Descriptions"),
        (line_counts, "The Number of Lines in Problem Descriptions"),
        (cyclomatic_complexity, "The Complexity of Reference Code"),
        (problem_type, "Problem Type"),
    )
    selected = [label for ticked, label in labelled_flags if ticked]
    return gr.update(choices=selected)
def plot_csv(dataset_radio, radio, num):
    """Draw a line chart of per-subset scores for one perspective.

    The equal-frequency ("QS") result file is always the one plotted for the
    token, line and complexity perspectives; "Problem Type" reads the
    dataset's ``cata_result.csv``.

    Args:
        dataset_radio: "HumanEval" or "MBPP".
        radio: Label of the perspective to plot.
        num: Number of subsets; used as a directory name. Ignored for
            "Problem Type".

    Returns:
        A plotly figure, or None when the selection is incomplete or unknown.
        Previously such selections raised UnboundLocalError.

    Raises:
        FileNotFoundError: If the resolved CSV does not exist.
    """
    dataset_roots = {
        "HumanEval": "/home/user/app/dividing_into_different_subsets",
        "MBPP": "/home/user/app/dividing_into_different_subsets_mbpp",
    }
    file_prefixes = {
        "The Number of Tokens in Problem Descriptions": "token_counts",
        "The Number of Lines in Problem Descriptions": "line_counts",
        "The Complexity of Reference Code": "CC",
    }

    root = dataset_roots.get(dataset_radio)
    if root is None:
        return None

    if radio == "Problem Type":
        file_path = f"{root}/cata_result.csv"
    else:
        radio_choice = file_prefixes.get(radio)
        # No perspective or no subset count chosen yet: nothing to plot.
        if radio_choice is None or num is None or num == "":
            return None
        file_path = f"{root}/{num}/QS/{radio_choice}_QS.csv"

    df = pd.read_csv(file_path)
    # Index by model name, then transpose so every model becomes one line
    # and the CSV's remaining columns run along the x-axis.
    df_transposed = df.set_index('Model').T

    fig = px.line(df_transposed, x=df_transposed.index, y=df_transposed.columns,
                  title='Model Evaluation Results',
                  labels={'value': 'Evaluation Score', 'index': 'Evaluation Metric'},
                  color_discrete_sequence=px.colors.qualitative.Plotly)
    # Show only the score in the hover tooltip.
    fig.update_traces(hovertemplate='%{y}')
    return fig
def toggle_radio(checkbox, radio):
    """Return a Gradio update that shows a component only while ``checkbox`` is ticked.

    ``radio`` is part of the signature but is not read by this function.
    """
    is_visible = checkbox
    return gr.update(visible=is_visible)
def toggle_line_counts_visibility(dataset):
    """Return a Gradio update hiding the target for "MBPP" and showing it otherwise."""
    hide = dataset == "MBPP"
    return gr.update(visible=not hide)
# Build the Gradio interface.
# NOTE(review): the nesting of the layout contexts below was reconstructed
# from a source whose indentation had been lost - confirm against the
# deployed layout.
with gr.Blocks() as iface:
    # A page-wide "body { max-width: 50%; margin: 0 auto; }" rule used to sit
    # in this stylesheet, disabled with '#' markers. '#' is not a CSS comment,
    # so the disabled rule is recorded here instead of inside the string.
    gr.HTML("""
    <style>
        .title {
            text-align: center;
            font-size: 3em;
            font-weight: bold;
            margin-bottom: 0.5em;
        }
        .subtitle {
            text-align: center;
            font-size: 2em;
            margin-bottom: 1em;
        }
    </style>
    """)
    with gr.Tabs() as tabs:
        with gr.TabItem("Evaluation Result"):
            with gr.Row():
                with gr.Column(scale=2):
                    with gr.Row():
                        with gr.Column():
                            dataset_radio = gr.Radio(["HumanEval", "MBPP"], label="Select Dataset ")
                    with gr.Row():
                        custom_css = """
                        <style>
                            .markdown-class {
                                font-family: 'Helvetica', sans-serif;
                                font-size: 20px;
                                font-weight: bold;
                                color: #333;
                            }
                        </style>
                        """
                        with gr.Column():
                            gr.Markdown(
                                f"{custom_css}<div class='markdown-class'> Choose Division Perspective </div>")
                            token_counts_checkbox = gr.Checkbox(label="I-The Number of Tokens in Problem Descriptions")
                            line_counts_checkbox = gr.Checkbox(label="II-The Number of Lines in Problem Descriptions")
                            # The line-count perspective is hidden while MBPP is selected.
                            dataset_radio.change(fn=toggle_line_counts_visibility, inputs=dataset_radio,
                                                 outputs=line_counts_checkbox)
                            cyclomatic_complexity_checkbox = gr.Checkbox(label="III-The Complexity of Reference Code")
                            problem_type_checkbox = gr.Checkbox(label="IV-Problem Types ")
                        with gr.Column():
                            # gr.Markdown("<div class='markdown-class'>Choose Subsets </div>")
                            # Start with no selection: an empty string is not one of the choices.
                            num_parts_dropdown = gr.Dropdown(choices=[0, 3, 4, 5, 6, 7, 8],
                                                             label="Choose the Number of Subsets",
                                                             value=None)
                    with gr.Row():
                        with gr.Column():
                            token_counts_radio = gr.Radio(
                                ["Equal Frequency Partitioning", "Equal Interval Partitioning"],
                                label="Choose the Division Method for Perspective-I",
                                visible=False)
                        with gr.Column():
                            line_counts_radio = gr.Radio(
                                ["Equal Frequency Partitioning", "Equal Interval Partitioning"],
                                label="Choose the Division Method for Perspective-II",
                                visible=False)
                        with gr.Column():
                            cyclomatic_complexity_radio = gr.Radio(
                                ["Equal Frequency Partitioning", "Equal Interval Partitioning"],
                                label="Choose the Division Method for Perspective-III",
                                visible=False)
                    # Each division-method radio appears only while its checkbox is ticked.
                    token_counts_checkbox.change(fn=lambda x: toggle_radio(x, token_counts_radio),
                                                 inputs=token_counts_checkbox, outputs=token_counts_radio)
                    line_counts_checkbox.change(fn=lambda x: toggle_radio(x, line_counts_radio),
                                                inputs=line_counts_checkbox, outputs=line_counts_radio)
                    cyclomatic_complexity_checkbox.change(fn=lambda x: toggle_radio(x, cyclomatic_complexity_radio),
                                                          inputs=cyclomatic_complexity_checkbox,
                                                          outputs=cyclomatic_complexity_radio)
            with gr.Tabs() as inner_tabs:
                with gr.TabItem("Ranking Table"):
                    dataframe_output = gr.Dataframe(elem_id="dataframe")
                    css_output = gr.HTML()
                    confirm_button = gr.Button("Confirm ")
                    confirm_button.click(fn=on_confirm,
                                         inputs=[dataset_radio, num_parts_dropdown, token_counts_radio,
                                                 line_counts_radio, cyclomatic_complexity_radio,
                                                 problem_type_checkbox],
                                         outputs=dataframe_output)
                with gr.TabItem("Line chart"):
                    select_radio = gr.Radio(choices=[], label="Select One Perpective")
                    checkboxes = [token_counts_checkbox, line_counts_checkbox, cyclomatic_complexity_checkbox,
                                  problem_type_checkbox]
                    # Any checkbox change refreshes the list of plottable perspectives.
                    for checkbox in checkboxes:
                        checkbox.change(fn=update_radio_options, inputs=checkboxes, outputs=select_radio)
                    select_radio.change(fn=plot_csv, inputs=[dataset_radio, select_radio, num_parts_dropdown],
                                        outputs=gr.Plot(label="Line Plot "))
        # Disabled tab for uploading inference results (generate_file is not
        # defined in the visible part of this file):
        # with gr.TabItem("Upload Inference File"):
        #     gr.Markdown("Upload a JSON file")
        #     with gr.Row():
        #         with gr.Column():
        #             string_input = gr.Textbox(label="Enter the Model Name")
        #             number_input = gr.Number(label="Select the Number of Samples")
        #             dataset_choice = gr.Dropdown(label="Select Dataset", choices=["HumanEval", "MBPP"])
        #         with gr.Column():
        #             file_input = gr.File(label="Upload Generation Result in JSON file")
        #     upload_button = gr.Button("Confirm and Upload")
        #     json_output = gr.JSON(label="")
        #     upload_button.click(fn=generate_file, inputs=[file_input, string_input, number_input, dataset_choice],
        #                         outputs=json_output)
    # The stray extra closing brace that followed this rule has been removed.
    css = """
    #scale1 {
        border: 1px solid rgba(0, 0, 0, 0.2);
        padding: 10px;
        border-radius: 8px;
        background-color: #f9f9f9;
        box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
    }
    """
    gr.HTML(f"<style>{css}</style>")
    # Disabled initial-table setup:
    # initial_df = show_data(False, False, False, False, False, False, False)
    # initial_css = generate_css(False, False, False, False, True, False, False)
    # dataframe_output.value = initial_df
    # css_output.value = f"<style>{initial_css}</style>"

# Start the app.
iface.launch()