# Online Mind2Web leaderboard Space
import datetime
import json
import os
from email.utils import parseaddr

import gradio as gr
import numpy as np
import pandas as pd
import requests
from apscheduler.schedulers.background import BackgroundScheduler
from datasets import load_dataset, VerificationMode
from huggingface_hub import HfApi

# InfoStrings
from scorer import question_scorer
from content import format_error, format_warning, format_log, TITLE, DATA_DATASET, INTRODUCTION_TEXT, SUBMISSION_TEXT, CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT, model_hyperlink, SUBMIT_INTRODUCTION
# Hugging Face token read from the Space's secrets; None when not configured.
TOKEN = os.environ.get("TOKEN", None)
# Leaderboard owner/organization label.
OWNER="Online Mind2Web"
# api = HfApi()
YEAR_VERSION = "2024"
# NOTE(review): LOCAL_DEBUG is never read in the visible code — confirm
# before removing.
LOCAL_DEBUG = True
# Display the results | |
def get_dataframe_from_results(eval_path):
    """Load a leaderboard CSV and prepare it for display.

    Reads the results at *eval_path*, sorts rows by "Average SR"
    (descending, best agent first), then formats each score column as a
    one-decimal string for rendering in the Gradio table.

    Args:
        eval_path: path (or file-like object) accepted by pandas.read_csv;
            must contain the columns 'Easy', 'Medium', 'Hard', 'Average SR'.

    Returns:
        pd.DataFrame: sorted, display-formatted leaderboard table (the
        score columns become strings after formatting).
    """
    score_columns = ['Easy', 'Medium', 'Hard', 'Average SR']
    df = pd.read_csv(eval_path)
    # Sort while "Average SR" is still numeric: sorting after the string
    # formatting below would order lexicographically ("9.0" > "10.0").
    df = df.sort_values(by=["Average SR"], ascending=False)
    for column in score_columns:
        df[column] = df[column].map('{:.1f}'.format)
    return df
# Leaderboard tables shipped with the Space: one auto-evaluated and one
# human-evaluated CSV, rendered by the two leaderboard tabs below.
auto_eval_dataframe_test = get_dataframe_from_results('./auto_Mind2Web-Online - Leaderboard_data.csv')
human_eval_dataframe_test = get_dataframe_from_results('./human_Mind2Web-Online - Leaderboard_data.csv')
# def restart_space(): | |
# api.restart_space(repo_id=LEADERBOARD_PATH, token=TOKEN) | |
# Gradio datatype for each of the nine leaderboard columns, in display order.
# NOTE(review): the four score columns are pre-formatted as strings by
# get_dataframe_from_results yet declared "number" here — confirm Gradio
# accepts that combination.
TYPES = ["str", "str", "str", "str", "number", "number", "number", "number", "str"]
def refresh():
    """Re-read both leaderboard CSVs from disk and return fresh tables.

    Returns:
        tuple: (auto_eval_dataframe, human_eval_dataframe), consumed by
        the two Dataframe components wired to the Refresh button.
    """
    auto_csv = './auto_Mind2Web-Online - Leaderboard_data.csv'
    human_csv = './human_Mind2Web-Online - Leaderboard_data.csv'
    return (
        get_dataframe_from_results(auto_csv),
        get_dataframe_from_results(human_csv),
    )
def upload_file(files):
    """Return the local path of every uploaded file.

    Args:
        files: iterable of Gradio file objects, each exposing a ``.name``
            attribute holding its temp-file path.

    Returns:
        list: one path per uploaded file, in upload order.
    """
    paths = []
    for uploaded in files:
        paths.append(uploaded.name)
    return paths
# Build the Gradio UI: title, intro text, a collapsible citation box, and
# three tabs (human leaderboard, auto leaderboard, submission guideline).
demo = gr.Blocks()
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
    with gr.Row():
        # NOTE(review): "π" in the label looks like a mis-encoded emoji —
        # confirm the intended character before changing it.
        with gr.Accordion("π Citation", open=False):
            # Read-only textbox so visitors can copy the citation text.
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id="citation-button",
                lines=10,
            )  # .style(show_copy_button=True)
    with gr.Tab("Human Evaluation", elem_id="human-tab", id=1):
        human_leaderboard_table_test = gr.components.Dataframe(
            value=human_eval_dataframe_test, datatype=TYPES, interactive=False,
            column_widths=["15%", "15%", "15%", "15%", "10%", "10%", "10%", "10%", "15%"]
        )
    with gr.Tab("Auto Evaluation", elem_id="auto-tab", id=2):
        auto_leaderboard_table_test = gr.components.Dataframe(
            value=auto_eval_dataframe_test, datatype=TYPES, interactive=False,
            column_widths=["15%", "15%", "15%", "15%", "10%", "10%", "10%", "10%", "15%"]
        )
    with gr.Tab("Submission Guideline", elem_id="submit-tab", id=3):
        with gr.Row():
            gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")
    # Manual refresh: re-reads both CSVs and repopulates the two tables.
    # refresh() returns (auto, human), matching the outputs order below.
    refresh_button = gr.Button("Refresh")
    refresh_button.click(
        refresh,
        inputs=[],
        outputs=[
            auto_leaderboard_table_test,
            human_leaderboard_table_test,
        ],
    )
# gr.Markdown(DATA_DATASET, elem_classes="markdown-text") | |
# with gr.Row(): | |
# # gr.Image(value="./figure/distribution_reference_length.png", label="Distribution of reference length", show_label=True, scale=0.4) | |
# gr.Image(value="./figure/Difficulty.png", label="Number of tasks by difficulty level", show_label=True, scale=0.4) | |
# with gr.Row(): | |
# gr.Image(value="./figure/distribution_website.jpg", label="Distribution of websites.",show_label=True, scale=0.4) | |
# with gr.Row(): | |
# gr.Image(value="./figure/popularity.jpg", label="Popularity of websites.", show_label=True, scale=0.4) | |
# with gr.Accordion("Submit a new agent for evaluation"): | |
# with gr.Row(): | |
# gr.Markdown(SUBMISSION_TEXT, elem_classes="markdown-text") | |
# with gr.Row(): | |
# with gr.Column(): | |
# model_name_textbox = gr.Textbox(label="Agent name") | |
# model_family_textbox = gr.Textbox(label="Model family") | |
# organisation = gr.Textbox(label="Organization") | |
# mail = gr.Textbox(label="Contact email (will be stored privately, & used if there is an issue with your submission)") | |
# file_output = gr.File() | |
# with gr.Row(): | |
# gr.LoginButton() | |
# submit_button = gr.Button("Submit Eval") | |
# submission_result = gr.Markdown() | |
# submit_button.click( | |
# [ | |
# level_of_test, | |
# model_name_textbox, | |
# model_family_textbox, | |
# system_prompt_textbox, | |
# url_textbox, | |
# file_output, | |
# organisation, | |
# ], | |
# submission_result, | |
# ) | |
# Background scheduler previously drove a periodic Space restart (see the
# commented-out job); it is started here with no jobs registered, so it is
# currently a no-op.
scheduler = BackgroundScheduler()
# scheduler.add_job(restart_space, "interval", seconds=3600)
scheduler.start()
demo.launch(debug=True)