File size: 5,312 Bytes
f3c542d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76a84b9
 
f3c542d
76a84b9
 
 
f3c542d
 
76a84b9
 
 
 
f3c542d
 
 
 
 
 
 
 
76a84b9
 
 
f3c542d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76a84b9
 
 
 
 
 
 
 
f3c542d
 
6a39178
76a84b9
6a39178
 
f3c542d
 
 
 
 
 
76a84b9
 
f3c542d
 
6a39178
 
 
 
 
 
 
 
f3c542d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import os
import json
import datetime
import requests
from email.utils import parseaddr

import gradio as gr
import pandas as pd
import numpy as np

from datasets import load_dataset, VerificationMode
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import HfApi

# InfoStrings
from scorer import question_scorer
from content import format_error, format_warning, format_log, TITLE, DATA_DATASET, INTRODUCTION_TEXT, SUBMISSION_TEXT, CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT, model_hyperlink, SUBMIT_INTRODUCTION

# Hugging Face token for Hub operations (restart/upload); may be unset when
# running locally, in which case authenticated Hub calls are unavailable.
TOKEN = os.environ.get("TOKEN", None)

# Owner/namespace label for this leaderboard's assets.
OWNER="Online Mind2Web"
# api = HfApi()

# Leaderboard edition year (string, used for display/versioning).
YEAR_VERSION = "2024"

# NOTE(review): flag is defined but not read anywhere in this file — presumably
# toggles local-vs-Space behavior elsewhere; confirm before removing.
LOCAL_DEBUG = True

# Display the results
def get_dataframe_from_results(eval_path, format_columns=('Easy', 'Medium', 'Hard', 'Average SR')):
    """Load a leaderboard CSV and return it sorted by overall success rate.

    Args:
        eval_path: Path to the leaderboard CSV file.
        format_columns: Numeric columns to render with one decimal place.
            Defaults to the three difficulty buckets plus the overall average,
            matching the leaderboard CSV schema.

    Returns:
        pd.DataFrame sorted by "Average SR" descending, with every column in
        ``format_columns`` converted to one-decimal strings for display.
    """
    df = pd.read_csv(eval_path)
    # Sort while "Average SR" is still numeric: the formatting loop below
    # turns it into strings, which would sort lexicographically ("9.0" > "10.0").
    df = df.sort_values(by=["Average SR"], ascending=False)
    for format_column in format_columns:
        df[format_column] = df[format_column].map('{:.1f}'.format)
    return df

# auto_df = pd.read_csv("./auto_Mind2Web-Online - Leaderboard_data.csv")
# human_df = pd.read_csv("./human_Mind2Web-Online - Leaderboard_data.csv")
# Initial leaderboard tables rendered on page load; the Refresh button
# re-reads the same two CSVs via refresh() below.
auto_eval_dataframe_test = get_dataframe_from_results('./auto_Mind2Web-Online - Leaderboard_data.csv')
human_eval_dataframe_test = get_dataframe_from_results('./human_Mind2Web-Online - Leaderboard_data.csv')


# def restart_space():
#     api.restart_space(repo_id=LEADERBOARD_PATH, token=TOKEN)

# Per-column datatypes for the gradio Dataframe components (9 entries —
# assumed to match the leaderboard CSV column order; verify against the CSVs).
TYPES = ["str", "str", "str", "str", "number", "number", "number", "number", "str"]

def refresh():
    """Re-read both leaderboard CSVs from disk and return fresh tables.

    Returns:
        Tuple ``(auto_df, human_df)`` in the exact order expected by the
        refresh button's ``outputs`` wiring.
    """
    auto_df = get_dataframe_from_results('./auto_Mind2Web-Online - Leaderboard_data.csv')
    human_df = get_dataframe_from_results('./human_Mind2Web-Online - Leaderboard_data.csv')
    return auto_df, human_df

def upload_file(files):
    """Collect the on-disk paths of a batch of uploaded file objects.

    Args:
        files: Iterable of objects exposing a ``.name`` attribute (as the
            gradio upload widget supplies).

    Returns:
        List of path strings, one per uploaded file, in input order.
    """
    paths = []
    for uploaded in files:
        paths.append(uploaded.name)
    return paths


# Declarative gradio UI: header, citation box, two leaderboard tabs
# (human/auto), submission guideline tab, and a refresh button that re-reads
# the CSVs into both tables.
demo = gr.Blocks()
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Row():
        # NOTE(review): the accordion label bytes look mojibake'd (likely a
        # UTF-8 emoji decoded as cp1252) — confirm intended glyph upstream.
        with gr.Accordion("πŸ“™ Citation", open=False):
            # Read-only textbox holding the BibTeX entry for easy copying.
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id="citation-button",
                lines=10,
            ) #.style(show_copy_button=True)

    # Human-annotated results table; TYPES/column_widths assumed to match the
    # CSV's 9-column layout.
    with gr.Tab("Human Evaluation", elem_id="human-tab", id=1):
        human_leaderboard_table_test = gr.components.Dataframe(
            value=human_eval_dataframe_test, datatype=TYPES, interactive=False,
            column_widths=["15%", "15%", "15%", "15%", "10%", "10%", "10%", "10%", "15%"]
        )
    # Automatically-evaluated results table (same layout as the human tab).
    with gr.Tab("Auto Evaluation", elem_id="auto-tab", id=2):
        auto_leaderboard_table_test = gr.components.Dataframe(
            value=auto_eval_dataframe_test, datatype=TYPES, interactive=False,
            column_widths=["15%", "15%", "15%", "15%", "10%", "10%", "10%", "10%", "15%"]
        )

    # Static markdown explaining how to submit new agent results.
    with gr.Tab("Submission Guideline", elem_id="submit-tab", id=3):
        with gr.Row():
            gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")

    # Refresh re-runs refresh(); output order (auto, human) must match
    # refresh()'s return order.
    refresh_button = gr.Button("Refresh")
    refresh_button.click(
        refresh,
        inputs=[],
        outputs=[
            auto_leaderboard_table_test,
            human_leaderboard_table_test,
        ],
    )
    # Disabled dataset figures and the original in-app submission form, kept
    # for reference.
    # gr.Markdown(DATA_DATASET, elem_classes="markdown-text")
    # with gr.Row():
    #     # gr.Image(value="./figure/distribution_reference_length.png", label="Distribution of reference length", show_label=True, scale=0.4)
    #     gr.Image(value="./figure/Difficulty.png", label="Number of tasks by difficulty level", show_label=True, scale=0.4)
    # with gr.Row():
    #     gr.Image(value="./figure/distribution_website.jpg", label="Distribution of websites.",show_label=True, scale=0.4)
    # with gr.Row():
    #     gr.Image(value="./figure/popularity.jpg", label="Popularity of websites.", show_label=True, scale=0.4)
    # with gr.Accordion("Submit a new agent for evaluation"):
    #     with gr.Row():
    #         gr.Markdown(SUBMISSION_TEXT, elem_classes="markdown-text")
    #     with gr.Row():
    #         with gr.Column():
    #             model_name_textbox = gr.Textbox(label="Agent name")
    #             model_family_textbox = gr.Textbox(label="Model family")
    #             organisation = gr.Textbox(label="Organization")
    #             mail = gr.Textbox(label="Contact email (will be stored privately, & used if there is an issue with your submission)")
    #             file_output = gr.File()


    #     with gr.Row():
    #         gr.LoginButton()
    #         submit_button = gr.Button("Submit Eval")
    #     submission_result = gr.Markdown()
        # submit_button.click(
        #     [
        #         level_of_test,
        #         model_name_textbox,
        #         model_family_textbox,
        #         system_prompt_textbox,
        #         url_textbox,
        #         file_output,
        #         organisation,
        #         mail
        #     ],
        #     submission_result,
        # )

# Background scheduler retained for the (currently disabled) hourly Space
# restart job; started anyway so re-enabling is a one-line uncomment.
scheduler = BackgroundScheduler()
# scheduler.add_job(restart_space, "interval", seconds=3600)
scheduler.start()
# Blocking call: serves the gradio app (debug=True surfaces errors in the UI).
demo.launch(debug=True)