import gradio as gr
from gradio_leaderboard import Leaderboard, SelectColumns, ColumnFilter
import config
from pathlib import Path
import pandas as pd
from datetime import datetime

abs_path = Path(__file__).parent

# Load the pre-computed evaluation results that ship alongside this app.
df = pd.read_json(str(abs_path / "leader_board.json"))
head_content = """
# 🏅 BlinkCode Leaderboard

### Welcome to the BlinkCode Leaderboard! Here we share the evaluation results of MLLMs obtained with the [open-source framework](https://github.com/YJQuantumLeap/BlinkCode).

### Currently, the BlinkCode Leaderboard covers <model num> different VLMs (including GPT-4V, Gemini, QwenVLMAX, LLaVA, etc.) and 9 different tasks.

## Main Evaluation Results

- Metrics:
  - Avg Score: the average score across all tasks (normalized to 0 - 100; the higher the better).
  - The scores on the 5 tasks (HumanEval-V, MBPP-V, GSM8K-V, MATH-V, VP) are accuracy percentages.
  - The scores on the image reconstruction tasks (Matplotlib, SVG, TikZ, Webpage) represent the similarity between the reconstructed and original images (normalized to 0 - 100; the higher the better).
- By default, we present the unrefined evaluation results, sorted in descending order of Avg Score ⬆️.
- The ⭐ symbol indicates results that have undergone two rounds of refinement.

This leaderboard was last updated: <nowtime>.
"""
# BibTeX entry surfaced in the "Citation" accordion at the bottom of the page.
CITATION_BUTTON_TEXT = r"""@misc{2023opencompass,
    title={OpenCompass: A Universal Evaluation Platform for Foundation Models},
    author={OpenCompass Contributors},
    howpublished = {\url{https://github.com/open-compass/opencompass}},
    year={2023}
}"""
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
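# <model num> and <nowtime> above are placeholders inside head_content; the lines
# below compute their live values and substitute them before the page is rendered.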
unique_models_count = df["Model"].nunique()
nowtime = datetime.now()
formatted_time = nowtime.strftime("%y.%m.%d %H:%M:%S")  # e.g. "24.06.30 14:05:09"
head_content = head_content.replace("<nowtime>", formatted_time).replace('<model num>', str(unique_models_count))
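# The companion `config` module is not shown in this file. From its usage below it is
# assumed to expose ON_LOAD_COLUMNS (columns visible by default) and TYPES
# (per-column datatypes for the Leaderboard). A hypothetical minimal sketch:
#
#     ON_LOAD_COLUMNS = ["Rank", "Model", "Avg Score"]   # assumed example values
#     TYPES = ["number", "markdown", "number"]           # assumed example values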
with gr.Blocks() as demo:
    gr.Markdown(head_content)
    with gr.Tabs():
        Leaderboard(
            value=df,
            select_columns=SelectColumns(
                default_selection=config.ON_LOAD_COLUMNS,
                cant_deselect=["Rank", "Model"],
                label="Select Columns to Display:",
            ),
            search_columns=["Model", "Model Type"],
            hide_columns=["Model Size", "Model Type", "Supports multiple images"],
            filter_columns=[
                "Model Size",
                "Model Type",
                "Supports multiple images",
            ],
            datatype=config.TYPES,
            column_widths=["5%", "15%"],
        )
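        # Note: "Model Size", "Model Type", and "Supports multiple images" are hidden
        # from the table itself (hide_columns) but are still used for the filter
        # controls and, in the case of "Model Type", the search box.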
    with gr.Row():
        with gr.Accordion('Citation', open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id='citation-button')

if __name__ == "__main__":
    demo.launch()
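# Usage note (assumptions: `gradio`, `gradio_leaderboard`, and `pandas` are installed,
# and "leader_board.json" sits next to this script): running this file directly starts
# a local Gradio server on Gradio's default port (7860) unless configured otherwise.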