Sebastian Deatc committed on
Commit 43cf100 · verified · 1 Parent(s): 8d4665d
Files changed (1)
  1. app.py +221 -197
app.py CHANGED
@@ -1,199 +1,223 @@
  import gradio as gr
- from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
  import pandas as pd
- from apscheduler.schedulers.background import BackgroundScheduler
- from huggingface_hub import snapshot_download
- from src.about import (
-     CITATION_BUTTON_LABEL,
-     CITATION_BUTTON_TEXT,
-     EVALUATION_QUEUE_TEXT,
-     INTRODUCTION_TEXT,
-     LLM_BENCHMARKS_TEXT,
-     TITLE,
- )
- from src.display.css_html_js import custom_css
- from src.display.utils import (
-     BENCHMARK_COLS,
-     COLS,
-     EVAL_COLS,
-     EVAL_TYPES,
-     AutoEvalColumn,
-     ModelType,
-     fields,
-     WeightType,
-     Precision
- )
- from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
- from src.populate import get_evaluation_queue_df, get_leaderboard_df
- from src.submission.submit import add_new_eval
-
-
- def restart_space():
-     API.restart_space(repo_id=REPO_ID)
-
- ### Space initialization
- try:
-     snapshot_download(
-         repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
-     )
- except Exception:
-     restart_space()
- try:
-     snapshot_download(
-         repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
-     )
- except Exception:
-     restart_space()
-
- # Prepare your DataFrame
- LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
-
- # Initialize DataFrames for evaluation queues
- finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
-
- def init_leaderboard(dataframe):
-     if dataframe is None or dataframe.empty:
-         raise ValueError("Leaderboard DataFrame is empty or None.")
-     return Leaderboard(
-         value=dataframe,
-         datatype=[c.type for c in fields(AutoEvalColumn)],
-         select_columns=SelectColumns(
-             default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
-             cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
-             label="Select Columns to Display:",
-         ),
-         search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
-         hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
-         filter_columns=[
-             ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
-             ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
-             ColumnFilter(
-                 AutoEvalColumn.params.name,
-                 type="slider",
-                 min=0.01,
-                 max=150,
-                 label="Select the number of parameters (B)",
-             ),
-             ColumnFilter(
-                 AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
-             ),
-         ],
-         bool_checkboxgroup_label="Hide models",
-         interactive=False,
-     )
-
- # Start Gradio interface
- demo = gr.Blocks(css=custom_css)
- with demo:
-     gr.HTML(TITLE)
-     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
-
-     with gr.Tabs(elem_classes="tab-buttons") as tabs:
-         with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
-             leaderboard = init_leaderboard(LEADERBOARD_DF) # Use the prepared DataFrame
-             gr.Row().update(leaderboard) # Ensure the leaderboard is included
-
-         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
-             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
-
-         with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
-             with gr.Column():
-                 with gr.Row():
-                     gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
-
-                 with gr.Column():
-                     with gr.Accordion(
-                         f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
-                         open=False,
-                     ):
-                         with gr.Row():
-                             finished_eval_table = gr.components.Dataframe(
-                                 value=finished_eval_queue_df,
-                                 headers=EVAL_COLS,
-                                 datatype=EVAL_TYPES,
-                                 row_count=5,
-                             )
-                     with gr.Accordion(
-                         f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
-                         open=False,
-                     ):
-                         with gr.Row():
-                             running_eval_table = gr.components.Dataframe(
-                                 value=running_eval_queue_df,
-                                 headers=EVAL_COLS,
-                                 datatype=EVAL_TYPES,
-                                 row_count=5,
-                             )
-
-                     with gr.Accordion(
-                         f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
-                         open=False,
-                     ):
-                         with gr.Row():
-                             pending_eval_table = gr.components.Dataframe(
-                                 value=pending_eval_queue_df,
-                                 headers=EVAL_COLS,
-                                 datatype=EVAL_TYPES,
-                                 row_count=5,
-                             )
-             with gr.Row():
-                 gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
-
-             with gr.Row():
-                 with gr.Column():
-                     model_name_textbox = gr.Textbox(label="Model name")
-                     revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
-                     model_type = gr.Dropdown(
-                         choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
-                         label="Model type",
-                         multiselect=False,
-                         value=None,
-                         interactive=True,
-                     )
-
-                 with gr.Column():
-                     precision = gr.Dropdown(
-                         choices=[i.value.name for i in Precision if i != Precision.Unknown],
-                         label="Precision",
-                         multiselect=False,
-                         value="float16",
-                         interactive=True,
-                     )
-                     weight_type = gr.Dropdown(
-                         choices=[i.value.name for i in WeightType],
-                         label="Weights type",
-                         multiselect=False,
-                         value="Original",
-                         interactive=True,
-                     )
-                     base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
-
-             submit_button = gr.Button("Submit Eval")
-             submission_result = gr.Markdown()
-             submit_button.click(
-                 add_new_eval,
-                 [
-                     model_name_textbox,
-                     base_model_name_textbox,
-                     revision_name_textbox,
-                     precision,
-                     weight_type,
-                     model_type,
-                 ],
-                 submission_result,
-             )
-
-     with gr.Row():
-         with gr.Accordion("📙 Citation", open=False):
-             citation_button = gr.Textbox(
-                 value=CITATION_BUTTON_TEXT,
-                 label=CITATION_BUTTON_LABEL,
-                 lines=20,
-                 elem_id="citation-button",
-                 show_copy_button=True,
-             )
-
- scheduler = BackgroundScheduler()
- scheduler.add_job(restart_space, "interval", seconds=1800)
- scheduler.start()
- demo.queue(default_concurrency_limit=40).launch()

+ # import gradio as gr
+ # from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
+ # import pandas as pd
+ # from apscheduler.schedulers.background import BackgroundScheduler
+ # from huggingface_hub import snapshot_download
+ # from src.about import (
+ #     CITATION_BUTTON_LABEL,
+ #     CITATION_BUTTON_TEXT,
+ #     EVALUATION_QUEUE_TEXT,
+ #     INTRODUCTION_TEXT,
+ #     LLM_BENCHMARKS_TEXT,
+ #     TITLE,
+ # )
+ # from src.display.css_html_js import custom_css
+ # from src.display.utils import (
+ #     BENCHMARK_COLS,
+ #     COLS,
+ #     EVAL_COLS,
+ #     EVAL_TYPES,
+ #     AutoEvalColumn,
+ #     ModelType,
+ #     fields,
+ #     WeightType,
+ #     Precision
+ # )
+ # from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
+ # from src.populate import get_evaluation_queue_df, get_leaderboard_df
+ # from src.submission.submit import add_new_eval
+
+
+ # def restart_space():
+ #     API.restart_space(repo_id=REPO_ID)
+
+ # ### Space initialization
+ # try:
+ #     snapshot_download(
+ #         repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
+ #     )
+ # except Exception:
+ #     restart_space()
+ # try:
+ #     snapshot_download(
+ #         repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
+ #     )
+ # except Exception:
+ #     restart_space()
+
+ # # Prepare your DataFrame
+ # LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
+
+ # # Initialize DataFrames for evaluation queues
+ # finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
+
+ # def init_leaderboard(dataframe):
+ #     if dataframe is None or dataframe.empty:
+ #         raise ValueError("Leaderboard DataFrame is empty or None.")
+ #     return Leaderboard(
+ #         value=dataframe,
+ #         datatype=[c.type for c in fields(AutoEvalColumn)],
+ #         select_columns=SelectColumns(
+ #             default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
+ #             cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
+ #             label="Select Columns to Display:",
+ #         ),
+ #         search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
+ #         hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
+ #         filter_columns=[
+ #             ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
+ #             ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
+ #             ColumnFilter(
+ #                 AutoEvalColumn.params.name,
+ #                 type="slider",
+ #                 min=0.01,
+ #                 max=150,
+ #                 label="Select the number of parameters (B)",
+ #             ),
+ #             ColumnFilter(
+ #                 AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
+ #             ),
+ #         ],
+ #         bool_checkboxgroup_label="Hide models",
+ #         interactive=False,
+ #     )
+
+ # # Start Gradio interface
+ # demo = gr.Blocks(css=custom_css)
+ # with demo:
+ #     gr.HTML(TITLE)
+ #     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+
+ #     with gr.Tabs(elem_classes="tab-buttons") as tabs:
+ #         with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
+ #             leaderboard = init_leaderboard(LEADERBOARD_DF) # Use the prepared DataFrame
+ #             gr.Row().update(leaderboard) # Ensure the leaderboard is included
+
+ #         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
+ #             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+
+ #         with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
+ #             with gr.Column():
+ #                 with gr.Row():
+ #                     gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
+
+ #                 with gr.Column():
+ #                     with gr.Accordion(
+ #                         f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
+ #                         open=False,
+ #                     ):
+ #                         with gr.Row():
+ #                             finished_eval_table = gr.components.Dataframe(
+ #                                 value=finished_eval_queue_df,
+ #                                 headers=EVAL_COLS,
+ #                                 datatype=EVAL_TYPES,
+ #                                 row_count=5,
+ #                             )
+ #                     with gr.Accordion(
+ #                         f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
+ #                         open=False,
+ #                     ):
+ #                         with gr.Row():
+ #                             running_eval_table = gr.components.Dataframe(
+ #                                 value=running_eval_queue_df,
+ #                                 headers=EVAL_COLS,
+ #                                 datatype=EVAL_TYPES,
+ #                                 row_count=5,
+ #                             )
+
+ #                     with gr.Accordion(
+ #                         f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
+ #                         open=False,
+ #                     ):
+ #                         with gr.Row():
+ #                             pending_eval_table = gr.components.Dataframe(
+ #                                 value=pending_eval_queue_df,
+ #                                 headers=EVAL_COLS,
+ #                                 datatype=EVAL_TYPES,
+ #                                 row_count=5,
+ #                             )
+ #             with gr.Row():
+ #                 gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
+
+ #             with gr.Row():
+ #                 with gr.Column():
+ #                     model_name_textbox = gr.Textbox(label="Model name")
+ #                     revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
+ #                     model_type = gr.Dropdown(
+ #                         choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
+ #                         label="Model type",
+ #                         multiselect=False,
+ #                         value=None,
+ #                         interactive=True,
+ #                     )
+
+ #                 with gr.Column():
+ #                     precision = gr.Dropdown(
+ #                         choices=[i.value.name for i in Precision if i != Precision.Unknown],
+ #                         label="Precision",
+ #                         multiselect=False,
+ #                         value="float16",
+ #                         interactive=True,
+ #                     )
+ #                     weight_type = gr.Dropdown(
+ #                         choices=[i.value.name for i in WeightType],
+ #                         label="Weights type",
+ #                         multiselect=False,
+ #                         value="Original",
+ #                         interactive=True,
+ #                     )
+ #                     base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
+
+ #             submit_button = gr.Button("Submit Eval")
+ #             submission_result = gr.Markdown()
+ #             submit_button.click(
+ #                 add_new_eval,
+ #                 [
+ #                     model_name_textbox,
+ #                     base_model_name_textbox,
+ #                     revision_name_textbox,
+ #                     precision,
+ #                     weight_type,
+ #                     model_type,
+ #                 ],
+ #                 submission_result,
+ #             )
+
+ #     with gr.Row():
+ #         with gr.Accordion("📙 Citation", open=False):
+ #             citation_button = gr.Textbox(
+ #                 value=CITATION_BUTTON_TEXT,
+ #                 label=CITATION_BUTTON_LABEL,
+ #                 lines=20,
+ #                 elem_id="citation-button",
+ #                 show_copy_button=True,
+ #             )
+
+ # scheduler = BackgroundScheduler()
+ # scheduler.add_job(restart_space, "interval", seconds=1800)
+ # scheduler.start()
+ # demo.queue(default_concurrency_limit=40).launch()
+
+
  import gradio as gr
  import pandas as pd
+
+ # Sample DataFrame
+ data = {
+     'Model': ['Model A', 'Model B', 'Model C'],
+     'Accuracy': [0.95, 0.90, 0.85],
+     'F1 Score': [0.96, 0.89, 0.84]
+ }
+ df = pd.DataFrame(data)
+
+ # Function to display the DataFrame
+ def display_table():
+     return df
+
+ # Gradio Interface
+ with gr.Blocks() as demo:
+     gr.Markdown("# Benchmark Results")
+     gr.DataFrame(value=df, label="Benchmark Table", interactive=False) # Display the DataFrame
+
+ # Launch the Gradio app
+ demo.launch()