UltraRonin committed
Commit 84010af · Parent: 1bcaf5a

Files changed (2):
  1. app.py +8 -86
  2. src/about.py +39 -0
app.py CHANGED
@@ -11,6 +11,7 @@ from src.about import (
     EVALUATION_QUEUE_TEXT,
     INTRODUCTION_TEXT,
     TASK_TEXT,
+    SUBMIT_TEMPLATE,
     LLM_BENCHMARKS_TEXT,
     TITLE,
 )
@@ -138,96 +139,17 @@ with demo:
             # gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
         with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
-            with gr.Column():
-                with gr.Row():
-                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
-
-                with gr.Column():
-                    with gr.Accordion(
-                        f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            finished_eval_table = gr.components.Dataframe(
-                                value=finished_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-                    with gr.Accordion(
-                        f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            running_eval_table = gr.components.Dataframe(
-                                value=running_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-
-                    with gr.Accordion(
-                        f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            pending_eval_table = gr.components.Dataframe(
-                                value=pending_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
             with gr.Row():
                 gr.Markdown("# ✉️✨ Submit your results here!", elem_classes="markdown-text")
 
-            json_input = gr.File(label="Please upload an JSON file", type="file")
-            output = gr.Textbox(label="Parsed JSON content", lines=10)
-
-            json_input.change(process_json, inputs=json_input, outputs=output)
+            gr.Markdown("## Submission Template", elem_classes="markdown-text")
+            gr.Markdown(SUBMIT_TEMPLATE, elem_classes="markdown-text", height=250)
+
+            file_input = gr.File(label="Upload JSON File", file_types=[".json"], height=150)
+            json_output = gr.JSON(label="Parsed JSON Data")  # output the parsed JSON data
+            submit_button = gr.Button("Submit")
+            submit_button.click(fn=process_json, inputs=file_input, outputs=json_output)
 
-            with gr.Row():
-                with gr.Column():
-                    model_name_textbox = gr.Textbox(label="Model name")
-                    revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
-                    model_type = gr.Dropdown(
-                        choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
-                        label="Model type",
-                        multiselect=False,
-                        value=None,
-                        interactive=True,
-                    )
-
-                with gr.Column():
-                    precision = gr.Dropdown(
-                        choices=[i.value.name for i in Precision if i != Precision.Unknown],
-                        label="Precision",
-                        multiselect=False,
-                        value="float16",
-                        interactive=True,
-                    )
-                    weight_type = gr.Dropdown(
-                        choices=[i.value.name for i in WeightType],
-                        label="Weights type",
-                        multiselect=False,
-                        value="Original",
-                        interactive=True,
-                    )
-                    base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
-
-            submit_button = gr.Button("Submit Eval")
-            submission_result = gr.Markdown()
-            submit_button.click(
-                add_new_eval,
-                [
-                    model_name_textbox,
-                    base_model_name_textbox,
-                    revision_name_textbox,
-                    precision,
-                    weight_type,
-                    model_type,
-                ],
-                submission_result,
-            )
 
             with gr.Row():
                 # gr.Markdown()
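The new Submit tab hands the uploaded file to a `process_json` callback whose body is not part of this commit. A minimal sketch of what such a handler could look like, assuming it only parses the uploaded file and checks for the top-level keys of SUBMIT_TEMPLATE (the error messages and checks are illustrative, not the app's actual code):

```python
import json

# Hedged sketch of the process_json callback wired to the Submit button above;
# the real implementation is not shown in this diff.
def process_json(uploaded_file):
    """Parse the uploaded submission file and return a dict for the gr.JSON output."""
    if uploaded_file is None:
        return {"error": "Please upload a JSON file."}

    # gr.File may pass a path string or a tempfile-like object, depending on the Gradio version.
    path = uploaded_file if isinstance(uploaded_file, str) else uploaded_file.name

    try:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, json.JSONDecodeError) as exc:
        return {"error": f"Could not parse the file: {exc}"}

    # Minimal sanity check against the top-level keys of SUBMIT_TEMPLATE.
    missing = [key for key in ("config", "results") if key not in data]
    if missing:
        return {"error": f"Missing required top-level keys: {missing}"}

    return data
```

Returning a plain dict lets the `gr.JSON` component render either the parsed submission or an error object in place.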
src/about.py CHANGED
@@ -54,6 +54,45 @@ TASK_TEXT = {
     'Drop_Quote': 'The Drop Quote task comprises a grid of multiple rows and columns, with each column providing a set of candidate letters. The task requires determining the correct row for letters in each column, effectively "dropping" it into target place to reveal the hidden quotation. We created 50 easy samples by manually compiling common quotations, and collected 50 hard samples from <a href="https://www.printable-puzzles.com/printable-drop-quotes.php" target="_blank"> Printable Puzzles</a>, with timestamps ranging from September 2024 to December 2024.'
 }
 
+SUBMIT_TEMPLATE = """
+```python
+{
+    "config": {
+        "model_name": "deepseek-ai/DeepSeek-R1", # your model name
+        "link": "https://huggingface.co/deepseek-ai/DeepSeek-R1", # your model link if available
+        "Params": 671, # number of parameters if available
+        "show_on_leaderboard": true, # whether to show your model on the leaderboard
+    },
+    "results": {
+        "Acrostic": {
+            "TAG1": "RESPONSE1",
+            "TAG2": "RESPONSE2",
+        },
+        "Crossword": {
+            "TAG1": "RESPONSE1",
+            "TAG2": "RESPONSE2",
+        },
+        "Cryptogram": {
+            "TAG1": "RESPONSE1",
+            "TAG2": "RESPONSE2",
+        },
+        "Logic_Puzzle": {
+            "TAG1": "RESPONSE1",
+            "TAG2": "RESPONSE2",
+        },
+        "Sudoku": {
+            "TAG1": "RESPONSE1",
+            "TAG2": "RESPONSE2",
+        },
+        "Drop_Quote": {
+            "TAG1": "RESPONSE1",
+            "TAG2": "RESPONSE2",
+        }
+    }
+}
+```
+"""
+
 # Which evaluations are you running? how can people reproduce what you have?
 LLM_BENCHMARKS_TEXT = f"""
 ## How it works
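On the submitter side, SUBMIT_TEMPLATE describes a config block plus one response dictionary per task, keyed by sample tag. A short illustrative snippet for assembling such a file follows; the model name, tag names, and responses are placeholders rather than real benchmark output, and the parameter count is read as billions only because the template's example (671 for DeepSeek-R1) suggests that unit:

```python
import json

# Placeholders only: swap in your model's metadata and its actual responses per sample tag.
submission = {
    "config": {
        "model_name": "your-org/your-model",
        "link": "https://huggingface.co/your-org/your-model",
        "Params": 7,                   # parameter count (billions, per the template's example)
        "show_on_leaderboard": True,   # json.dump serializes this as the lowercase `true` shown above
    },
    "results": {
        task: {"TAG1": "RESPONSE1", "TAG2": "RESPONSE2"}
        for task in ("Acrostic", "Crossword", "Cryptogram", "Logic_Puzzle", "Sudoku", "Drop_Quote")
    },
}

# Write the file to upload in the "🚀 Submit here!" tab.
with open("submission.json", "w", encoding="utf-8") as f:
    json.dump(submission, f, ensure_ascii=False, indent=4)
```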