Commit 84010af · Parent(s): 1bcaf5a · "add"

Files changed:
- app.py (+8, -86)
- src/about.py (+39, -0)
app.py (CHANGED)

@@ -11,6 +11,7 @@ from src.about import (
     EVALUATION_QUEUE_TEXT,
     INTRODUCTION_TEXT,
     TASK_TEXT,
+    SUBMIT_TEMPLATE,
     LLM_BENCHMARKS_TEXT,
     TITLE,
 )

@@ -138,96 +139,17 @@ with demo:
             # gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

         with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
-            with gr.Column():
-                with gr.Row():
-                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
-
-                with gr.Column():
-                    with gr.Accordion(
-                        f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            finished_eval_table = gr.components.Dataframe(
-                                value=finished_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-                    with gr.Accordion(
-                        f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            running_eval_table = gr.components.Dataframe(
-                                value=running_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-
-                    with gr.Accordion(
-                        f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            pending_eval_table = gr.components.Dataframe(
-                                value=pending_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
             with gr.Row():
                 gr.Markdown("# ✉️✨ Submit your results here!", elem_classes="markdown-text")

-
-
-
-
+            gr.Markdown("## Submission Template", elem_classes="markdown-text")
+            gr.Markdown(SUBMIT_TEMPLATE, elem_classes="markdown-text", height=250)
+
+            file_input = gr.File(label="Upload JSON File", file_types=[".json"], height=150)
+            json_output = gr.JSON(label="Parsed JSON Data")  # display the parsed JSON data
+            submit_button = gr.Button("Submit")
+            submit_button.click(fn=process_json, inputs=file_input, outputs=json_output)

-            with gr.Row():
-                with gr.Column():
-                    model_name_textbox = gr.Textbox(label="Model name")
-                    revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
-                    model_type = gr.Dropdown(
-                        choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
-                        label="Model type",
-                        multiselect=False,
-                        value=None,
-                        interactive=True,
-                    )
-
-                with gr.Column():
-                    precision = gr.Dropdown(
-                        choices=[i.value.name for i in Precision if i != Precision.Unknown],
-                        label="Precision",
-                        multiselect=False,
-                        value="float16",
-                        interactive=True,
-                    )
-                    weight_type = gr.Dropdown(
-                        choices=[i.value.name for i in WeightType],
-                        label="Weights type",
-                        multiselect=False,
-                        value="Original",
-                        interactive=True,
-                    )
-                    base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
-
-            submit_button = gr.Button("Submit Eval")
-            submission_result = gr.Markdown()
-            submit_button.click(
-                add_new_eval,
-                [
-                    model_name_textbox,
-                    base_model_name_textbox,
-                    revision_name_textbox,
-                    precision,
-                    weight_type,
-                    model_type,
-                ],
-                submission_result,
-            )

     with gr.Row():
         # gr.Markdown()
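The rewritten Submit tab wires `submit_button.click` to a `process_json` callback that is not part of this commit. A minimal sketch of such a handler is shown below, assuming Gradio's default `gr.File` behaviour of passing the uploaded file's path; the function body is illustrative, not the repository's actual implementation.

```python
import json


def process_json(file_path):
    """Hypothetical handler: load the uploaded submission file and
    return it as a dict for the gr.JSON output component."""
    if file_path is None:
        return {"error": "Please upload a JSON file."}
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except json.JSONDecodeError as exc:
        return {"error": f"Invalid JSON: {exc}"}
```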
src/about.py (CHANGED)

@@ -54,6 +54,45 @@ TASK_TEXT = {
     'Drop_Quote': 'The Drop Quote task comprises a grid of multiple rows and columns, with each column providing a set of candidate letters. The task requires determining the correct row for letters in each column, effectively "dropping" it into target place to reveal the hidden quotation. We created 50 easy samples by manually compiling common quotations, and collected 50 hard samples from <a href="https://www.printable-puzzles.com/printable-drop-quotes.php" target="_blank"> Printable Puzzles</a>, with timestamps ranging from September 2024 to December 2024.'
 }

+SUBMIT_TEMPLATE = """
+```python
+{
+    "config": {
+        "model_name": "deepseek-ai/DeepSeek-R1", # your model name
+        "link": "https://huggingface.co/deepseek-ai/DeepSeek-R1", # your model link if available
+        "Params": 671, # number of parameters if available
+        "show_on_leaderboard": true, # whether to show your model on the leaderboard
+    },
+    "results": {
+        "Acrostic": {
+            "TAG1": "RESPONSE1",
+            "TAG2": "RESPONSE2",
+        },
+        "Crossword": {
+            "TAG1": "RESPONSE1",
+            "TAG2": "RESPONSE2",
+        },
+        "Cryptogram": {
+            "TAG1": "RESPONSE1",
+            "TAG2": "RESPONSE2",
+        },
+        "Logic_Puzzle": {
+            "TAG1": "RESPONSE1",
+            "TAG2": "RESPONSE2",
+        },
+        "Sudoku": {
+            "TAG1": "RESPONSE1",
+            "TAG2": "RESPONSE2",
+        },
+        "Drop_Quote": {
+            "TAG1": "RESPONSE1",
+            "TAG2": "RESPONSE2",
+        }
+    }
+}
+```
+"""
+
 # Which evaluations are you running? how can people reproduce what you have?
 LLM_BENCHMARKS_TEXT = f"""
 ## How it works
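Note that SUBMIT_TEMPLATE is rendered as a Python-highlighted block with inline comments and trailing commas, whereas the file uploaded through the new Submit tab has to be strict JSON (no comments, no trailing commas, lowercase `true`/`false`). Below is a sketch of how a submitter might generate a conforming file; the tag/response values are placeholders, and the exact tag identifiers expected by the leaderboard are not defined in this commit.

```python
import json

# Placeholder submission following SUBMIT_TEMPLATE; real tags and
# responses come from the benchmark's task data.
submission = {
    "config": {
        "model_name": "deepseek-ai/DeepSeek-R1",
        "link": "https://huggingface.co/deepseek-ai/DeepSeek-R1",
        "Params": 671,
        "show_on_leaderboard": True,
    },
    "results": {
        task: {"TAG1": "RESPONSE1", "TAG2": "RESPONSE2"}
        for task in ["Acrostic", "Crossword", "Cryptogram",
                     "Logic_Puzzle", "Sudoku", "Drop_Quote"]
    },
}

# json.dump emits strict JSON: True becomes true and no trailing commas remain.
with open("submission.json", "w", encoding="utf-8") as f:
    json.dump(submission, f, indent=2, ensure_ascii=False)
```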