Update app.py

app.py CHANGED
@@ -15,13 +15,19 @@ LEADERBORAD_INTRODUCTION = """# AutoEval-Video Leaderboard
 """
 
 SUBMIT_INTRODUCTION = """# Submit Introduction
-For example, if you want to upload GPT-4V's result in the leaderboard, you need to:
-1. Fill in 'GPT-4V' in 'Model Name' if it is your first time to submit your result. Alternatively, if you wish to modify the outcomes of your model, please add a version suffix after the model's name like 'GPT-4V_v2'.
-2. Upload results.json.
-3. Click the 'Evaluate' button.
-4. Click 'Refresh' to obtain the uploaded leaderboard.
-5. The evaluation results of your model will be given in the "Overall Acc." box. For results specific to each evaluation dimension, please refer back to the leaderboard.
+1. Format your model output as a JSON file, following the [example](https://github.com/Xiuyuan-Chen/AutoEval-Video/blob/main/prediction_sample.json) provided in our GitHub repository.
+2. Assign a unique "model name" for your results.
+3. Include the link to your model's repository with each submission.
+4. After selecting 'Evaluation', allow approximately one hour for your model's results to be processed. To view the most recent results in the leaderboard, click 'Refresh'.
 """
+# SUBMIT_INTRODUCTION = """# Submit Introduction
+# For example, if you want to upload GPT-4V's result in the leaderboard, you need to:
+# 1. Fill in 'GPT-4V' in 'Model Name' if it is your first time to submit your result. Alternatively, if you wish to modify the outcomes of your model, please add a version suffix after the model's name like 'GPT-4V_v2'.
+# 2. Upload results.json.
+# 3. Click the 'Evaluate' button.
+# 4. Click 'Refresh' to obtain the uploaded leaderboard.
+# 5. The evaluation results of your model will be given in the "Overall Acc." box. For results specific to each evaluation dimension, please refer back to the leaderboard.
+# """
 
 TABLE_INTRODUCTION = """The table below shows the performance of various models on different evaluation dimensions on AutoEval-Video.
 We use accuracy(%) as the primary evaluation metric for each dimension.
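Step 1 of the new instructions points to prediction_sample.json as the authoritative format. Purely as a hypothetical illustration of producing such a file (the field names below are placeholders, not the confirmed schema), predictions can be serialized with Python's json module:

```python
import json

# Hypothetical structure; consult prediction_sample.json in the
# AutoEval-Video repository for the real field names.
predictions = [
    {"video_id": "0001", "prediction": "The person pours water into a glass."},
    {"video_id": "0002", "prediction": "A dog catches the frisbee mid-air."},
]

# The previous instructions call the upload "results.json", so that name is kept here.
with open("results.json", "w") as f:
    json.dump(predictions, f, indent=2)
```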
@@ -118,6 +124,7 @@ def add_new_eval(
 def add_new_eval(
     input_file,
     model_name_textbox: str,
+    model_link: str
 ):
     if len(model_name_textbox) == 0:
         return "Error! Empty model name!", get_result_df()
@@ -127,6 +134,7 @@ def add_new_eval(
     else:
         csv_data = pd.read_csv(CSV_DIR, dtype={'Model': str})
         model_name_list = list(csv_data['Model'])
+        model_name_list = [name.split(']')[0][1:] for name in model_name_list]
         if model_name_textbox in model_name_list:
             return "In the leaderboard, there already exists a model with the same name, and duplicate submissions of it are not allowed.", get_result_df()
 
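Since the Model column now stores Markdown links (see the next hunk), the duplicate-name check first strips the link syntax back to a bare name: `name.split(']')[0]` keeps everything before the first `]`, and `[1:]` drops the leading `[`. A minimal sketch, with hypothetical model names and URLs:

```python
# Hypothetical rows as they would appear in the leaderboard CSV after this
# change, i.e. Markdown links of the form "[name](url)".
model_name_list = ["[GPT-4V](https://openai.com)", "[Video-LLaVA](https://github.com)"]

# split(']')[0] keeps the text before the first ']'; [1:] drops the leading '['.
bare_names = [name.split(']')[0][1:] for name in model_name_list]
print(bare_names)  # ['GPT-4V', 'Video-LLaVA']
```

Note that the stripping assumes every stored name uses the link format: for a legacy bare name such as `'GPT-4V'` (no brackets), the same expression returns `'PT-4V'`, so the duplicate check would silently miss it.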
@@ -151,7 +159,7 @@ def add_new_eval(
 
     col = csv_data.shape[0]
     new_data = [
-        model_name_textbox,
+        '[' + model_name_textbox + '](' + model_link + ')',
        average_accuracy_video,
        each_task_accuracy[id2questiontype[1]],
        each_task_accuracy[id2questiontype[2]],
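The stored cell is plain string concatenation, presumably rendered as a clickable model name wherever the leaderboard table treats the Model column as Markdown. A small sketch with placeholder values:

```python
# Placeholder values, not from the diff.
model_name_textbox = "GPT-4V"
model_link = "https://github.com/Xiuyuan-Chen/AutoEval-Video"

cell = '[' + model_name_textbox + '](' + model_link + ')'
print(cell)  # [GPT-4V](https://github.com/Xiuyuan-Chen/AutoEval-Video)
```

Since only the model name is validated (the `len(model_name_textbox) == 0` check above), an empty 'Model Link' would produce a dead `[name]()` cell; guarding the link field the same way would be a cheap hardening.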
@@ -226,10 +234,14 @@ with block:
     with gr.Row():
         gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")
 
-
-
-
-
+    with gr.Row():
+        with gr.Column():
+            model_name_textbox = gr.Textbox(
+                label="Model name"
+            )
+        with gr.Column():
+            model_link = gr.Textbox(
+                label="Model Link"
             )
 
     with gr.Column():
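The replaced single input row (its removed lines are not shown in the rendered diff) becomes a two-column layout, one textbox per column. A self-contained sketch of just this layout, assuming the same Gradio Blocks style used elsewhere in app.py:

```python
import gradio as gr

# Standalone reproduction of the new submission row: two side-by-side
# columns, each holding one textbox. Component names mirror the diff.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            model_name_textbox = gr.Textbox(label="Model name")
        with gr.Column():
            model_link = gr.Textbox(label="Model Link")

if __name__ == "__main__":
    demo.launch()
```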
@@ -243,6 +255,7 @@ with block:
         inputs = [
             input_file,
             model_name_textbox,
+            model_link,
         ],
         outputs = [overall_acc, data_component],
     )
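The extra `model_link` entry only works because Gradio passes `inputs` to the callback positionally, so the list order must match `add_new_eval`'s parameter order: `(input_file, model_name_textbox, model_link)`. A runnable sketch of the wiring, with a stub callback and a hypothetical 'Evaluate' button standing in for the app's real components:

```python
import gradio as gr
import pandas as pd

def add_new_eval(input_file, model_name_textbox: str, model_link: str):
    # Stub: the real app evaluates the uploaded JSON and rebuilds the table.
    if len(model_name_textbox) == 0:
        return "Error! Empty model name!", pd.DataFrame()
    row = {"Model": ['[' + model_name_textbox + '](' + model_link + ')']}
    return "submitted", pd.DataFrame(row)

with gr.Blocks() as block:
    input_file = gr.File(label="Upload results.json")
    model_name_textbox = gr.Textbox(label="Model name")
    model_link = gr.Textbox(label="Model Link")
    overall_acc = gr.Textbox(label="Overall Acc.")
    data_component = gr.Dataframe()
    submit_button = gr.Button("Evaluate")
    submit_button.click(
        add_new_eval,
        inputs=[input_file, model_name_textbox, model_link],
        outputs=[overall_acc, data_component],
    )

if __name__ == "__main__":
    block.launch()
```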