Howieeeee committed
Commit e585775 · verified · 1 Parent(s): 148ebe5

Upload 2 files

Files changed (2):
  1. app.py +47 -566
  2. constants.py +34 -166
app.py CHANGED
@@ -259,356 +259,25 @@ def upload_file(files):
     # print("success update", model_name)
     # return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
 
-def get_normalized_df(df):
-    # final_score = df.drop('name', axis=1).sum(axis=1)
-    # df.insert(1, 'Overall Score', final_score)
-    normalize_df = df.copy().fillna(0.0)
-    for column in normalize_df.columns[1:-5]:
-        min_val = NORMALIZE_DIC[column]['Min']
-        max_val = NORMALIZE_DIC[column]['Max']
-        normalize_df[column] = (normalize_df[column] - min_val) / (max_val - min_val)
-    return normalize_df
-
-def get_normalized_i2v_df(df):
-    normalize_df = df.copy().fillna(0.0)
-    for column in normalize_df.columns[1:-5]:
-        min_val = NORMALIZE_DIC_I2V[column]['Min']
-        max_val = NORMALIZE_DIC_I2V[column]['Max']
-        normalize_df[column] = (normalize_df[column] - min_val) / (max_val - min_val)
-    return normalize_df
-
-
-def calculate_selected_score(df, selected_columns):
-    # selected_score = df[selected_columns].sum(axis=1)
-    selected_QUALITY = [i for i in selected_columns if i in QUALITY_LIST]
-    selected_SEMANTIC = [i for i in selected_columns if i in SEMANTIC_LIST]
-    selected_quality_score = df[selected_QUALITY].sum(axis=1) / sum([DIM_WEIGHT[i] for i in selected_QUALITY])
-    selected_semantic_score = df[selected_SEMANTIC].sum(axis=1) / sum([DIM_WEIGHT[i] for i in selected_SEMANTIC])
-    if selected_quality_score.isna().any().any() and selected_semantic_score.isna().any().any():
-        selected_score = (selected_quality_score * QUALITY_WEIGHT + selected_semantic_score * SEMANTIC_WEIGHT) / (QUALITY_WEIGHT + SEMANTIC_WEIGHT)
-        return selected_score.fillna(0.0)
-    if selected_quality_score.isna().any().any():
-        return selected_semantic_score
-    if selected_semantic_score.isna().any().any():
-        return selected_quality_score
-    # print(selected_semantic_score, selected_quality_score)
-    selected_score = (selected_quality_score * QUALITY_WEIGHT + selected_semantic_score * SEMANTIC_WEIGHT) / (QUALITY_WEIGHT + SEMANTIC_WEIGHT)
-    return selected_score.fillna(0.0)
-
-def calculate_selected_score_i2v(df, selected_columns):
-    # selected_score = df[selected_columns].sum(axis=1)
-    selected_QUALITY = [i for i in selected_columns if i in I2V_QUALITY_LIST]
-    selected_I2V = [i for i in selected_columns if i in I2V_LIST]
-    selected_quality_score = df[selected_QUALITY].sum(axis=1) / sum([DIM_WEIGHT_I2V[i] for i in selected_QUALITY])
-    selected_i2v_score = df[selected_I2V].sum(axis=1) / sum([DIM_WEIGHT_I2V[i] for i in selected_I2V])
-    if selected_quality_score.isna().any().any() and selected_i2v_score.isna().any().any():
-        selected_score = (selected_quality_score * I2V_QUALITY_WEIGHT + selected_i2v_score * I2V_WEIGHT) / (I2V_QUALITY_WEIGHT + I2V_WEIGHT)
-        return selected_score.fillna(0.0)
-    if selected_quality_score.isna().any().any():
-        return selected_i2v_score
-    if selected_i2v_score.isna().any().any():
-        return selected_quality_score
-    # print(selected_i2v_score, selected_quality_score)
-    selected_score = (selected_quality_score * I2V_QUALITY_WEIGHT + selected_i2v_score * I2V_WEIGHT) / (I2V_QUALITY_WEIGHT + I2V_WEIGHT)
-    return selected_score.fillna(0.0)
-
-def get_final_score(df, selected_columns):
-    normalize_df = get_normalized_df(df)
-    # final_score = normalize_df.drop('name', axis=1).sum(axis=1)
-    try:
-        for name in normalize_df.drop('Model Name (clickable)', axis=1).drop("Sampled by", axis=1).drop('Mail', axis=1).drop('Date', axis=1).drop("Evaluated by", axis=1).drop("Accessibility", axis=1):
-            normalize_df[name] = normalize_df[name] * DIM_WEIGHT[name]
-    except:
-        for name in normalize_df.drop('Model Name (clickable)', axis=1).drop("Sampled by", axis=1).drop('Mail', axis=1).drop('Date', axis=1):
-            normalize_df[name] = normalize_df[name] * DIM_WEIGHT[name]
-    quality_score = normalize_df[QUALITY_LIST].sum(axis=1) / sum([DIM_WEIGHT[i] for i in QUALITY_LIST])
-    semantic_score = normalize_df[SEMANTIC_LIST].sum(axis=1) / sum([DIM_WEIGHT[i] for i in SEMANTIC_LIST])
-    final_score = (quality_score * QUALITY_WEIGHT + semantic_score * SEMANTIC_WEIGHT) / (QUALITY_WEIGHT + SEMANTIC_WEIGHT)
-    if 'Total Score' in df:
-        df['Total Score'] = final_score
-    else:
-        df.insert(1, 'Total Score', final_score)
-    if 'Semantic Score' in df:
-        df['Semantic Score'] = semantic_score
-    else:
-        df.insert(2, 'Semantic Score', semantic_score)
-    if 'Quality Score' in df:
-        df['Quality Score'] = quality_score
-    else:
-        df.insert(3, 'Quality Score', quality_score)
-    selected_score = calculate_selected_score(normalize_df, selected_columns)
-    if 'Selected Score' in df:
-        df['Selected Score'] = selected_score
-    else:
-        df.insert(1, 'Selected Score', selected_score)
-    return df
-
-def get_final_score_i2v(df, selected_columns):
-    normalize_df = get_normalized_i2v_df(df)
-    try:
-        for name in normalize_df.drop('Model Name (clickable)', axis=1).drop("Sampled by", axis=1).drop('Mail', axis=1).drop('Date', axis=1).drop("Evaluated by", axis=1).drop("Accessibility", axis=1):
-            normalize_df[name] = normalize_df[name] * DIM_WEIGHT_I2V[name]
-    except:
-        for name in normalize_df.drop('Model Name (clickable)', axis=1).drop("Sampled by", axis=1).drop('Mail', axis=1).drop('Date', axis=1):
-            normalize_df[name] = normalize_df[name] * DIM_WEIGHT_I2V[name]
-    quality_score = normalize_df[I2V_QUALITY_LIST].sum(axis=1) / sum([DIM_WEIGHT_I2V[i] for i in I2V_QUALITY_LIST])
-    i2v_score = normalize_df[I2V_LIST].sum(axis=1) / sum([DIM_WEIGHT_I2V[i] for i in I2V_LIST])
-    final_score = (quality_score * I2V_QUALITY_WEIGHT + i2v_score * I2V_WEIGHT) / (I2V_QUALITY_WEIGHT + I2V_WEIGHT)
-    if 'Total Score' in df:
-        df['Total Score'] = final_score
-    else:
-        df.insert(1, 'Total Score', final_score)
-    if 'I2V Score' in df:
-        df['I2V Score'] = i2v_score
-    else:
-        df.insert(2, 'I2V Score', i2v_score)
-    if 'Quality Score' in df:
-        df['Quality Score'] = quality_score
-    else:
-        df.insert(3, 'Quality Score', quality_score)
-    selected_score = calculate_selected_score_i2v(normalize_df, selected_columns)
-    if 'Selected Score' in df:
-        df['Selected Score'] = selected_score
-    else:
-        df.insert(1, 'Selected Score', selected_score)
-    # df.loc[df[9:].isnull().any(axis=1), ['Total Score', 'I2V Score']] = 'N.A.'
-    mask = df.iloc[:, 5:-5].isnull().any(axis=1)
-    df.loc[mask, ['Total Score', 'I2V Score', 'Selected Score']] = np.nan
-    # df.fillna('N.A.', inplace=True)
-    return df
-
-
-
-def get_final_score_quality(df, selected_columns):
-    normalize_df = get_normalized_df(df)
-    for name in normalize_df.drop('Model Name (clickable)', axis=1):
-        normalize_df[name] = normalize_df[name] * DIM_WEIGHT[name]
-    quality_score = normalize_df[QUALITY_TAB].sum(axis=1) / sum([DIM_WEIGHT[i] for i in QUALITY_TAB])
-
-    if 'Quality Score' in df:
-        df['Quality Score'] = quality_score
-    else:
-        df.insert(1, 'Quality Score', quality_score)
-    # selected_score = normalize_df[selected_columns].sum(axis=1) / len(selected_columns)
-    selected_score = normalize_df[selected_columns].sum(axis=1) / sum([DIM_WEIGHT[i] for i in selected_columns])
-    if 'Selected Score' in df:
-        df['Selected Score'] = selected_score
-    else:
-        df.insert(1, 'Selected Score', selected_score)
-    return df
-
-
-
 def get_baseline_df():
     submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
     submission_repo.git_pull()
     df = pd.read_csv(CSV_DIR)
-    df = get_final_score(df, checkbox_group.value)
-    df = df.sort_values(by="Selected Score", ascending=False)
-    present_columns = MODEL_INFO + checkbox_group.value
-    # print(present_columns)
-    df = df[present_columns]
-    # Add this line to display the results evaluated by VBench by default
-    df = df[df['Evaluated by'] == 'VBench Team']
-    df = convert_scores_to_percentage(df)
-    return df
-
-def get_baseline_df_quality():
-    submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
-    submission_repo.git_pull()
-    df = pd.read_csv(QUALITY_DIR)
-    df = get_final_score_quality(df, checkbox_group_quality.value)
-    df = df.sort_values(by="Selected Score", ascending=False)
-    present_columns = MODEL_INFO_TAB_QUALITY + checkbox_group_quality.value
-    df = df[present_columns]
-    df = convert_scores_to_percentage(df)
-    return df
-
-def get_baseline_df_i2v():
-    submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
-    submission_repo.git_pull()
-    df = pd.read_csv(I2V_DIR)
-    df = get_final_score_i2v(df, checkbox_group_i2v.value)
-    df = df.sort_values(by="Selected Score", ascending=False)
-    present_columns = MODEL_INFO_TAB_I2V + checkbox_group_i2v.value
-    # df = df[df["Sampled by"] == 'VBench Team']
-    df = df[present_columns]
-    df = convert_scores_to_percentage(df)
-    return df
-
-def get_baseline_df_long():
-    submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
-    submission_repo.git_pull()
-    df = pd.read_csv(LONG_DIR)
-    df = get_final_score(df, checkbox_group.value)
-    df = df.sort_values(by="Selected Score", ascending=False)
-    present_columns = MODEL_INFO + checkbox_group.value
-    # df = df[df["Sampled by"] == 'VBench Team']
-    df = df[present_columns]
-    df = convert_scores_to_percentage(df)
-    return df
-
-def get_all_df(selected_columns, dir=CSV_DIR):
-    submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
-    submission_repo.git_pull()
-    df = pd.read_csv(dir)
-    df = get_final_score(df, selected_columns)
-    df = df.sort_values(by="Selected Score", ascending=False)
-    return df
-
-def get_all_df_quality(selected_columns, dir=QUALITY_DIR):
-    submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
-    submission_repo.git_pull()
-    df = pd.read_csv(dir)
-    df = get_final_score_quality(df, selected_columns)
-    df = df.sort_values(by="Selected Score", ascending=False)
-    return df
+    df = df.sort_values(by=DEFAULT_INFO[0], ascending=False)
 
-def get_all_df_i2v(selected_columns, dir=I2V_DIR):
-    submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
-    submission_repo.git_pull()
-    df = pd.read_csv(dir)
-    df = get_final_score_i2v(df, selected_columns)
-    df = df.sort_values(by="Selected Score", ascending=False)
+    # Add this line to display the results of all model types by default
+    df = df[df['Model Type'].isin(model_type_filter.value)]
+    # Add this line to display the results of both abilities by default
+    df = df[df['Ability'].isin(ability_filter.value)]
     return df
 
-def get_all_df_long(selected_columns, dir=LONG_DIR):
+def get_all_df(dir=CSV_DIR):
     submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
     submission_repo.git_pull()
     df = pd.read_csv(dir)
-    df = get_final_score(df, selected_columns)
-    df = df.sort_values(by="Selected Score", ascending=False)
     return df
 
 
-def convert_scores_to_percentage(df):
-    # Operate on every column in the DataFrame (except the 'name' column)
-    if "Sampled by" in df.columns:
-        skip_col = 3
-    else:
-        skip_col = 1
-    print(df)
-    for column in df.columns[skip_col:]:  # assume the first column is 'name'
-        # if df[column].isdigit():
-        #     print(df[column])
-        # is_numeric = pd.to_numeric(df[column], errors='coerce').notna().all()
-        valid_numeric_count = pd.to_numeric(df[column], errors='coerce').notna().sum()
-        if valid_numeric_count > 0:
-            df[column] = round(df[column] * 100, 2)
-            df[column] = df[column].apply(lambda x: f"{x:05.2f}%" if pd.notna(pd.to_numeric(x, errors='coerce')) else x)
-            # df[column] = df[column].apply(lambda x: f"{x:05.2f}") + '%'
-    return df
-
-def choose_all_quailty():
-    return gr.update(value=QUALITY_LIST)
-
-def choose_all_semantic():
-    return gr.update(value=SEMANTIC_LIST)
-
-def disable_all():
-    return gr.update(value=[])
-
-def enable_all():
-    return gr.update(value=TASK_INFO)
-
-# select function
-def on_filter_model_size_method_change(selected_columns, vbench_team_sample, vbench_team_eval=False):
-    updated_data = get_all_df(selected_columns, CSV_DIR)
-    if vbench_team_sample:
-        updated_data = updated_data[updated_data["Sampled by"] == 'VBench Team']
-    if vbench_team_eval:
-        updated_data = updated_data[updated_data['Evaluated by'] == 'VBench Team']
-    # print(updated_data)
-    # columns:
-    selected_columns = [item for item in TASK_INFO if item in selected_columns]
-    present_columns = MODEL_INFO + selected_columns
-    updated_data = updated_data[present_columns]
-    updated_data = updated_data.sort_values(by="Selected Score", ascending=False)
-    updated_data = convert_scores_to_percentage(updated_data)
-    updated_headers = present_columns
-    print(COLUMN_NAMES, updated_headers, DATA_TITILE_TYPE)
-    update_datatype = [DATA_TITILE_TYPE[COLUMN_NAMES.index(x)] for x in updated_headers]
-    # print(updated_data, present_columns, update_datatype)
-    filter_component = gr.components.Dataframe(
-        value=updated_data,
-        headers=updated_headers,
-        type="pandas",
-        datatype=update_datatype,
-        interactive=False,
-        visible=True,
-    )
-    return filter_component  # .value
-
-def on_filter_model_size_method_change_quality(selected_columns):
-    updated_data = get_all_df_quality(selected_columns, QUALITY_DIR)
-    # print(updated_data)
-    # columns:
-    selected_columns = [item for item in QUALITY_TAB if item in selected_columns]
-    present_columns = MODEL_INFO_TAB_QUALITY + selected_columns
-    updated_data = updated_data[present_columns]
-    updated_data = updated_data.sort_values(by="Selected Score", ascending=False)
-    updated_data = convert_scores_to_percentage(updated_data)
-    updated_headers = present_columns
-    update_datatype = [DATA_TITILE_TYPE[COLUMN_NAMES.index(x)] for x in updated_headers]
-    # print(updated_data, present_columns, update_datatype)
-    filter_component = gr.components.Dataframe(
-        value=updated_data,
-        headers=updated_headers,
-        type="pandas",
-        datatype=update_datatype,
-        interactive=False,
-        visible=True,
-    )
-    return filter_component  # .value
-
-def on_filter_model_size_method_change_i2v(selected_columns, vbench_team_sample, vbench_team_eval=False):
-    updated_data = get_all_df_i2v(selected_columns, I2V_DIR)
-    if vbench_team_sample:
-        updated_data = updated_data[updated_data["Sampled by"] == 'VBench Team']
-    # if vbench_team_eval:
-    #     updated_data = updated_data[updated_data['Eval'] == 'VBench Team']
-    selected_columns = [item for item in I2V_TAB if item in selected_columns]
-    present_columns = MODEL_INFO_TAB_I2V + selected_columns
-    updated_data = updated_data[present_columns]
-    updated_data = updated_data.sort_values(by="Selected Score", ascending=False)
-    updated_data = convert_scores_to_percentage(updated_data)
-    updated_headers = present_columns
-    update_datatype = [DATA_TITILE_TYPE[COLUMN_NAMES_I2V.index(x)] for x in updated_headers]
-    # print(updated_data, present_columns, update_datatype)
-    filter_component = gr.components.Dataframe(
-        value=updated_data,
-        headers=updated_headers,
-        type="pandas",
-        datatype=update_datatype,
-        interactive=False,
-        visible=True,
-    )
-    return filter_component  # .value
-
-def on_filter_model_size_method_change_long(selected_columns, vbench_team_sample, vbench_team_eval=False):
-    updated_data = get_all_df_long(selected_columns, LONG_DIR)
-    if vbench_team_sample:
-        updated_data = updated_data[updated_data["Sampled by"] == 'VBench Team']
-    if vbench_team_eval:
-        updated_data = updated_data[updated_data['Evaluated by'] == 'VBench Team']
-    selected_columns = [item for item in TASK_INFO if item in selected_columns]
-    present_columns = MODEL_INFO + selected_columns
-    updated_data = updated_data[present_columns]
-    updated_data = updated_data.sort_values(by="Selected Score", ascending=False)
-    updated_data = convert_scores_to_percentage(updated_data)
-    updated_headers = present_columns
-    update_datatype = [DATA_TITILE_TYPE[COLUMN_NAMES.index(x)] for x in updated_headers]
-    filter_component = gr.components.Dataframe(
-        value=updated_data,
-        headers=updated_headers,
-        type="pandas",
-        datatype=update_datatype,
-        interactive=False,
-        visible=True,
-    )
-    return filter_component  # .value
-
-
 block = gr.Blocks()
 with block:
     gr.Markdown(
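
Aside on the scoring logic deleted above: the old Total Score was a 4:1 weighted mean of the normalized quality and semantic scores (QUALITY_WEIGHT = 4 and SEMANTIC_WEIGHT = 1 in the old constants.py, removed later in this commit). A minimal worked example, with invented scores:

    # Hypothetical values; only the weights come from the old constants.py.
    quality_score, semantic_score = 0.80, 0.60
    QUALITY_WEIGHT, SEMANTIC_WEIGHT = 4, 1
    total = (quality_score * QUALITY_WEIGHT + semantic_score * SEMANTIC_WEIGHT) / (QUALITY_WEIGHT + SEMANTIC_WEIGHT)
    print(total)  # 0.76 -- quality dominated the old Total Score 4:1
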
@@ -624,238 +293,50 @@ with block:
         label="Model Type",
         interactive=True
     )
-    # selection for column part:
-    checkbox_group = gr.CheckboxGroup(
-        choices=TASK_INFO,
-        value=DEFAULT_INFO,
-        label="Evaluation Dimension",
-        interactive=True,
+    ability_filter = gr.CheckboxGroup(
+        choices=ABILITY,
+        value=DEFAULT_ABILITY,
+        label="Ability",
+        interactive=True
     )
-
-    # data_component = gr.components.Dataframe(
-    #     value=get_baseline_df,
-    #     headers=COLUMN_NAMES,
-    #     type="pandas",
-    #     datatype=DATA_TITILE_TYPE,
-    #     interactive=False,
-    #     visible=True,
-    #     height=700,
-    # )
-
-    # choosen_q.click(choose_all_quailty, inputs=None, outputs=[checkbox_group]).then(fn=on_filter_model_size_method_change, inputs=[checkbox_group, vbench_team_filter, vbench_validate_filter], outputs=data_component)
-    # choosen_s.click(choose_all_semantic, inputs=None, outputs=[checkbox_group]).then(fn=on_filter_model_size_method_change, inputs=[checkbox_group, vbench_team_filter, vbench_validate_filter], outputs=data_component)
-    # # enable_b.click(enable_all, inputs=None, outputs=[checkbox_group]).then(fn=on_filter_model_size_method_change, inputs=[checkbox_group, vbench_team_filter], outputs=data_component)
-    # disable_b.click(disable_all, inputs=None, outputs=[checkbox_group]).then(fn=on_filter_model_size_method_change, inputs=[checkbox_group, vbench_team_filter, vbench_validate_filter], outputs=data_component)
-    # checkbox_group.change(fn=on_filter_model_size_method_change, inputs=[checkbox_group, vbench_team_filter, vbench_validate_filter], outputs=data_component)
-    # vbench_team_filter.change(fn=on_filter_model_size_method_change, inputs=[checkbox_group, vbench_team_filter, vbench_validate_filter], outputs=data_component)
-    # vbench_validate_filter.change(fn=on_filter_model_size_method_change, inputs=[checkbox_group, vbench_team_filter, vbench_validate_filter], outputs=data_component)
-    # # Table 1
-    # with gr.TabItem("Video Quality", elem_id="vbench-tab-table", id=2):
-    #     with gr.Accordion("INSTRUCTION", open=False):
-    #         citation_button = gr.Textbox(
-    #             value=QUALITY_CLAIM_TEXT,
-    #             label="",
-    #             elem_id="quality-button",
-    #             lines=2,
-    #         )
-    #     with gr.Row():
-    #         with gr.Column(scale=1.0):
-    #             # selection for column part:
-
-    #             checkbox_group_quality = gr.CheckboxGroup(
-    #                 choices=QUALITY_TAB,
-    #                 value=QUALITY_TAB,
-    #                 label="Evaluation Quality Dimension",
-    #                 interactive=True,
-    #             )
-
-    #             data_component_quality = gr.components.Dataframe(
-    #                 value=get_baseline_df_quality,
-    #                 headers=COLUMN_NAMES_QUALITY,
-    #                 type="pandas",
-    #                 datatype=DATA_TITILE_TYPE,
-    #                 interactive=False,
-    #                 visible=True,
-    #             )
-
-    #             checkbox_group_quality.change(fn=on_filter_model_size_method_change_quality, inputs=[checkbox_group_quality], outputs=data_component_quality)
-
-    # # Table i2v
-    # with gr.TabItem("VBench-I2V", elem_id="vbench-tab-table", id=3):
-    #     with gr.Accordion("NOTE", open=False):
-    #         i2v_note_button = gr.Textbox(
-    #             value=I2V_CLAIM_TEXT,
-    #             label="",
-    #             elem_id="quality-button",
-    #             lines=3,
-    #         )
-    #     with gr.Row():
-    #         with gr.Column(scale=1.0):
-    #             # selection for column part:
-    #             with gr.Row():
-    #                 vbench_team_filter_i2v = gr.Checkbox(
-    #                     label="Sampled by VBench Team (Uncheck to view all submissions)",
-    #                     value=False,
-    #                     interactive=True
-    #                 )
-    #                 vbench_validate_filter_i2v = gr.Checkbox(
-    #                     label="Evaluated by VBench Team (Uncheck to view all submissions)",
-    #                     value=False,
-    #                     interactive=True
-    #                 )
-    #             checkbox_group_i2v = gr.CheckboxGroup(
-    #                 choices=I2V_TAB,
-    #                 value=I2V_TAB,
-    #                 label="Evaluation Quality Dimension",
-    #                 interactive=True,
-    #             )
-
-    #             data_component_i2v = gr.components.Dataframe(
-    #                 value=get_baseline_df_i2v,
-    #                 headers=COLUMN_NAMES_I2V,
-    #                 type="pandas",
-    #                 datatype=I2V_TITILE_TYPE,
-    #                 interactive=False,
-    #                 visible=True,
-    #             )
-
-    #             checkbox_group_i2v.change(fn=on_filter_model_size_method_change_i2v, inputs=[checkbox_group_i2v, vbench_team_filter_i2v, vbench_validate_filter_i2v], outputs=data_component_i2v)
-    #             vbench_team_filter_i2v.change(fn=on_filter_model_size_method_change_i2v, inputs=[checkbox_group_i2v, vbench_team_filter_i2v, vbench_validate_filter_i2v], outputs=data_component_i2v)
-    #             vbench_validate_filter_i2v.change(fn=on_filter_model_size_method_change_i2v, inputs=[checkbox_group_i2v, vbench_team_filter_i2v, vbench_validate_filter_i2v], outputs=data_component_i2v)
-
-    # with gr.TabItem("📊 VBench-Long", elem_id="vbench-tab-table", id=4):
-    #     with gr.Row():
-    #         with gr.Accordion("INSTRUCTION", open=False):
-    #             citation_button = gr.Textbox(
-    #                 value=LONG_CLAIM_TEXT,
-    #                 label="",
-    #                 elem_id="long-ins-button",
-    #                 lines=2,
-    #             )
-
-    #     gr.Markdown(
-    #         TABLE_INTRODUCTION
-    #     )
-    #     with gr.Row():
-    #         with gr.Column(scale=0.2):
-    #             choosen_q_long = gr.Button("Select Quality Dimensions")
-    #             choosen_s_long = gr.Button("Select Semantic Dimensions")
-    #             enable_b_long = gr.Button("Select All")
-    #             disable_b_long = gr.Button("Deselect All")
-
-    #         with gr.Column(scale=0.8):
-    #             with gr.Row():
-    #                 vbench_team_filter_long = gr.Checkbox(
-    #                     label="Sampled by VBench Team (Uncheck to view all submissions)",
-    #                     value=False,
-    #                     interactive=True
-    #                 )
-    #                 vbench_validate_filter_long = gr.Checkbox(
-    #                     label="Evaluated by VBench Team (Uncheck to view all submissions)",
-    #                     value=False,
-    #                     interactive=True
-    #                 )
-    #             checkbox_group_long = gr.CheckboxGroup(
-    #                 choices=TASK_INFO,
-    #                 value=DEFAULT_INFO,
-    #                 label="Evaluation Dimension",
-    #                 interactive=True,
-    #             )
-
-    #     data_component = gr.components.Dataframe(
-    #         value=get_baseline_df_long,
-    #         headers=COLUMN_NAMES,
-    #         type="pandas",
-    #         datatype=DATA_TITILE_TYPE,
-    #         interactive=False,
-    #         visible=True,
-    #         height=700,
-    #     )
-
-    #     choosen_q_long.click(choose_all_quailty, inputs=None, outputs=[checkbox_group_long]).then(fn=on_filter_model_size_method_change_long, inputs=[checkbox_group_long, vbench_team_filter_long, vbench_validate_filter_long], outputs=data_component)
-    #     choosen_s_long.click(choose_all_semantic, inputs=None, outputs=[checkbox_group_long]).then(fn=on_filter_model_size_method_change_long, inputs=[checkbox_group_long, vbench_team_filter_long, vbench_validate_filter_long], outputs=data_component)
-    #     enable_b_long.click(enable_all, inputs=None, outputs=[checkbox_group_long]).then(fn=on_filter_model_size_method_change_long, inputs=[checkbox_group_long, vbench_team_filter_long, vbench_validate_filter_long], outputs=data_component)
-    #     disable_b_long.click(disable_all, inputs=None, outputs=[checkbox_group_long]).then(fn=on_filter_model_size_method_change_long, inputs=[checkbox_group_long, vbench_team_filter_long, vbench_validate_filter_long], outputs=data_component)
-    #     checkbox_group_long.change(fn=on_filter_model_size_method_change_long, inputs=[checkbox_group_long, vbench_team_filter_long, vbench_validate_filter_long], outputs=data_component)
-    #     vbench_team_filter_long.change(fn=on_filter_model_size_method_change_long, inputs=[checkbox_group_long, vbench_team_filter_long, vbench_validate_filter_long], outputs=data_component)
-    #     vbench_validate_filter_long.change(fn=on_filter_model_size_method_change_long, inputs=[checkbox_group_long, vbench_team_filter_long, vbench_validate_filter_long], outputs=data_component)
-
-    # # table info
-    # with gr.TabItem("📝 About", elem_id="mvbench-tab-table", id=5):
-    #     gr.Markdown(LEADERBORAD_INFO, elem_classes="markdown-text")
-
-    # # table submission
-    # with gr.TabItem("🚀 [T2V]Submit here! ", elem_id="mvbench-tab-table", id=6):
-    #     gr.Markdown(LEADERBORAD_INTRODUCTION, elem_classes="markdown-text")
-
-    #     with gr.Row():
-    #         gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")
-
-    #     with gr.Row():
-    #         gr.Markdown("# ✉️✨ Submit your model evaluation json file here!", elem_classes="markdown-text")
-
-    #     with gr.Row():
-    #         gr.Markdown("Here is a required field", elem_classes="markdown-text")
-    #     with gr.Row():
-    #         with gr.Column():
-    #             model_name_textbox = gr.Textbox(
-    #                 label="Model name", placeholder="Required field"
-    #             )
-    #             revision_name_textbox = gr.Textbox(
-    #                 label="Revision Model Name(Optional)", placeholder="If you need to update the previous results, please fill in this line"
-    #             )
-    #             access_type = gr.Dropdown(["Open Source", "Ready to Open Source", "API", "Close"], label="Please select the way user can access your model. You can update the content by revision_name, or contact the VBench Team.")
-
-    #         with gr.Column():
-    #             model_link = gr.Textbox(
-    #                 label="Project Page/Paper Link/Github/HuggingFace Repo", placeholder="Required field. If filling in the wrong information, your results may be removed."
-    #             )
-    #             team_name = gr.Textbox(
-    #                 label="Your Team Name(If left blank, it will be user upload)", placeholder="User Upload"
-    #             )
-    #             contact_email = gr.Textbox(
-    #                 label="E-Mail(Will not be displayed)", placeholder="Required field"
-    #             )
-    #     with gr.Row():
-    #         gr.Markdown("The following is optional and will be synced to [GitHub] (https://github.com/Vchitect/VBench/tree/master/sampled_videos#what-are-the-details-of-the-video-generation-models)", elem_classes="markdown-text")
-    #     with gr.Row():
-    #         release_time = gr.Textbox(label="Time of Publish", placeholder="1970-01-01")
-    #         model_resolution = gr.Textbox(label="resolution", placeholder="Width x Height")
-    #         model_fps = gr.Textbox(label="model fps", placeholder="FPS(int)")
-    #         model_frame = gr.Textbox(label="model frame count", placeholder="INT")
-    #         model_video_length = gr.Textbox(label="model video length", placeholder="float(2.0)")
-    #         model_checkpoint = gr.Textbox(label="model checkpoint", placeholder="optional")
-    #         model_commit_id = gr.Textbox(label="github commit id", placeholder='main')
-    #         model_video_format = gr.Textbox(label="pipeline format", placeholder='mp4')
-    #     with gr.Column():
-    #         input_file = gr.components.File(label="Click to Upload a ZIP File", file_count="single", type='binary')
-    #         submit_button = gr.Button("Submit Eval")
-    #         submit_succ_button = gr.Markdown("Submit Success! Please press refresh and return to LeaderBoard!", visible=False)
-    #         fail_textbox = gr.Markdown('<span style="color:red;">Please ensure that the `Model Name`, `Project Page`, and `Email` are filled in correctly.</span>', elem_classes="markdown-text", visible=False)
-
-    #     submission_result = gr.Markdown()
-    #     # submit_button.click(
-    #     #     add_new_eval,
-    #     #     inputs=[
-    #     #         input_file,
-    #     #         model_name_textbox,
-    #     #         revision_name_textbox,
-    #     #         model_link,
-    #     #         team_name,
-    #     #         contact_email,
-    #     #         release_time,
-    #     #         access_type,
-    #     #         model_resolution,
-    #     #         model_fps,
-    #     #         model_frame,
-    #     #         model_video_length,
-    #     #         model_checkpoint,
-    #     #         model_commit_id,
-    #     #         model_video_format
-    #     #     ],
-    #     #     outputs=[submit_button, submit_succ_button, fail_textbox]
-    #     # )
 
+    data_component = gr.components.Dataframe(
+        column_widths="auto",  # Automatically adjusts column widths
+        value=get_baseline_df(),
+        headers=COLUMN_NAMES,
+        type="pandas",
+        datatype=DATA_TITILE_TYPE,
+        interactive=False,
+        visible=True,
+    )
+
+    def on_filter_change(model_types, abilities):
+        df = get_baseline_df()
+        # Filter by selected model types
+        df = df[df['Model Type'].isin(model_types)]
+        # Filter by selected abilities
+        df = df[df['Ability'].isin(abilities)]
+        return gr.Dataframe(
+            column_widths="auto",  # Automatically adjusts column widths
+            value=df,
+            headers=COLUMN_NAMES,
+            type="pandas",
+            datatype=DATA_TITILE_TYPE,
+            interactive=False,
+            visible=True
+        )
+
+    model_type_filter.change(
+        fn=on_filter_change,
+        inputs=[model_type_filter, ability_filter],
+        outputs=data_component
+    )
+
+    ability_filter.change(
+        fn=on_filter_change,
+        inputs=[model_type_filter, ability_filter],
+        outputs=data_component
+    )
 
     # with gr.TabItem("🚀 [I2V]Submit here! ", elem_id="mvbench-i2v-tab-table", id=7):
     #     gr.Markdown(LEADERBORAD_INTRODUCTION, elem_classes="markdown-text")
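
The new table wiring above reduces to one small pattern: a shared callback re-reads the baseline table and re-applies both checkbox filters, and each CheckboxGroup's change event feeds both current selections back in. A minimal standalone sketch of that pattern (the toy DataFrame and demo scaffold are illustrative, not part of the commit):

    import gradio as gr
    import pandas as pd

    # Stand-in for results.csv; column names mirror the new constants.py.
    toy_df = pd.DataFrame({
        "Model Type": ["Video", "3D", "4D"],
        "Model Name": ["A", "B", "C"],
        "Ability": ["T2V", "I2V", "I2V"],
        "WorldScore-Static": [0.71, 0.65, 0.58],
    })

    with gr.Blocks() as demo:
        model_type_filter = gr.CheckboxGroup(choices=["Video", "3D", "4D"],
                                             value=["Video", "3D", "4D"], label="Model Type")
        ability_filter = gr.CheckboxGroup(choices=["I2V", "T2V"],
                                          value=["I2V", "T2V"], label="Ability")
        table = gr.Dataframe(value=toy_df, interactive=False)

        def on_filter_change(model_types, abilities):
            # Both filters are re-applied from scratch on every change event.
            df = toy_df[toy_df["Model Type"].isin(model_types)]
            return df[df["Ability"].isin(abilities)]

        # Either checkbox group re-renders the table with both current selections.
        model_type_filter.change(on_filter_change, [model_type_filter, ability_filter], table)
        ability_filter.change(on_filter_change, [model_type_filter, ability_filter], table)

    demo.launch()
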
constants.py CHANGED
@@ -1,193 +1,61 @@
 import os
 # this is .py for store constants
 MODEL_TYPE = [
-    "video generation",
-    "3D scene generation",
-    "4D scene generation"
+    "Video",
+    "3D",
+    "4D"
 ]
 DEFAULT_MODEL_TYPE = [
-    "video generation",
-    "3D scene generation",
-    "4D scene generation"
+    "Video",
+    "3D",
+    "4D"
 ]
 
-MODEL_INFO = [
-    "Model Name (clickable)",
-    "Sampled by",
-    "Evaluated by",
-    "Accessibility",
-    "Date",
-    "Total Score",
-    "Quality Score",
-    "Semantic Score",
-    "Selected Score",
+ABILITY = [
+    "I2V",
+    "T2V"
 ]
-
-MODEL_INFO_TAB_QUALITY = [
-    "Model Name (clickable)",
-    "Quality Score",
-    "Selected Score"
-]
-
-MODEL_INFO_TAB_I2V = [
-    "Model Name (clickable)",
-    "Sampled by",
-    "Evaluated by",
-    "Accessibility",
-    "Date",
-    "Total Score",
-    "I2V Score",
-    "Quality Score",
-    "Selected Score"
+DEFAULT_ABILITY = [
+    "I2V",
+    "T2V"
 ]
 
 TASK_INFO = [
-    "subject consistency",
-    "background consistency",
-    "temporal flickering",
-    "motion smoothness",
-    "dynamic degree",
-    "aesthetic quality",
-    "imaging quality",
-    "object class",
-    "multiple objects",
-    "human action",
-    "color",
-    "spatial relationship",
-    "scene",
-    "appearance style",
-    "temporal style",
-    "overall consistency"
+    "WorldScore-Static",
+    "WorldScore-Dynamic",
+    "Camera Control",
+    "Object Control",
+    "Content Alignment",
+    "3D Consistency",
+    "Photometric Consistency",
+    "Style Consistency",
+    "Subjective Quality",
+    "Motion Accuracy",
+    "Motion Magnitude",
+    "Motion Smoothness",
 ]
-
 DEFAULT_INFO = [
-    "subject consistency",
-    "background consistency",
-    "temporal flickering",
-    "motion smoothness",
-    "dynamic degree",
-    "aesthetic quality",
-    "imaging quality",
-    "object class",
-    "multiple objects",
-    "human action",
-    "color",
-    "spatial relationship",
-    "scene",
-    "appearance style",
-    "temporal style",
-    "overall consistency"
-]
-
-QUALITY_LIST = [
-    "subject consistency",
-    "background consistency",
-    "temporal flickering",
-    "motion smoothness",
-    "aesthetic quality",
-    "imaging quality",
-    "dynamic degree",]
-
-SEMANTIC_LIST = [
-    "object class",
-    "multiple objects",
-    "human action",
-    "color",
-    "spatial relationship",
-    "scene",
-    "appearance style",
-    "temporal style",
-    "overall consistency"
+    "WorldScore-Static",
 ]
 
-QUALITY_TAB = [
-    "subject consistency",
-    "background consistency",
-    "motion smoothness",
-    "aesthetic quality",
-    "imaging quality",
-    "dynamic degree",]
-
-I2V_LIST = [
-    "Video-Text Camera Motion",
-    "Video-Image Subject Consistency",
-    "Video-Image Background Consistency",
-]
-
-I2V_QUALITY_LIST = [
-    "Subject Consistency",
-    "Background Consistency",
-    "Motion Smoothness",
-    "Dynamic Degree",
-    "Aesthetic Quality",
-    "Imaging Quality",
-    # "Temporal Flickering"
-]
-
-I2V_TAB = [
-    "Video-Text Camera Motion",
-    "Video-Image Subject Consistency",
-    "Video-Image Background Consistency",
-    "Subject Consistency",
-    "Background Consistency",
-    "Motion Smoothness",
-    "Dynamic Degree",
-    "Aesthetic Quality",
-    "Imaging Quality",
-    # "Temporal Flickering"
+MODEL_INFO = [
+    "Model Type",
+    "Model Name",
+    "Ability",
+    "Sampled by",
+    "Evaluated by",
+    "Accessibility",
+    "Date",
 ]
 
-DIM_WEIGHT = {
-    "subject consistency": 1,
-    "background consistency": 1,
-    "temporal flickering": 1,
-    "motion smoothness": 1,
-    "aesthetic quality": 1,
-    "imaging quality": 1,
-    "dynamic degree": 0.5,
-    "object class": 1,
-    "multiple objects": 1,
-    "human action": 1,
-    "color": 1,
-    "spatial relationship": 1,
-    "scene": 1,
-    "appearance style": 1,
-    "temporal style": 1,
-    "overall consistency": 1
-}
-
-DIM_WEIGHT_I2V = {
-    "Video-Text Camera Motion": 0.1,
-    "Video-Image Subject Consistency": 1,
-    "Video-Image Background Consistency": 1,
-    "Subject Consistency": 1,
-    "Background Consistency": 1,
-    "Motion Smoothness": 1,
-    "Dynamic Degree": 0.5,
-    "Aesthetic Quality": 1,
-    "Imaging Quality": 1,
-    "Temporal Flickering": 1
-}
-
-SEMANTIC_WEIGHT = 1
-QUALITY_WEIGHT = 4
-I2V_WEIGHT = 1.0
-I2V_QUALITY_WEIGHT = 1.0
-
-DATA_TITILE_TYPE = ['markdown', 'markdown', 'markdown', 'markdown', 'markdown', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number']
-I2V_TITILE_TYPE = ['markdown', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number']
+DATA_TITILE_TYPE = ['markdown', 'markdown', 'markdown', 'markdown', 'markdown', 'markdown', 'markdown', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number']
 
 SUBMISSION_NAME = "worldscore_leaderboard_submission"
 SUBMISSION_URL = os.path.join("https://huggingface.co/datasets/Howieeeee/", SUBMISSION_NAME)
 CSV_DIR = "./worldscore_leaderboard_submission/results.csv"
-QUALITY_DIR = "./worldscore_leaderboard_submission/quality.csv"
-I2V_DIR = "./worldscore_leaderboard_submission/i2v_results.csv"
-LONG_DIR = "./worldscore_leaderboard_submission/long_debug.csv"
 INFO_DIR = "./worldscore_leaderboard_submission/model_info.csv"
 
 COLUMN_NAMES = MODEL_INFO + TASK_INFO
-COLUMN_NAMES_QUALITY = MODEL_INFO_TAB_QUALITY + QUALITY_TAB
-COLUMN_NAMES_I2V = MODEL_INFO_TAB_I2V + I2V_TAB
 
 LEADERBORAD_INTRODUCTION = """# WorldScore Leaderboard
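
One invariant worth keeping in mind with the new constants: DATA_TITILE_TYPE is positional, so it must stay aligned with COLUMN_NAMES = MODEL_INFO + TASK_INFO (one 'markdown' per MODEL_INFO field, then one 'number' per TASK_INFO dimension, 19 entries in total after this commit). A quick check one could run (hypothetical snippet, not in the commit):

    from constants import COLUMN_NAMES, DATA_TITILE_TYPE

    # 7 MODEL_INFO fields + 12 TASK_INFO dimensions = 19 table columns.
    assert len(DATA_TITILE_TYPE) == len(COLUMN_NAMES) == 19
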