Howieeeee committed on
Commit
7cbbfe6
·
verified ·
1 Parent(s): 239e9da

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.md +14 -6
  2. app.py +968 -0
  3. constants.py +266 -0
  4. requirements.txt +3 -0
README.md CHANGED
@@ -1,12 +1,20 @@
1
  ---
2
- title: Test
3
- emoji: 🌖
4
- colorFrom: yellow
5
- colorTo: green
6
  sdk: gradio
7
- sdk_version: 5.23.1
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
1
  ---
2
+ title: VBench Leaderboard
3
+ emoji: 📊
4
+ colorFrom: indigo
5
+ colorTo: pink
6
  sdk: gradio
7
+ sdk_version: 4.36.1
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
  ---
12
 
13
+ # VBench Leaderboard
14
+
15
+ ## Space Description
16
+
17
+ - **Repository:** [VBench](https://github.com/Vchitect/VBench)
18
+ - **Paper:** [2311.17982](arxiv.org/abs/2311.17982)
19
+ - **Paper:** [2411.13503](arxiv.org/abs/2411.13503)
20
+ - **Point of Contact:** mailto:[Vchitect]([email protected])
app.py ADDED
@@ -0,0 +1,968 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissions']
2
+ import os
3
+ import io
4
+ import gradio as gr
5
+ import pandas as pd
6
+ import json
7
+ import shutil
8
+ import tempfile
9
+ import datetime
10
+ import zipfile
11
+ import numpy as np
12
+
13
+
14
+ from constants import *
15
+ from huggingface_hub import Repository
16
+ HF_TOKEN = os.environ.get("HF_TOKEN")
17
+
18
+ global data_component, filter_component
19
+
20
+
21
def upload_file(files):
    """Return the on-disk paths of the uploaded Gradio file objects."""
    return [uploaded.name for uploaded in files]
24
+
25
def add_new_eval(
    input_file,
    model_name_textbox: str,
    revision_name_textbox: str,
    model_link: str,
    team_name: str,
    contact_email: str,
    access_type: str,
    model_publish: str,
    model_resolution: str,
    model_fps: str,
    model_frame: str,
    model_video_length: str,
    model_checkpoint: str,
    model_commit_id: str,
    model_video_format: str
):
    """Handle a new T2V leaderboard submission.

    Saves the uploaded zip of result JSONs to the submission dataset repo,
    extracts the per-dimension scores, appends (or overwrites) a row in the
    leaderboard CSV, appends the model metadata to INFO_DIR, and pushes the
    dataset repo back to the Hub.

    Returns three gr.update objects toggling the warning/success banners,
    or a plain error string when no file was uploaded.
    """
    if input_file is None:
        return "Error! Empty file!"
    # A model name, link, and contact email are mandatory.
    if model_link == '' or model_name_textbox == '' or contact_email == '':
        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)
    # upload_data=json.loads(input_file)
    upload_content = input_file
    submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
    submission_repo.git_pull()
    # Timestamp doubles as the local extraction directory name below.
    filename = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

    now = datetime.datetime.now()
    update_time = now.strftime("%Y-%m-%d")  # Capture update time
    # NOTE(review): "(unknown)" looks like a placeholder that leaked into the
    # archive name — every submission overwrites the same zip; confirm intent.
    with open(f'{SUBMISSION_NAME}/(unknown).zip','wb') as f:
        f.write(input_file)
    # shutil.copyfile(CSV_DIR, os.path.join(SUBMISSION_NAME, f"{input_file}"))

    csv_data = pd.read_csv(CSV_DIR)

    if revision_name_textbox == '':
        # Fresh submission: append a new row at the bottom.
        col = csv_data.shape[0]
        model_name = model_name_textbox.replace(',',' ')
    else:
        # Revision: overwrite the existing row when the name is found.
        model_name = revision_name_textbox.replace(',',' ')
        model_name_list = csv_data['Model Name (clickable)']
        # Strip the markdown "[name](url)" wrapper down to "name".
        name_list = [name.split(']')[0][1:] for name in model_name_list]
        if revision_name_textbox not in name_list:
            col = csv_data.shape[0]
        else:
            col = name_list.index(revision_name_textbox)
    if model_link == '':
        model_name = model_name  # no url
    else:
        # Render the model name as a clickable markdown link.
        model_name = '[' + model_name + '](' + model_link + ')'

    os.makedirs(filename, exist_ok=True)
    with zipfile.ZipFile(io.BytesIO(input_file), 'r') as zip_ref:
        zip_ref.extractall(filename)

    # Collect dimension scores from every JSON in the archive; one level of
    # sub-directories is searched and hidden/__MACOSX entries are skipped.
    upload_data = {}
    for file in os.listdir(filename):
        if file.startswith('.') or file.startswith('__'):
            print(f"Skip the file: {file}")
            continue
        cur_file = os.path.join(filename, file)
        if os.path.isdir(cur_file):
            for subfile in os.listdir(cur_file):
                if subfile.endswith(".json"):
                    with open(os.path.join(cur_file, subfile)) as ff:
                        cur_json = json.load(ff)
                        print(file, type(cur_json))
                        if isinstance(cur_json, dict):
                            print(cur_json.keys())
                            for key in cur_json:
                                # JSON keys use underscores; CSV headers use spaces.
                                upload_data[key.replace('_',' ')] = cur_json[key][0]
                                print(f"{key}:{cur_json[key][0]}")
        elif cur_file.endswith('json'):
            with open(cur_file) as ff:
                cur_json = json.load(ff)
                print(file, type(cur_json))
                if isinstance(cur_json, dict):
                    print(cur_json.keys())
                    for key in cur_json:
                        upload_data[key.replace('_',' ')] = cur_json[key][0]
                        print(f"{key}:{cur_json[key][0]}")
    # add new data
    new_data = [model_name]
    print('upload_data:', upload_data)
    # Missing dimensions default to 0 so the row stays rectangular.
    for key in TASK_INFO:
        if key in upload_data:
            new_data.append(upload_data[key])
        else:
            new_data.append(0)
    if team_name =='' or 'vbench' in team_name.lower():
        new_data.append("User Upload")
    else:
        new_data.append(team_name)

    new_data.append(contact_email.replace(',',' and '))  # Add contact email [private]
    new_data.append(update_time)  # Add the update time
    new_data.append(team_name)
    new_data.append(access_type)

    csv_data.loc[col] = new_data
    csv_data = csv_data.to_csv(CSV_DIR, index=False)
    with open(INFO_DIR,'a') as f:
        f.write(f"{model_name}\t{update_time}\t{model_publish}\t{model_resolution}\t{model_fps}\t{model_frame}\t{model_video_length}\t{model_checkpoint}\t{model_commit_id}\t{model_video_format}\n")
    submission_repo.push_to_hub()
    print("success update", model_name)
    return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
131
+
132
def add_new_eval_i2v(
    input_file,
    model_name_textbox: str,
    revision_name_textbox: str,
    model_link: str,
    team_name: str,
    contact_email: str,
    access_type: str,
    model_publish: str,
    model_resolution: str,
    model_fps: str,
    model_frame: str,
    model_video_length: str,
    model_checkpoint: str,
    model_commit_id: str,
    model_video_format: str
):
    """Handle a new I2V leaderboard submission (mirrors add_new_eval).

    Differences from the T2V path: scores are written to I2V_DIR, JSON keys
    are kept verbatim and mapped through COLNAME2KEY (no underscore-to-space
    conversion), and the leaderboard column order is the I2V_HEAD list below.
    """
    # Display-column -> JSON-key mapping for the I2V result files.
    COLNAME2KEY={
        "Video-Text Camera Motion":"camera_motion",
        "Video-Image Subject Consistency": "i2v_subject",
        "Video-Image Background Consistency": "i2v_background",
        "Subject Consistency": "subject_consistency",
        "Background Consistency": "background_consistency",
        "Motion Smoothness": "motion_smoothness",
        "Dynamic Degree": "dynamic_degree",
        "Aesthetic Quality": "aesthetic_quality",
        "Imaging Quality": "imaging_quality",
        "Temporal Flickering": "temporal_flickering"
    }
    if input_file is None:
        return "Error! Empty file!"
    # A model name, link, and contact email are mandatory.
    if model_link == '' or model_name_textbox == '' or contact_email == '':
        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)

    upload_content = input_file
    submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
    submission_repo.git_pull()
    # Timestamp doubles as the local extraction directory name below.
    filename = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

    now = datetime.datetime.now()
    update_time = now.strftime("%Y-%m-%d")  # Capture update time
    # NOTE(review): "(unknown)" looks like a leaked placeholder; every
    # submission overwrites the same archive — confirm the intended filename.
    with open(f'{SUBMISSION_NAME}/(unknown).zip','wb') as f:
        f.write(input_file)
    # shutil.copyfile(CSV_DIR, os.path.join(SUBMISSION_NAME, f"{input_file}"))

    csv_data = pd.read_csv(I2V_DIR)

    if revision_name_textbox == '':
        # Fresh submission: append a new row at the bottom.
        col = csv_data.shape[0]
        model_name = model_name_textbox.replace(',',' ')
    else:
        # Revision: overwrite the existing row when the name is found.
        model_name = revision_name_textbox.replace(',',' ')
        model_name_list = csv_data['Model Name (clickable)']
        # Strip the markdown "[name](url)" wrapper down to "name".
        name_list = [name.split(']')[0][1:] for name in model_name_list]
        if revision_name_textbox not in name_list:
            col = csv_data.shape[0]
        else:
            col = name_list.index(revision_name_textbox)
    if model_link == '':
        model_name = model_name  # no url
    else:
        model_name = '[' + model_name + '](' + model_link + ')'

    os.makedirs(filename, exist_ok=True)
    with zipfile.ZipFile(io.BytesIO(input_file), 'r') as zip_ref:
        zip_ref.extractall(filename)

    # Collect raw JSON keys (no underscore conversion, unlike add_new_eval).
    upload_data = {}
    for file in os.listdir(filename):
        if file.startswith('.') or file.startswith('__'):
            print(f"Skip the file: {file}")
            continue
        cur_file = os.path.join(filename, file)
        if os.path.isdir(cur_file):
            for subfile in os.listdir(cur_file):
                if subfile.endswith(".json"):
                    with open(os.path.join(cur_file, subfile)) as ff:
                        cur_json = json.load(ff)
                        print(file, type(cur_json))
                        if isinstance(cur_json, dict):
                            print(cur_json.keys())
                            for key in cur_json:
                                upload_data[key] = cur_json[key][0]
                                print(f"{key}:{cur_json[key][0]}")
        elif cur_file.endswith('json'):
            with open(cur_file) as ff:
                cur_json = json.load(ff)
                print(file, type(cur_json))
                if isinstance(cur_json, dict):
                    print(cur_json.keys())
                    for key in cur_json:
                        upload_data[key] = cur_json[key][0]
                        print(f"{key}:{cur_json[key][0]}")
    # add new data
    new_data = [model_name]
    print('upload_data:', upload_data)
    # Column order of the I2V leaderboard CSV.
    I2V_HEAD= ["Video-Text Camera Motion",
        "Video-Image Subject Consistency",
        "Video-Image Background Consistency",
        "Subject Consistency",
        "Background Consistency",
        "Temporal Flickering",
        "Motion Smoothness",
        "Dynamic Degree",
        "Aesthetic Quality",
        "Imaging Quality" ]
    for key in I2V_HEAD :
        sub_key = COLNAME2KEY[key]
        if sub_key in upload_data:
            new_data.append(upload_data[sub_key])
        else:
            new_data.append(0)  # missing dimension defaults to 0
    if team_name =='' or 'vbench' in team_name.lower():
        new_data.append("User Upload")
    else:
        new_data.append(team_name)

    new_data.append(contact_email.replace(',',' and '))  # Add contact email [private]
    new_data.append(update_time)  # Add the update time
    new_data.append(team_name)
    new_data.append(access_type)

    csv_data.loc[col] = new_data
    csv_data = csv_data.to_csv(I2V_DIR , index=False)
    with open(INFO_DIR,'a') as f:
        f.write(f"{model_name}\t{update_time}\t{model_publish}\t{model_resolution}\t{model_fps}\t{model_frame}\t{model_video_length}\t{model_checkpoint}\t{model_commit_id}\t{model_video_format}\n")
    submission_repo.push_to_hub()
    print("success update", model_name)
    return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
261
+
262
def get_normalized_df(df):
    """Return a NaN-filled copy of *df* with each score column min-max
    normalized using the bounds recorded in NORMALIZE_DIC.

    The first column (model name) and the trailing five metadata columns
    are left untouched.
    """
    result = df.copy().fillna(0.0)
    for dim in result.columns[1:-5]:
        bounds = NORMALIZE_DIC[dim]
        span = bounds['Max'] - bounds['Min']
        result[dim] = (result[dim] - bounds['Min']) / span
    return result
271
+
272
def get_normalized_i2v_df(df):
    """Return a NaN-filled copy of *df* with each score column min-max
    normalized using the bounds in NORMALIZE_DIC_I2V (I2V leaderboard).
    """
    result = df.copy().fillna(0.0)
    for dim in result.columns[1:-5]:
        bounds = NORMALIZE_DIC_I2V[dim]
        span = bounds['Max'] - bounds['Min']
        result[dim] = (result[dim] - bounds['Min']) / span
    return result
279
+
280
+
281
def calculate_selected_score(df, selected_columns):
    """Blend the user-selected dimensions into one score per row.

    Quality and semantic dimensions are averaged separately (weighted by
    DIM_WEIGHT), then combined with QUALITY_WEIGHT / SEMANTIC_WEIGHT.  When
    one group has no selected columns its per-row mean is NaN (sum 0 over
    weight 0), and the other group's score is returned unchanged.
    """
    # selected_score = df[selected_columns].sum(axis=1)
    selected_QUALITY = [i for i in selected_columns if i in QUALITY_LIST]
    selected_SEMANTIC = [i for i in selected_columns if i in SEMANTIC_LIST]
    selected_quality_score = df[selected_QUALITY].sum(axis=1)/sum([DIM_WEIGHT[i] for i in selected_QUALITY])
    selected_semantic_score = df[selected_SEMANTIC].sum(axis=1)/sum([DIM_WEIGHT[i] for i in selected_SEMANTIC ])
    # NOTE(review): Series.any() already yields a scalar; the chained second
    # .any() is redundant (numpy bools expose .any()) but harmless.
    if selected_quality_score.isna().any().any() and selected_semantic_score.isna().any().any():
        # Both groups empty/NaN: fall through to the weighted blend, zeroed.
        selected_score = (selected_quality_score * QUALITY_WEIGHT + selected_semantic_score * SEMANTIC_WEIGHT) / (QUALITY_WEIGHT + SEMANTIC_WEIGHT)
        return selected_score.fillna(0.0)
    if selected_quality_score.isna().any().any():
        return selected_semantic_score
    if selected_semantic_score.isna().any().any():
        return selected_quality_score
    # print(selected_semantic_score,selected_quality_score )
    selected_score = (selected_quality_score * QUALITY_WEIGHT + selected_semantic_score * SEMANTIC_WEIGHT) / (QUALITY_WEIGHT + SEMANTIC_WEIGHT)
    return selected_score.fillna(0.0)
297
+
298
def calculate_selected_score_i2v(df, selected_columns):
    """I2V counterpart of calculate_selected_score.

    Splits the selection into quality vs. I2V dimensions, averages each
    group with DIM_WEIGHT_I2V weights, then blends with I2V_QUALITY_WEIGHT /
    I2V_WEIGHT.  An empty group yields NaN and the other group's score is
    returned unchanged.
    """
    # selected_score = df[selected_columns].sum(axis=1)
    selected_QUALITY = [i for i in selected_columns if i in I2V_QUALITY_LIST]
    selected_I2V = [i for i in selected_columns if i in I2V_LIST]
    selected_quality_score = df[selected_QUALITY].sum(axis=1)/sum([DIM_WEIGHT_I2V[i] for i in selected_QUALITY])
    selected_i2v_score = df[selected_I2V].sum(axis=1)/sum([DIM_WEIGHT_I2V[i] for i in selected_I2V ])
    if selected_quality_score.isna().any().any() and selected_i2v_score.isna().any().any():
        # Both groups empty/NaN: weighted blend, with NaNs zeroed.
        selected_score = (selected_quality_score * I2V_QUALITY_WEIGHT + selected_i2v_score * I2V_WEIGHT) / (I2V_QUALITY_WEIGHT + I2V_WEIGHT)
        return selected_score.fillna(0.0)
    if selected_quality_score.isna().any().any():
        return selected_i2v_score
    if selected_i2v_score.isna().any().any():
        return selected_quality_score
    # print(selected_i2v_score,selected_quality_score )
    selected_score = (selected_quality_score * I2V_QUALITY_WEIGHT + selected_i2v_score * I2V_WEIGHT) / (I2V_QUALITY_WEIGHT + I2V_WEIGHT)
    return selected_score.fillna(0.0)
314
+
315
def get_final_score(df, selected_columns):
    """Attach Total/Semantic/Quality/Selected Score columns to *df*.

    Scores are computed on a DIM_WEIGHT-scaled, min-max-normalized copy of
    the data; *df* itself only gains (or has refreshed) the aggregate
    columns.  Returns *df* for convenience.
    """
    normalize_df = get_normalized_df(df)
    #final_score = normalize_df.drop('name', axis=1).sum(axis=1)
    # Weight each dimension column.  Some CSVs lack the "Evaluated by" /
    # "Accessibility" metadata columns; dropping a missing column raises
    # KeyError, in which case retry with the shorter drop list.
    try:
        for name in normalize_df.drop('Model Name (clickable)', axis=1).drop("Sampled by", axis=1).drop('Mail', axis=1).drop('Date',axis=1).drop("Evaluated by", axis=1).drop("Accessibility", axis=1):
            normalize_df[name] = normalize_df[name]*DIM_WEIGHT[name]
    except KeyError:  # narrowed from bare `except:` — only a missing column is expected
        for name in normalize_df.drop('Model Name (clickable)', axis=1).drop("Sampled by", axis=1).drop('Mail', axis=1).drop('Date',axis=1):
            normalize_df[name] = normalize_df[name]*DIM_WEIGHT[name]
    quality_score = normalize_df[QUALITY_LIST].sum(axis=1)/sum([DIM_WEIGHT[i] for i in QUALITY_LIST])
    semantic_score = normalize_df[SEMANTIC_LIST].sum(axis=1)/sum([DIM_WEIGHT[i] for i in SEMANTIC_LIST ])
    final_score = (quality_score * QUALITY_WEIGHT + semantic_score * SEMANTIC_WEIGHT) / (QUALITY_WEIGHT + SEMANTIC_WEIGHT)
    # Insert each aggregate column on first call; overwrite on later calls.
    if 'Total Score' in df:
        df['Total Score'] = final_score
    else:
        df.insert(1, 'Total Score', final_score)
    if 'Semantic Score' in df:
        df['Semantic Score'] = semantic_score
    else:
        df.insert(2, 'Semantic Score', semantic_score)
    if 'Quality Score' in df:
        df['Quality Score'] = quality_score
    else:
        df.insert(3, 'Quality Score', quality_score)
    selected_score = calculate_selected_score(normalize_df, selected_columns)
    if 'Selected Score' in df:
        df['Selected Score'] = selected_score
    else:
        df.insert(1, 'Selected Score', selected_score)
    return df
345
+
346
def get_final_score_i2v(df, selected_columns):
    """Attach Total/I2V/Quality/Selected Score columns to the I2V *df*.

    Same scheme as get_final_score but with the I2V weight tables; rows
    missing any dimension get NaN aggregates so they render as blank.
    Returns *df* for convenience.
    """
    normalize_df = get_normalized_i2v_df(df)
    # Weight each dimension column; fall back to the shorter drop list when
    # the "Evaluated by"/"Accessibility" metadata columns are absent.
    try:
        for name in normalize_df.drop('Model Name (clickable)', axis=1).drop("Sampled by", axis=1).drop('Mail', axis=1).drop('Date',axis=1).drop("Evaluated by", axis=1).drop("Accessibility", axis=1):
            normalize_df[name] = normalize_df[name]*DIM_WEIGHT_I2V[name]
    except KeyError:  # narrowed from bare `except:` — only a missing column is expected
        for name in normalize_df.drop('Model Name (clickable)', axis=1).drop("Sampled by", axis=1).drop('Mail', axis=1).drop('Date',axis=1):
            normalize_df[name] = normalize_df[name]*DIM_WEIGHT_I2V[name]
    quality_score = normalize_df[I2V_QUALITY_LIST].sum(axis=1)/sum([DIM_WEIGHT_I2V[i] for i in I2V_QUALITY_LIST])
    i2v_score = normalize_df[I2V_LIST].sum(axis=1)/sum([DIM_WEIGHT_I2V[i] for i in I2V_LIST ])
    final_score = (quality_score * I2V_QUALITY_WEIGHT + i2v_score * I2V_WEIGHT) / (I2V_QUALITY_WEIGHT + I2V_WEIGHT)
    # Insert each aggregate column on first call; overwrite on later calls.
    if 'Total Score' in df:
        df['Total Score'] = final_score
    else:
        df.insert(1, 'Total Score', final_score)
    if 'I2V Score' in df:
        df['I2V Score'] = i2v_score
    else:
        df.insert(2, 'I2V Score', i2v_score)
    if 'Quality Score' in df:
        df['Quality Score'] = quality_score
    else:
        df.insert(3, 'Quality Score', quality_score)
    selected_score = calculate_selected_score_i2v(normalize_df, selected_columns)
    if 'Selected Score' in df:
        df['Selected Score'] = selected_score
    else:
        df.insert(1, 'Selected Score', selected_score)
    # df.loc[df[9:].isnull().any(axis=1), ['Total Score', 'I2V Score']] = 'N.A.'
    # Rows with any missing dimension get NaN aggregates.
    mask = df.iloc[:, 5:-5].isnull().any(axis=1)
    df.loc[mask, ['Total Score', 'I2V Score','Selected Score' ]] = np.nan
    # df.fillna('N.A.', inplace=True)
    return df
379
+
380
+
381
+
382
def get_final_score_quality(df, selected_columns):
    """Insert 'Quality Score' and 'Selected Score' columns for the quality tab."""
    weighted = get_normalized_df(df)
    # Scale every score column by its dimension weight.
    for dim in weighted.drop('Model Name (clickable)', axis=1):
        weighted[dim] = weighted[dim] * DIM_WEIGHT[dim]

    quality_score = weighted[QUALITY_TAB].sum(axis=1) / sum(DIM_WEIGHT[d] for d in QUALITY_TAB)
    if 'Quality Score' in df:
        df['Quality Score'] = quality_score
    else:
        df.insert(1, 'Quality Score', quality_score)

    # Same weighted mean restricted to the ticked dimensions.
    selected_score = weighted[selected_columns].sum(axis=1) / sum(DIM_WEIGHT[d] for d in selected_columns)
    if 'Selected Score' in df:
        df['Selected Score'] = selected_score
    else:
        df.insert(1, 'Selected Score', selected_score)
    return df
399
+
400
+
401
+
402
def get_baseline_df():
    """Pull the latest T2V results and build the default leaderboard view."""
    repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
    repo.git_pull()
    table = get_final_score(pd.read_csv(CSV_DIR), checkbox_group.value)
    table = table.sort_values(by="Selected Score", ascending=False)
    table = table[MODEL_INFO + checkbox_group.value]
    # Default view: only results evaluated by the VBench team.
    table = table[table['Evaluated by'] == 'VBench Team']
    return convert_scores_to_percentage(table)
415
+
416
def get_baseline_df_quality():
    """Default table for the Video Quality tab."""
    repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
    repo.git_pull()
    table = get_final_score_quality(pd.read_csv(QUALITY_DIR), checkbox_group_quality.value)
    table = table.sort_values(by="Selected Score", ascending=False)
    table = table[MODEL_INFO_TAB_QUALITY + checkbox_group_quality.value]
    return convert_scores_to_percentage(table)
426
+
427
def get_baseline_df_i2v():
    """Default table for the VBench-I2V tab."""
    repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
    repo.git_pull()
    table = get_final_score_i2v(pd.read_csv(I2V_DIR), checkbox_group_i2v.value)
    table = table.sort_values(by="Selected Score", ascending=False)
    # df = df[df["Sampled by"] == 'VBench Team']
    table = table[MODEL_INFO_TAB_I2V + checkbox_group_i2v.value]
    return convert_scores_to_percentage(table)
438
+
439
def get_baseline_df_long():
    """Default table for the long-video leaderboard tab."""
    repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
    repo.git_pull()
    table = get_final_score(pd.read_csv(LONG_DIR), checkbox_group.value)
    table = table.sort_values(by="Selected Score", ascending=False)
    # df = df[df["Sampled by"] == 'VBench Team']
    table = table[MODEL_INFO + checkbox_group.value]
    return convert_scores_to_percentage(table)
450
+
451
def get_all_df(selected_columns, dir=CSV_DIR):
    """Fresh pull of the T2V CSV, scored and sorted by 'Selected Score'."""
    repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
    repo.git_pull()
    scored = get_final_score(pd.read_csv(dir), selected_columns)
    return scored.sort_values(by="Selected Score", ascending=False)
458
+
459
def get_all_df_quality(selected_columns, dir=QUALITY_DIR):
    """Fresh pull of the quality CSV, scored and sorted by 'Selected Score'."""
    repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
    repo.git_pull()
    scored = get_final_score_quality(pd.read_csv(dir), selected_columns)
    return scored.sort_values(by="Selected Score", ascending=False)
466
+
467
def get_all_df_i2v(selected_columns, dir=I2V_DIR):
    """Fresh pull of the I2V CSV, scored and sorted by 'Selected Score'."""
    repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
    repo.git_pull()
    scored = get_final_score_i2v(pd.read_csv(dir), selected_columns)
    return scored.sort_values(by="Selected Score", ascending=False)
474
+
475
def get_all_df_long(selected_columns, dir=LONG_DIR):
    """Fresh pull of the long-video CSV, scored and sorted by 'Selected Score'."""
    repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
    repo.git_pull()
    scored = get_final_score(pd.read_csv(dir), selected_columns)
    return scored.sort_values(by="Selected Score", ascending=False)
482
+
483
+
484
def convert_scores_to_percentage(df):
    """Format every score column of *df* as a zero-padded 'NN.NN%' string.

    The leading model-name column — and, when a 'Sampled by' column is
    present, the two extra metadata columns that follow it — are skipped.
    Non-numeric cells (e.g. 'N.A.') are preserved unchanged.
    """
    # Metadata layout differs between tabs: tables with a "Sampled by"
    # column carry two extra text columns before the scores begin.
    skip_col = 3 if "Sampled by" in df.columns else 1
    for column in df.columns[skip_col:]:
        numeric = pd.to_numeric(df[column], errors='coerce')
        if numeric.notna().sum() == 0:
            continue  # nothing numeric to format in this column
        # Scale only the numeric cells.  Previously the whole column was
        # multiplied by 100, which corrupted mixed columns because
        # `str * 100` repeats the string in Python.
        scaled = (numeric * 100).round(2)
        df[column] = scaled.where(numeric.notna(), df[column])
        df[column] = df[column].apply(
            lambda x: f"{x:05.2f}%" if pd.notna(pd.to_numeric(x, errors='coerce')) else x
        )
    return df
501
+
502
def choose_all_quailty():
    """Tick every quality dimension in the checkbox group.

    (Name typo kept: existing .click wiring references it.)
    """
    return gr.update(value=QUALITY_LIST)
504
+
505
def choose_all_semantic():
    """Tick every semantic dimension in the checkbox group."""
    return gr.update(value=SEMANTIC_LIST)
507
+
508
def disable_all():
    """Clear every dimension from the checkbox group."""
    return gr.update(value=[])
510
+
511
def enable_all():
    """Tick every available dimension in the checkbox group."""
    return gr.update(value=TASK_INFO)
513
+
514
+ # select function
515
def on_filter_model_size_method_change(selected_columns, vbench_team_sample, vbench_team_eval=False):
    """Rebuild the main leaderboard table when any filter changes.

    selected_columns: dimension names ticked in the checkbox group.
    vbench_team_sample / vbench_team_eval: restrict rows to those sampled /
    evaluated by the VBench team.
    Returns a fresh gr.components.Dataframe holding the filtered view.
    """
    updated_data = get_all_df(selected_columns, CSV_DIR)
    if vbench_team_sample:
        updated_data = updated_data[updated_data["Sampled by"] == 'VBench Team']
    if vbench_team_eval:
        updated_data = updated_data[updated_data['Evaluated by'] == 'VBench Team']
    #print(updated_data)
    # columns:
    # Re-order the selection to TASK_INFO's canonical order regardless of
    # the order the boxes were ticked in.
    selected_columns = [item for item in TASK_INFO if item in selected_columns]
    present_columns = MODEL_INFO + selected_columns
    updated_data = updated_data[present_columns]
    updated_data = updated_data.sort_values(by="Selected Score", ascending=False)
    updated_data = convert_scores_to_percentage(updated_data)
    updated_headers = present_columns
    print(COLUMN_NAMES,updated_headers,DATA_TITILE_TYPE )
    # Keep the per-column datatypes aligned with the surviving headers.
    update_datatype = [DATA_TITILE_TYPE[COLUMN_NAMES.index(x)] for x in updated_headers]
    # print(updated_data,present_columns,update_datatype)
    filter_component = gr.components.Dataframe(
        value=updated_data,
        headers=updated_headers,
        type="pandas",
        datatype=update_datatype,
        interactive=False,
        visible=True,
    )
    return filter_component#.value
541
+
542
def on_filter_model_size_method_change_quality(selected_columns):
    """Rebuild the Video Quality tab table for the ticked dimensions.

    Returns a fresh gr.components.Dataframe holding the filtered view.
    """
    updated_data = get_all_df_quality(selected_columns, QUALITY_DIR)
    #print(updated_data)
    # columns:
    # Keep QUALITY_TAB's canonical column order regardless of click order.
    selected_columns = [item for item in QUALITY_TAB if item in selected_columns]
    present_columns = MODEL_INFO_TAB_QUALITY + selected_columns
    updated_data = updated_data[present_columns]
    updated_data = updated_data.sort_values(by="Selected Score", ascending=False)
    updated_data = convert_scores_to_percentage(updated_data)
    updated_headers = present_columns
    # Keep the per-column datatypes aligned with the surviving headers.
    update_datatype = [DATA_TITILE_TYPE[COLUMN_NAMES.index(x)] for x in updated_headers]
    # print(updated_data,present_columns,update_datatype)
    filter_component = gr.components.Dataframe(
        value=updated_data,
        headers=updated_headers,
        type="pandas",
        datatype=update_datatype,
        interactive=False,
        visible=True,
    )
    return filter_component#.value
563
+
564
def on_filter_model_size_method_change_i2v(selected_columns,vbench_team_sample, vbench_team_eval=False):
    """Rebuild the VBench-I2V tab table for the ticked dimensions.

    vbench_team_eval is accepted for wiring symmetry but currently unused
    (the corresponding filter is commented out below).
    Returns a fresh gr.components.Dataframe holding the filtered view.
    """
    updated_data = get_all_df_i2v(selected_columns, I2V_DIR)
    if vbench_team_sample:
        updated_data = updated_data[updated_data["Sampled by"] == 'VBench Team']
    # if vbench_team_eval:
    #     updated_data = updated_data[updated_data['Eval'] == 'VBench Team']
    # Keep I2V_TAB's canonical column order regardless of click order.
    selected_columns = [item for item in I2V_TAB if item in selected_columns]
    present_columns = MODEL_INFO_TAB_I2V + selected_columns
    updated_data = updated_data[present_columns]
    updated_data = updated_data.sort_values(by="Selected Score", ascending=False)
    updated_data = convert_scores_to_percentage(updated_data)
    updated_headers = present_columns
    # I2V uses its own header list for datatype lookup.
    update_datatype = [DATA_TITILE_TYPE[COLUMN_NAMES_I2V.index(x)] for x in updated_headers]
    # print(updated_data,present_columns,update_datatype)
    filter_component = gr.components.Dataframe(
        value=updated_data,
        headers=updated_headers,
        type="pandas",
        datatype=update_datatype,
        interactive=False,
        visible=True,
    )
    return filter_component#.value
587
+
588
def on_filter_model_size_method_change_long(selected_columns, vbench_team_sample, vbench_team_eval=False):
    """Rebuild the long-video tab table when any filter changes.

    Returns a fresh gr.components.Dataframe holding the filtered view.
    """
    updated_data = get_all_df_long(selected_columns, LONG_DIR)
    if vbench_team_sample:
        updated_data = updated_data[updated_data["Sampled by"] == 'VBench Team']
    if vbench_team_eval:
        updated_data = updated_data[updated_data['Evaluated by'] == 'VBench Team']
    # Keep TASK_INFO's canonical column order regardless of click order.
    selected_columns = [item for item in TASK_INFO if item in selected_columns]
    present_columns = MODEL_INFO + selected_columns
    updated_data = updated_data[present_columns]
    updated_data = updated_data.sort_values(by="Selected Score", ascending=False)
    updated_data = convert_scores_to_percentage(updated_data)
    updated_headers = present_columns
    # Keep the per-column datatypes aligned with the surviving headers.
    update_datatype = [DATA_TITILE_TYPE[COLUMN_NAMES.index(x)] for x in updated_headers]
    filter_component = gr.components.Dataframe(
        value=updated_data,
        headers=updated_headers,
        type="pandas",
        datatype=update_datatype,
        interactive=False,
        visible=True,
    )
    return filter_component#.value
610
+
611
+ block = gr.Blocks()
612
+
613
+
614
+ with block:
615
+ gr.Markdown(
616
+ LEADERBORAD_INTRODUCTION
617
+ )
618
+ with gr.Tabs(elem_classes="tab-buttons") as tabs:
619
+ # Table 0
620
+ with gr.TabItem("📊 VBench", elem_id="vbench-tab-table", id=1):
621
+ with gr.Row():
622
+ with gr.Accordion("Citation", open=False):
623
+ citation_button = gr.Textbox(
624
+ value=CITATION_BUTTON_TEXT,
625
+ label=CITATION_BUTTON_LABEL,
626
+ elem_id="citation-button",
627
+ lines=14,
628
+ )
629
+
630
+ gr.Markdown(
631
+ TABLE_INTRODUCTION
632
+ )
633
+ with gr.Row():
634
+ with gr.Column(scale=0.2):
635
+ choosen_q = gr.Button("Select Quality Dimensions")
636
+ choosen_s = gr.Button("Select Semantic Dimensions")
637
+ # enable_b = gr.Button("Select All")
638
+ disable_b = gr.Button("Deselect All")
639
+
640
+ with gr.Column(scale=0.8):
641
+ vbench_team_filter = gr.Checkbox(
642
+ label="Sampled by VBench Team (Uncheck to view all submissions)",
643
+ value=False,
644
+ interactive=True
645
+ )
646
+ vbench_validate_filter = gr.Checkbox(
647
+ label="Evaluated by VBench Team (Uncheck to view all submissions)",
648
+ value=True,
649
+ interactive=True
650
+ )
651
+ # selection for column part:
652
+ checkbox_group = gr.CheckboxGroup(
653
+ choices=TASK_INFO,
654
+ value=DEFAULT_INFO,
655
+ label="Evaluation Dimension",
656
+ interactive=True,
657
+ )
658
+
659
+ data_component = gr.components.Dataframe(
660
+ value=get_baseline_df,
661
+ headers=COLUMN_NAMES,
662
+ type="pandas",
663
+ datatype=DATA_TITILE_TYPE,
664
+ interactive=False,
665
+ visible=True,
666
+ height=700,
667
+ )
668
+
669
+ choosen_q.click(choose_all_quailty, inputs=None, outputs=[checkbox_group]).then(fn=on_filter_model_size_method_change, inputs=[ checkbox_group, vbench_team_filter,vbench_validate_filter], outputs=data_component)
670
+ choosen_s.click(choose_all_semantic, inputs=None, outputs=[checkbox_group]).then(fn=on_filter_model_size_method_change, inputs=[ checkbox_group, vbench_team_filter,vbench_validate_filter], outputs=data_component)
671
+ # enable_b.click(enable_all, inputs=None, outputs=[checkbox_group]).then(fn=on_filter_model_size_method_change, inputs=[ checkbox_group, vbench_team_filter], outputs=data_component)
672
+ disable_b.click(disable_all, inputs=None, outputs=[checkbox_group]).then(fn=on_filter_model_size_method_change, inputs=[ checkbox_group, vbench_team_filter, vbench_validate_filter], outputs=data_component)
673
+ checkbox_group.change(fn=on_filter_model_size_method_change, inputs=[ checkbox_group, vbench_team_filter, vbench_validate_filter], outputs=data_component)
674
+ vbench_team_filter.change(fn=on_filter_model_size_method_change, inputs=[checkbox_group, vbench_team_filter, vbench_validate_filter], outputs=data_component)
675
+ vbench_validate_filter.change(fn=on_filter_model_size_method_change, inputs=[checkbox_group, vbench_team_filter, vbench_validate_filter], outputs=data_component)
676
+ # Table 1
677
+ with gr.TabItem("Video Quality", elem_id="vbench-tab-table", id=2):
678
+ with gr.Accordion("INSTRUCTION", open=False):
679
+ citation_button = gr.Textbox(
680
+ value=QUALITY_CLAIM_TEXT,
681
+ label="",
682
+ elem_id="quality-button",
683
+ lines=2,
684
+ )
685
+ with gr.Row():
686
+ with gr.Column(scale=1.0):
687
+ # selection for column part:
688
+
689
+ checkbox_group_quality = gr.CheckboxGroup(
690
+ choices=QUALITY_TAB,
691
+ value=QUALITY_TAB,
692
+ label="Evaluation Quality Dimension",
693
+ interactive=True,
694
+ )
695
+
696
+ data_component_quality = gr.components.Dataframe(
697
+ value=get_baseline_df_quality,
698
+ headers=COLUMN_NAMES_QUALITY,
699
+ type="pandas",
700
+ datatype=DATA_TITILE_TYPE,
701
+ interactive=False,
702
+ visible=True,
703
+ )
704
+
705
+ checkbox_group_quality.change(fn=on_filter_model_size_method_change_quality, inputs=[checkbox_group_quality], outputs=data_component_quality)
706
+
707
+ # Table i2v
708
+ with gr.TabItem("VBench-I2V", elem_id="vbench-tab-table", id=3):
709
+ with gr.Accordion("NOTE", open=False):
710
+ i2v_note_button = gr.Textbox(
711
+ value=I2V_CLAIM_TEXT,
712
+ label="",
713
+ elem_id="quality-button",
714
+ lines=3,
715
+ )
716
+ with gr.Row():
717
+ with gr.Column(scale=1.0):
718
+ # selection for column part:
719
+ with gr.Row():
720
+ vbench_team_filter_i2v = gr.Checkbox(
721
+ label="Sampled by VBench Team (Uncheck to view all submissions)",
722
+ value=False,
723
+ interactive=True
724
+ )
725
+ vbench_validate_filter_i2v = gr.Checkbox(
726
+ label="Evaluated by VBench Team (Uncheck to view all submissions)",
727
+ value=False,
728
+ interactive=True
729
+ )
730
+ checkbox_group_i2v = gr.CheckboxGroup(
731
+ choices=I2V_TAB,
732
+ value=I2V_TAB,
733
+ label="Evaluation Quality Dimension",
734
+ interactive=True,
735
+ )
736
+
737
+ data_component_i2v = gr.components.Dataframe(
738
+ value=get_baseline_df_i2v,
739
+ headers=COLUMN_NAMES_I2V,
740
+ type="pandas",
741
+ datatype=I2V_TITILE_TYPE,
742
+ interactive=False,
743
+ visible=True,
744
+ )
745
+
746
+ checkbox_group_i2v.change(fn=on_filter_model_size_method_change_i2v, inputs=[checkbox_group_i2v, vbench_team_filter_i2v,vbench_validate_filter_i2v], outputs=data_component_i2v)
747
+ vbench_team_filter_i2v.change(fn=on_filter_model_size_method_change_i2v, inputs=[checkbox_group_i2v, vbench_team_filter_i2v,vbench_validate_filter_i2v], outputs=data_component_i2v)
748
+ vbench_validate_filter_i2v.change(fn=on_filter_model_size_method_change_i2v, inputs=[checkbox_group_i2v, vbench_team_filter_i2v,vbench_validate_filter_i2v], outputs=data_component_i2v)
749
+
750
+ with gr.TabItem("📊 VBench-Long", elem_id="vbench-tab-table", id=4):
751
+ with gr.Row():
752
+ with gr.Accordion("INSTRUCTION", open=False):
753
+ citation_button = gr.Textbox(
754
+ value=LONG_CLAIM_TEXT,
755
+ label="",
756
+ elem_id="long-ins-button",
757
+ lines=2,
758
+ )
759
+
760
+ gr.Markdown(
761
+ TABLE_INTRODUCTION
762
+ )
763
+ with gr.Row():
764
+ with gr.Column(scale=0.2):
765
+ choosen_q_long = gr.Button("Select Quality Dimensions")
766
+ choosen_s_long = gr.Button("Select Semantic Dimensions")
767
+ enable_b_long = gr.Button("Select All")
768
+ disable_b_long = gr.Button("Deselect All")
769
+
770
+ with gr.Column(scale=0.8):
771
+ with gr.Row():
772
+ vbench_team_filter_long = gr.Checkbox(
773
+ label="Sampled by VBench Team (Uncheck to view all submissions)",
774
+ value=False,
775
+ interactive=True
776
+ )
777
+ vbench_validate_filter_long = gr.Checkbox(
778
+ label="Evaluated by VBench Team (Uncheck to view all submissions)",
779
+ value=False,
780
+ interactive=True
781
+ )
782
+ checkbox_group_long = gr.CheckboxGroup(
783
+ choices=TASK_INFO,
784
+ value=DEFAULT_INFO,
785
+ label="Evaluation Dimension",
786
+ interactive=True,
787
+ )
788
+
789
+ data_component = gr.components.Dataframe(
790
+ value=get_baseline_df_long,
791
+ headers=COLUMN_NAMES,
792
+ type="pandas",
793
+ datatype=DATA_TITILE_TYPE,
794
+ interactive=False,
795
+ visible=True,
796
+ height=700,
797
+ )
798
+
799
+ choosen_q_long.click(choose_all_quailty, inputs=None, outputs=[checkbox_group_long]).then(fn=on_filter_model_size_method_change_long, inputs=[ checkbox_group_long, vbench_team_filter_long, vbench_validate_filter_long], outputs=data_component)
800
+ choosen_s_long.click(choose_all_semantic, inputs=None, outputs=[checkbox_group_long]).then(fn=on_filter_model_size_method_change_long, inputs=[ checkbox_group_long, vbench_team_filter_long, vbench_validate_filter_long], outputs=data_component)
801
+ enable_b_long.click(enable_all, inputs=None, outputs=[checkbox_group_long]).then(fn=on_filter_model_size_method_change_long, inputs=[ checkbox_group_long, vbench_team_filter_long, vbench_validate_filter_long], outputs=data_component)
802
+ disable_b_long.click(disable_all, inputs=None, outputs=[checkbox_group_long]).then(fn=on_filter_model_size_method_change_long, inputs=[ checkbox_group_long, vbench_team_filter_long, vbench_validate_filter_long], outputs=data_component)
803
+ checkbox_group_long.change(fn=on_filter_model_size_method_change_long, inputs=[checkbox_group_long, vbench_team_filter_long,vbench_validate_filter_long], outputs=data_component)
804
+ vbench_team_filter_long.change(fn=on_filter_model_size_method_change_long, inputs=[checkbox_group_long, vbench_team_filter_long,vbench_validate_filter_long], outputs=data_component)
805
+ vbench_validate_filter_long.change(fn=on_filter_model_size_method_change_long, inputs=[checkbox_group_long, vbench_team_filter_long,vbench_validate_filter_long], outputs=data_component)
806
+
807
+ # table info
808
+ with gr.TabItem("📝 About", elem_id="mvbench-tab-table", id=5):
809
+ gr.Markdown(LEADERBORAD_INFO, elem_classes="markdown-text")
810
+
811
+ # table submission
812
+ with gr.TabItem("🚀 [T2V]Submit here! ", elem_id="mvbench-tab-table", id=6):
813
+ gr.Markdown(LEADERBORAD_INTRODUCTION, elem_classes="markdown-text")
814
+
815
+ with gr.Row():
816
+ gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")
817
+
818
+ with gr.Row():
819
+ gr.Markdown("# ✉️✨ Submit your model evaluation json file here!", elem_classes="markdown-text")
820
+
821
+ with gr.Row():
822
+ gr.Markdown("Here is a required field", elem_classes="markdown-text")
823
+ with gr.Row():
824
+ with gr.Column():
825
+ model_name_textbox = gr.Textbox(
826
+ label="Model name", placeholder="Required field"
827
+ )
828
+ revision_name_textbox = gr.Textbox(
829
+ label="Revision Model Name(Optional)", placeholder="If you need to update the previous results, please fill in this line"
830
+ )
831
+ access_type = gr.Dropdown(["Open Source", "Ready to Open Source", "API", "Close"], label="Please select the way user can access your model. You can update the content by revision_name, or contact the VBench Team.")
832
+
833
+ with gr.Column():
834
+ model_link = gr.Textbox(
835
+ label="Project Page/Paper Link/Github/HuggingFace Repo", placeholder="Required field. If filling in the wrong information, your results may be removed."
836
+ )
837
+ team_name = gr.Textbox(
838
+ label="Your Team Name(If left blank, it will be user upload)", placeholder="User Upload"
839
+ )
840
+ contact_email = gr.Textbox(
841
+ label="E-Mail(Will not be displayed)", placeholder="Required field"
842
+ )
843
+ with gr.Row():
844
+ gr.Markdown("The following is optional and will be synced to [GitHub] (https://github.com/Vchitect/VBench/tree/master/sampled_videos#what-are-the-details-of-the-video-generation-models)", elem_classes="markdown-text")
845
+ with gr.Row():
846
+ release_time = gr.Textbox(label="Time of Publish", placeholder="1970-01-01")
847
+ model_resolution = gr.Textbox(label="resolution", placeholder="Width x Height")
848
+ model_fps = gr.Textbox(label="model fps", placeholder="FPS(int)")
849
+ model_frame = gr.Textbox(label="model frame count", placeholder="INT")
850
+ model_video_length = gr.Textbox(label="model video length", placeholder="float(2.0)")
851
+ model_checkpoint = gr.Textbox(label="model checkpoint", placeholder="optional")
852
+ model_commit_id = gr.Textbox(label="github commit id", placeholder='main')
853
+ model_video_format = gr.Textbox(label="pipeline format", placeholder='mp4')
854
+ with gr.Column():
855
+ input_file = gr.components.File(label = "Click to Upload a ZIP File", file_count="single", type='binary')
856
+ submit_button = gr.Button("Submit Eval")
857
+ submit_succ_button = gr.Markdown("Submit Success! Please press refresh and return to LeaderBoard!", visible=False)
858
+ fail_textbox = gr.Markdown('<span style="color:red;">Please ensure that the `Model Name`, `Project Page`, and `Email` are filled in correctly.</span>', elem_classes="markdown-text",visible=False)
859
+
860
+
861
+ submission_result = gr.Markdown()
862
+ submit_button.click(
863
+ add_new_eval,
864
+ inputs = [
865
+ input_file,
866
+ model_name_textbox,
867
+ revision_name_textbox,
868
+ model_link,
869
+ team_name,
870
+ contact_email,
871
+ release_time,
872
+ access_type,
873
+ model_resolution,
874
+ model_fps,
875
+ model_frame,
876
+ model_video_length,
877
+ model_checkpoint,
878
+ model_commit_id,
879
+ model_video_format
880
+ ],
881
+ outputs=[submit_button, submit_succ_button, fail_textbox]
882
+ )
883
+
884
+ with gr.TabItem("🚀 [I2V]Submit here! ", elem_id="mvbench-i2v-tab-table", id=7):
885
+ gr.Markdown(LEADERBORAD_INTRODUCTION, elem_classes="markdown-text")
886
+
887
+ with gr.Row():
888
+ gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")
889
+
890
+ with gr.Row():
891
+ gr.Markdown("# ✉️✨ Submit your i2v model evaluation json file here!", elem_classes="markdown-text")
892
+
893
+ with gr.Row():
894
+ gr.Markdown("Here is a required field", elem_classes="markdown-text")
895
+ with gr.Row():
896
+ with gr.Column():
897
+ model_name_textbox_i2v = gr.Textbox(
898
+ label="Model name", placeholder="Required field"
899
+ )
900
+ revision_name_textbox_i2v = gr.Textbox(
901
+ label="Revision Model Name(Optional)", placeholder="If you need to update the previous results, please fill in this line"
902
+ )
903
+ access_type_i2v = gr.Dropdown(["Open Source", "Ready to Open Source", "API", "Close"], label="Please select the way user can access your model. You can update the content by revision_name, or contact the VBench Team.")
904
+
905
+
906
+ with gr.Column():
907
+ model_link_i2v = gr.Textbox(
908
+ label="Project Page/Paper Link/Github/HuggingFace Repo", placeholder="Required field. If filling in the wrong information, your results may be removed."
909
+ )
910
+ team_name_i2v = gr.Textbox(
911
+ label="Your Team Name(If left blank, it will be user upload)", placeholder="User Upload"
912
+ )
913
+ contact_email_i2v = gr.Textbox(
914
+ label="E-Mail(Will not be displayed)", placeholder="Required field"
915
+ )
916
+ with gr.Row():
917
+ gr.Markdown("The following is optional and will be synced to [GitHub] (https://github.com/Vchitect/VBench/tree/master/sampled_videos#what-are-the-details-of-the-video-generation-models)", elem_classes="markdown-text")
918
+ with gr.Row():
919
+ release_time_i2v = gr.Textbox(label="Time of Publish", placeholder="1970-01-01")
920
+ model_resolution_i2v = gr.Textbox(label="resolution", placeholder="Width x Height")
921
+ model_fps_i2v = gr.Textbox(label="model fps", placeholder="FPS(int)")
922
+ model_frame_i2v = gr.Textbox(label="model frame count", placeholder="INT")
923
+ model_video_length_i2v = gr.Textbox(label="model video length", placeholder="float(2.0)")
924
+ model_checkpoint_i2v = gr.Textbox(label="model checkpoint", placeholder="optional")
925
+ model_commit_id_i2v = gr.Textbox(label="github commit id", placeholder='main')
926
+ model_video_format_i2v = gr.Textbox(label="pipeline format", placeholder='mp4')
927
+ with gr.Column():
928
+ input_file_i2v = gr.components.File(label = "Click to Upload a ZIP File", file_count="single", type='binary')
929
+ submit_button_i2v = gr.Button("Submit Eval")
930
+ submit_succ_button_i2v = gr.Markdown("Submit Success! Please press refresh and retfurn to LeaderBoard!", visible=False)
931
+ fail_textbox_i2v = gr.Markdown('<span style="color:red;">Please ensure that the `Model Name`, `Project Page`, and `Email` are filled in correctly.</span>', elem_classes="markdown-text",visible=False)
932
+
933
+
934
+ submission_result_i2v = gr.Markdown()
935
+ submit_button_i2v.click(
936
+ add_new_eval_i2v,
937
+ inputs = [
938
+ input_file_i2v,
939
+ model_name_textbox_i2v,
940
+ revision_name_textbox_i2v,
941
+ model_link_i2v,
942
+ team_name_i2v,
943
+ contact_email_i2v,
944
+ release_time_i2v,
945
+ access_type_i2v,
946
+ model_resolution_i2v,
947
+ model_fps_i2v,
948
+ model_frame_i2v,
949
+ model_video_length_i2v,
950
+ model_checkpoint_i2v,
951
+ model_commit_id_i2v,
952
+ model_video_format_i2v
953
+ ],
954
+ outputs=[submit_button_i2v, submit_succ_button_i2v, fail_textbox_i2v]
955
+ )
956
+
957
+
958
+
959
+ def refresh_data():
960
+ value1 = get_baseline_df()
961
+ return value1
962
+
963
+ with gr.Row():
964
+ data_run = gr.Button("Refresh")
965
+ data_run.click(on_filter_model_size_method_change, inputs=[checkbox_group], outputs=data_component)
966
+
967
+
968
+ block.launch()
constants.py ADDED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

# Shared constants for the VBench leaderboard app: table column layouts,
# evaluation-dimension lists, score weights, Gradio column datatypes,
# submission-store locations, normalization ranges, and UI copy.
#
# NOTE(review): several public names keep historical misspellings
# ("TITILE", "LEADERBORAD") because app.py imports them by these exact
# names; renaming them here would break the app.

# ---------------------------------------------------------------------------
# Table column layouts
# ---------------------------------------------------------------------------

# Per-model metadata columns on the main (T2V) leaderboard.
MODEL_INFO = [
    "Model Name (clickable)",
    "Sampled by",
    "Evaluated by",
    "Accessibility",
    "Date",
    "Total Score",
    "Quality Score",
    "Semantic Score",
    "Selected Score",
]

# Per-model columns on the quality-only tab.
MODEL_INFO_TAB_QUALITY = [
    "Model Name (clickable)",
    "Quality Score",
    "Selected Score",
]

# Per-model columns on the VBench-I2V tab.
MODEL_INFO_TAB_I2V = [
    "Model Name (clickable)",
    "Sampled by",
    "Evaluated by",
    "Accessibility",
    "Date",
    "Total Score",
    "I2V Score",
    "Quality Score",
    "Selected Score",
]

# ---------------------------------------------------------------------------
# Evaluation dimensions
# ---------------------------------------------------------------------------

# The 16 VBench evaluation dimensions (7 quality + 9 semantic).
TASK_INFO = [
    "subject consistency",
    "background consistency",
    "temporal flickering",
    "motion smoothness",
    "dynamic degree",
    "aesthetic quality",
    "imaging quality",
    "object class",
    "multiple objects",
    "human action",
    "color",
    "spatial relationship",
    "scene",
    "appearance style",
    "temporal style",
    "overall consistency",
]

# Dimensions pre-selected in the UI; currently every dimension. Derived
# from TASK_INFO instead of repeating the identical 16-entry literal.
DEFAULT_INFO = list(TASK_INFO)

# Quality-axis dimensions ("Select Quality Dimensions" button).
QUALITY_LIST = [
    "subject consistency",
    "background consistency",
    "temporal flickering",
    "motion smoothness",
    "aesthetic quality",
    "imaging quality",
    "dynamic degree",
]

# Semantic-axis dimensions ("Select Semantic Dimensions" button).
SEMANTIC_LIST = [
    "object class",
    "multiple objects",
    "human action",
    "color",
    "spatial relationship",
    "scene",
    "appearance style",
    "temporal style",
    "overall consistency",
]

# Columns shown on the "Video Quality" tab: QUALITY_LIST without
# "temporal flickering".
QUALITY_TAB = [
    "subject consistency",
    "background consistency",
    "motion smoothness",
    "aesthetic quality",
    "imaging quality",
    "dynamic degree",
]

# I2V-specific (video-text / video-image consistency) dimensions.
I2V_LIST = [
    "Video-Text Camera Motion",
    "Video-Image Subject Consistency",
    "Video-Image Background Consistency",
]

# Quality dimensions evaluated for I2V models.
I2V_QUALITY_LIST = [
    "Subject Consistency",
    "Background Consistency",
    "Motion Smoothness",
    "Dynamic Degree",
    "Aesthetic Quality",
    "Imaging Quality",
    # "Temporal Flickering"
]

# Columns shown on the VBench-I2V tab: the I2V dimensions followed by the
# quality dimensions (previously duplicated as a literal list).
I2V_TAB = I2V_LIST + I2V_QUALITY_LIST

# ---------------------------------------------------------------------------
# Score weights
# ---------------------------------------------------------------------------

# Per-dimension weights used when aggregating the T2V scores.
DIM_WEIGHT = {
    "subject consistency": 1,
    "background consistency": 1,
    "temporal flickering": 1,
    "motion smoothness": 1,
    "aesthetic quality": 1,
    "imaging quality": 1,
    "dynamic degree": 0.5,
    "object class": 1,
    "multiple objects": 1,
    "human action": 1,
    "color": 1,
    "spatial relationship": 1,
    "scene": 1,
    "appearance style": 1,
    "temporal style": 1,
    "overall consistency": 1,
}

# Per-dimension weights used when aggregating the I2V scores.
DIM_WEIGHT_I2V = {
    "Video-Text Camera Motion": 0.1,
    "Video-Image Subject Consistency": 1,
    "Video-Image Background Consistency": 1,
    "Subject Consistency": 1,
    "Background Consistency": 1,
    "Motion Smoothness": 1,
    "Dynamic Degree": 0.5,
    "Aesthetic Quality": 1,
    "Imaging Quality": 1,
    "Temporal Flickering": 1,
}

# Relative weights of the semantic vs. quality axes in the total score.
SEMANTIC_WEIGHT = 1
QUALITY_WEIGHT = 4
I2V_WEIGHT = 1.0
I2V_QUALITY_WEIGHT = 1.0

# ---------------------------------------------------------------------------
# Gradio Dataframe column datatypes
# ---------------------------------------------------------------------------

# Main table: 5 markdown columns (name / sampled by / evaluated by /
# accessibility / date) followed by 20 numeric columns (4 summary scores
# + 16 dimension scores). Built multiplicatively instead of a 25-entry
# literal.
DATA_TITILE_TYPE = ["markdown"] * 5 + ["number"] * 20

# I2V table: 1 markdown column followed by numeric columns.
# NOTE(review): this keeps the original 21 entries although
# COLUMN_NAMES_I2V only has 18 columns; the surplus appears to be
# ignored by Gradio — confirm before trimming.
I2V_TITILE_TYPE = ["markdown"] + ["number"] * 20

# ---------------------------------------------------------------------------
# Submission store locations
# ---------------------------------------------------------------------------

SUBMISSION_NAME = "vbench_leaderboard_submission"
# NOTE(review): os.path.join on a URL works here only because both pieces
# use forward slashes; plain concatenation would be more portable.
SUBMISSION_URL = os.path.join("https://huggingface.co/datasets/Vchitect/", SUBMISSION_NAME)
CSV_DIR = "./vbench_leaderboard_submission/results.csv"
QUALITY_DIR = "./vbench_leaderboard_submission/quality.csv"
I2V_DIR = "./vbench_leaderboard_submission/i2v_results.csv"
LONG_DIR = "./vbench_leaderboard_submission/long_debug.csv"
INFO_DIR = "./vbench_leaderboard_submission/model_info.csv"

# Full header rows for each leaderboard table.
COLUMN_NAMES = MODEL_INFO + TASK_INFO
COLUMN_NAMES_QUALITY = MODEL_INFO_TAB_QUALITY + QUALITY_TAB
COLUMN_NAMES_I2V = MODEL_INFO_TAB_I2V + I2V_TAB

# ---------------------------------------------------------------------------
# UI copy (markdown / text shown in the app)
# ---------------------------------------------------------------------------

# Intro markdown shown at the top of the leaderboard and submission tabs.
LEADERBORAD_INTRODUCTION = """# VBench Leaderboard

*"Which Video Generation Model is better?"*
🏆 Welcome to the leaderboard of the **VBench**! 🎦 *A Comprehensive Benchmark Suite for Video Generative Models* (**CVPR 2024 Spotlight**) [![Code](https://img.shields.io/github/stars/Vchitect/VBench.svg?style=social&label=Official)](https://github.com/Vchitect/VBench)
<div style="display: flex; flex-wrap: wrap; align-items: center; gap: 10px;">
<a href='https://arxiv.org/abs/2311.17982'><img src='https://img.shields.io/badge/cs.CV-Paper-b31b1b?logo=arxiv&logoColor=red'></a>
<a href='https://vchitect.github.io/VBench-project/'><img src='https://img.shields.io/badge/VBench-Website-green?logo=googlechrome&logoColor=green'></a>
<a href='https://pypi.org/project/vbench/'><img src='https://img.shields.io/pypi/v/vbench'></a>
<a href='https://www.youtube.com/watch?v=7IhCC8Qqn8Y'><img src='https://img.shields.io/badge/YouTube-Video-c4302b?logo=youtube&logoColor=red'></a>
<a href='https://hits.seeyoufarm.com'><img src='https://hits.seeyoufarm.com/api/count/incr/badge.svg?url=https%3A%2F%2Fgithub.com%2FVchitect%2FVBench&count_bg=%23FFA500&title_bg=%23555555&icon=&icon_color=%23E7E7E7&title=visitors&edge_flat=false'></a>
</div>

- **Comprehensive Dimensions:** We carefully decompose video generation quality into 16 comprehensive dimensions to reveal individual model's strengths and weaknesses.
- **Human Alignment:** We conducted extensive experiments and human annotations to validate robustness of VBench.
- **Valuable Insights:** VBench provides multi-perspective insights useful for the community.

**Join Leaderboard**: Please see the [instructions](https://github.com/Vchitect/VBench/tree/master?tab=readme-ov-file#trophy-leaderboard) for 3 options to participate. One option is to follow [VBench Usage info](https://github.com/Vchitect/VBench?tab=readme-ov-file#usage), and upload the generated `result.json` file here. After clicking the `Submit here!` button, click the `Refresh` button.

**Model Information**: What are the details of these Video Generation Models? See [HERE](https://github.com/Vchitect/VBench/tree/master/sampled_videos#what-are-the-details-of-the-video-generation-models)

**Credits**: This leaderboard is updated and maintained by the team of [VBench Contributors](https://github.com/Vchitect/VBench?tab=readme-ov-file#muscle-vbench-contributors).
"""

# Markdown instructions shown above the submission form.
SUBMIT_INTRODUCTION = """# Submit on VBench Benchmark Introduction

## 🎈
1. Please note that you need to obtain the file `evaluation_results/*.json` by running VBench in Github. You may conduct an [Offline Check](https://github.com/Vchitect/VBench?tab=readme-ov-file#get-final-score-and-submit-to-leaderboard) before uploading.
2. Then, pack these JSON files into a `ZIP` archive, ensuring that the top-level directory of the ZIP contains the individual JSON files.
3. Finally, upload the ZIP archive below.

⚠️ Uploading generated videos or images of the model is invalid!
⚠️ Submissions that do not correctly fill in the model name and model link may be deleted by the VBench team. The contact information you filled in will not be made public.
"""

# Placeholder shown between the intro and each table (currently empty).
TABLE_INTRODUCTION = """
"""

# Body of the "About" tab.
LEADERBORAD_INFO = """
VBench, a comprehensive benchmark suite for video generative models. We design a comprehensive and hierarchical Evaluation Dimension Suite to decompose "video generation quality" into multiple well-defined dimensions to facilitate fine-grained and objective evaluation. For each dimension and each content category, we carefully design a Prompt Suite as test cases, and sample Generated Videos from a set of video generation models. For each evaluation dimension, we specifically design an Evaluation Method Suite, which uses carefully crafted method or designated pipeline for automatic objective evaluation. We also conduct Human Preference Annotation for the generated videos for each dimension, and show that VBench evaluation results are well aligned with human perceptions. VBench can provide valuable insights from multiple perspectives.
"""

CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r"""@inproceedings{huang2023vbench,
    title={{VBench}: Comprehensive Benchmark Suite for Video Generative Models},
    author={Huang, Ziqi and He, Yinan and Yu, Jiashuo and Zhang, Fan and Si, Chenyang and Jiang, Yuming and Zhang, Yuanhan and Wu, Tianxing and Jin, Qingyang and Chanpaisit, Nattapol and Wang, Yaohui and Chen, Xinyuan and Wang, Limin and Lin, Dahua and Qiao, Yu and Liu, Ziwei},
    booktitle={Proceedings of the IEEE/CVF conference on computer vision and pattern recognition},
    year={2024}
}"""

QUALITY_CLAIM_TEXT = "We use all the videos on Sora website (https://openai.com/sora) for a preliminary evaluation, including the failure case videos Sora provided."

I2V_CLAIM_TEXT = "Since the open-sourced SVD models do not accept text input during the I2V stage, we are unable to evaluate its `camera motion` in terms of `video-text consistency`. The total score is calculated based on all dimensions except `camera motion`."

# The VBench-Long tab currently shows no extra note.
LONG_CLAIM_TEXT = ""

# ---------------------------------------------------------------------------
# Score normalization ranges
# ---------------------------------------------------------------------------

# Per-dimension raw-score Min/Max used to normalize T2V scores to [0, 1]
# before weighting.
NORMALIZE_DIC = {
    "subject consistency": {"Min": 0.1462, "Max": 1.0},
    "background consistency": {"Min": 0.2615, "Max": 1.0},
    "temporal flickering": {"Min": 0.6293, "Max": 1.0},
    "motion smoothness": {"Min": 0.706, "Max": 0.9975},
    "dynamic degree": {"Min": 0.0, "Max": 1.0},
    "aesthetic quality": {"Min": 0.0, "Max": 1.0},
    "imaging quality": {"Min": 0.0, "Max": 1.0},
    "object class": {"Min": 0.0, "Max": 1.0},
    "multiple objects": {"Min": 0.0, "Max": 1.0},
    "human action": {"Min": 0.0, "Max": 1.0},
    "color": {"Min": 0.0, "Max": 1.0},
    "spatial relationship": {"Min": 0.0, "Max": 1.0},
    "scene": {"Min": 0.0, "Max": 0.8222},
    "appearance style": {"Min": 0.0009, "Max": 0.2855},
    "temporal style": {"Min": 0.0, "Max": 0.364},
    "overall consistency": {"Min": 0.0, "Max": 0.364},
}

# Per-dimension Min/Max ranges for the I2V dimensions.
NORMALIZE_DIC_I2V = {
    "Video-Text Camera Motion": {"Min": 0.0, "Max": 1.0},
    "Video-Image Subject Consistency": {"Min": 0.1462, "Max": 1.0},
    "Video-Image Background Consistency": {"Min": 0.2615, "Max": 1.0},
    "Subject Consistency": {"Min": 0.1462, "Max": 1.0},
    "Background Consistency": {"Min": 0.2615, "Max": 1.0},
    "Motion Smoothness": {"Min": 0.7060, "Max": 0.9975},
    "Dynamic Degree": {"Min": 0.0, "Max": 1.0},
    "Aesthetic Quality": {"Min": 0.0, "Max": 1.0},
    "Imaging Quality": {"Min": 0.0, "Max": 1.0},
    "Temporal Flickering": {"Min": 0.6293, "Max": 1.0},
}
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio==4.36.1
2
+ numpy
3
+ pandas