dongsheng committed on
Commit
5fe7967
·
verified ·
1 Parent(s): 78dc35c

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -130
app.py CHANGED
@@ -1,12 +1,9 @@
1
  import gradio as gr
2
  import json
3
  import pandas as pd
4
- from collections import defaultdict
5
- import copy as cp
6
  from urllib.request import urlopen, URLError
7
  import re
8
  from datetime import datetime
9
- import time
10
 
11
  # Constants
12
  CITATION_BUTTON_TEXT = r"""@misc{2023opencompass,
@@ -14,23 +11,16 @@ CITATION_BUTTON_TEXT = r"""@misc{2023opencompass,
14
  author={OpenCompass Contributors},
15
  howpublished = {\url{https://github.com/open-compass/opencompass}},
16
  year={2023}
17
- },
18
  }"""
19
  CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
20
- OPENCOMPASS_README = (
21
- 'https://raw.githubusercontent.com/open-compass/opencompass/main/README.md'
22
- )
23
- GITHUB_REPO = 'https://github.com/open-compass/opencompass'
24
- GITHUB_RAW = 'https://raw.githubusercontent.com/open-compass/opencompass'
25
- GITHUB_BLOB = 'https://github.com/open-compass/opencompass/blob'
26
-
27
- # Base URL for the JSON data
28
  DATA_URL_BASE = "http://opencompass.oss-cn-shanghai.aliyuncs.com/assets/research-rank/research-data.REALTIME."
29
 
30
  def find_latest_data_url():
31
  """Find the latest available data URL by trying different dates."""
32
  today = datetime.now()
33
- # Try last 365 days
34
  for i in range(365):
35
  date = today.replace(day=today.day - i)
36
  date_str = date.strftime("%Y%m%d")
@@ -40,7 +30,6 @@ def find_latest_data_url():
40
  return url, date_str
41
  except URLError:
42
  continue
43
- # If no valid URL found, return None
44
  return None, None
45
 
46
  def get_latest_data():
@@ -51,7 +40,6 @@ def get_latest_data():
51
  formatted_update_time = datetime.strptime(update_time, "%Y%m%d").strftime("%Y-%m-%d")
52
  return data_url, formatted_update_time
53
 
54
- # Markdown content
55
  def get_leaderboard_title(update_time):
56
  return f"# CompassAcademic Leaderboard (Last Updated: {update_time})"
57
 
@@ -62,72 +50,36 @@ The CompassAcademic currently focuses on the comprehensive reasoning abilities o
62
  - Prompts and reproduction scripts can be found in [**OpenCompass**: A Toolkit for Evaluation of LLMs](https://github.com/open-compass/opencompass)🏆.
63
  """
64
 
65
- def fix_image_urls(content):
66
- """Fix image URLs in markdown content."""
67
- # Handle the specific logo.svg path
68
- content = content.replace(
69
- 'docs/en/_static/image/logo.svg',
70
- 'https://raw.githubusercontent.com/open-compass/opencompass/main/docs/en/_static/image/logo.svg',
71
- )
72
-
73
- # Replace other relative image paths with absolute GitHub URLs
74
- content = re.sub(
75
- r'!\[[^\]]*\]\((?!http)([^)]+)\)',
76
- lambda m: f'![{m.group(0)}](https://raw.githubusercontent.com/open-compass/opencompass/main/{m.group(1)})',
77
- content,
78
- )
79
-
80
- return content
81
-
82
-
83
  MODEL_SIZE = ['<10B', '10B-70B', '>70B', 'Unknown']
84
  MODEL_TYPE = ['API', 'OpenSource']
85
 
86
-
87
  def load_data(data_url):
88
  response = urlopen(data_url)
89
  data = json.loads(response.read().decode('utf-8'))
90
  return data
91
 
92
-
93
  def build_main_table(data):
94
  df = pd.DataFrame(data['globalData']['OverallTable'])
95
-
96
- # Add OpenSource column based on models data
97
  models_data = data['models']
98
  df['OpenSource'] = df['model'].apply(
99
  lambda x: 'Yes' if models_data[x]['release'] == 'OpenSource' else 'No'
100
  )
101
-
102
- # Add Rank column based on Average Score
103
  df['Rank'] = df['Average'].rank(ascending=False, method='min').astype(int)
104
-
105
  columns = {
106
- 'Rank': 'Rank',
107
- 'model': 'Model',
108
- 'org': 'Organization',
109
- 'num': 'Parameters',
110
- 'OpenSource': 'OpenSource',
111
- 'Average': 'Average Score',
112
- 'BBH': 'BBH',
113
- 'Math-500': 'Math-500',
114
- 'AIME': 'AIME',
115
- 'MMLU-Pro': 'MMLU-Pro',
116
- 'LiveCodeBench': 'LiveCodeBench',
117
- 'HumanEval': 'HumanEval',
118
- 'GQPA-Diamond': 'GQPA-Diamond',
119
- 'IFEval': 'IFEval',
120
  }
121
  df = df[list(columns.keys())].rename(columns=columns)
122
  return df
123
 
124
-
125
  def filter_table(df, size_ranges, model_types):
126
  filtered_df = df.copy()
127
-
128
- # Filter by size
129
  if size_ranges:
130
-
131
  def get_size_in_B(param):
132
  if param == 'N/A':
133
  return None
@@ -135,30 +87,23 @@ def filter_table(df, size_ranges, model_types):
135
  return float(param.replace('B', ''))
136
  except:
137
  return None
138
-
139
- filtered_df['size_in_B'] = filtered_df['Parameters'].apply(
140
- get_size_in_B
141
- )
142
-
143
  mask = pd.Series(False, index=filtered_df.index)
 
144
  for size_range in size_ranges:
145
  if size_range == '<10B':
146
- mask |= (filtered_df['size_in_B'] < 10) & (
147
- filtered_df['size_in_B'].notna()
148
- )
149
  elif size_range == '10B-70B':
150
- mask |= (filtered_df['size_in_B'] >= 10) & (
151
- filtered_df['size_in_B'] < 70
152
- )
153
  elif size_range == '>70B':
154
  mask |= filtered_df['size_in_B'] >= 70
155
  elif size_range == 'Unknown':
156
  mask |= filtered_df['size_in_B'].isna()
157
-
158
  filtered_df = filtered_df[mask]
159
  filtered_df.drop('size_in_B', axis=1, inplace=True)
160
-
161
- # Filter by model type
162
  if model_types:
163
  type_mask = pd.Series(False, index=filtered_df.index)
164
  for model_type in model_types:
@@ -167,49 +112,79 @@ def filter_table(df, size_ranges, model_types):
167
  elif model_type == 'OpenSource':
168
  type_mask |= filtered_df['OpenSource'] == 'Yes'
169
  filtered_df = filtered_df[type_mask]
170
-
171
  return filtered_df
172
 
173
-
174
  def calculate_column_widths(df):
175
- """Dynamically calculate column widths based on content length."""
176
  column_widths = []
177
-
178
  for column in df.columns:
179
- # Get max length of column name and values
180
  header_length = len(str(column))
181
  max_content_length = df[column].astype(str).map(len).max()
182
-
183
- # Use the larger of header or content length
184
- # Multiply by average character width (approximately 8 pixels)
185
- # Add padding (20 pixels)
186
- # Increase the multiplier for header length to ensure it fits
187
  width = max(header_length * 10, max_content_length * 8) + 20
188
-
189
- # Set minimum width (200 pixels)
190
- width = max(160, width)
191
-
192
- # Set maximum width (400 pixels) to prevent extremely wide columns
193
- width = min(400, width)
194
-
195
  column_widths.append(width)
196
-
197
  return column_widths
198
 
 
 
 
 
 
199
 
200
  def create_interface():
201
- data_url, update_time = get_latest_data()
202
- data = load_data(data_url)
203
- df = build_main_table(data)
204
- title = gr.Markdown(get_leaderboard_title(update_time))
 
205
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
  with gr.Blocks() as demo:
207
- title_comp = gr.Markdown(get_leaderboard_title(update_time))
208
-
209
  with gr.Tabs() as tabs:
210
  with gr.TabItem("🏅 Main Leaderboard", elem_id='main'):
211
  gr.Markdown(MAIN_LEADERBOARD_DESCRIPTION)
212
-
213
  with gr.Row():
214
  with gr.Column():
215
  size_filter = gr.CheckboxGroup(
@@ -225,52 +200,47 @@ def create_interface():
225
  label='Model Type',
226
  interactive=True,
227
  )
228
-
229
  with gr.Column():
230
  table = gr.DataFrame(
231
- value=df.sort_values("Average Score", ascending=False),
232
  interactive=False,
233
- wrap=False, # 禁用自动换行
234
- column_widths=calculate_column_widths(df),
235
- )
236
-
237
- def update_data():
238
- """Periodically check for new data and update the interface"""
239
- while True:
240
- time.sleep(300) # Check every 5 minutes
241
- try:
242
- new_data_url, new_update_time = get_latest_data()
243
- if new_data_url != data_url:
244
- new_data = load_data(new_data_url)
245
- new_df = build_main_table(new_data)
246
- filtered_df = filter_table(new_df, size_filter.value, type_filter.value)
247
- title_comp.value = get_leaderboard_title(new_update_time)
248
- table.value = filtered_df.sort_values("Average Score", ascending=False)
249
- except Exception as e:
250
- print(f"Error updating data: {e}")
251
- continue
252
-
253
- def update_table(size_ranges, model_types):
254
- filtered_df = filter_table(df, size_ranges, model_types)
255
- return filtered_df.sort_values(
256
- "Average Score", ascending=False
257
  )
258
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
  size_filter.change(
260
  fn=update_table,
261
  inputs=[size_filter, type_filter],
262
  outputs=table,
263
  )
264
-
265
  type_filter.change(
266
  fn=update_table,
267
  inputs=[size_filter, type_filter],
268
  outputs=table,
269
  )
270
 
271
- # Set up periodic data update
272
- demo.load(update_data)
273
-
274
  with gr.Row():
275
  with gr.Accordion("Citation", open=False):
276
  citation_button = gr.Textbox(
@@ -281,7 +251,7 @@ def create_interface():
281
 
282
  return demo
283
 
284
-
285
  if __name__ == '__main__':
286
  demo = create_interface()
287
- demo.launch(server_name='0.0.0.0')
 
 
1
  import gradio as gr
2
  import json
3
  import pandas as pd
 
 
4
  from urllib.request import urlopen, URLError
5
  import re
6
  from datetime import datetime
 
7
 
8
  # Constants
9
  CITATION_BUTTON_TEXT = r"""@misc{2023opencompass,
 
11
  author={OpenCompass Contributors},
12
  howpublished = {\url{https://github.com/open-compass/opencompass}},
13
  year={2023}
 
14
  }"""
15
  CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
16
+ # Development environment
17
+ # DATA_URL_BASE = "http://opencompass.oss-cn-shanghai.aliyuncs.com/dev-assets/research-rank/research-data.REALTIME."
18
+ # Production environment
 
 
 
 
 
19
  DATA_URL_BASE = "http://opencompass.oss-cn-shanghai.aliyuncs.com/assets/research-rank/research-data.REALTIME."
20
 
21
  def find_latest_data_url():
22
  """Find the latest available data URL by trying different dates."""
23
  today = datetime.now()
 
24
  for i in range(365):
25
  date = today.replace(day=today.day - i)
26
  date_str = date.strftime("%Y%m%d")
 
30
  return url, date_str
31
  except URLError:
32
  continue
 
33
  return None, None
34
 
35
  def get_latest_data():
 
40
  formatted_update_time = datetime.strptime(update_time, "%Y%m%d").strftime("%Y-%m-%d")
41
  return data_url, formatted_update_time
42
 
 
43
  # Build the Markdown level-1 heading for the leaderboard page.
  # `update_time` is interpolated verbatim into the "(Last Updated: ...)" suffix.
  def get_leaderboard_title(update_time):
44
  return f"# CompassAcademic Leaderboard (Last Updated: {update_time})"
45
 
 
50
  - Prompts and reproduction scripts can be found in [**OpenCompass**: A Toolkit for Evaluation of LLMs](https://github.com/open-compass/opencompass)🏆.
51
  """
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  MODEL_SIZE = ['<10B', '10B-70B', '>70B', 'Unknown']
54
  MODEL_TYPE = ['API', 'OpenSource']
55
 
 
56
  def load_data(data_url):
57
  response = urlopen(data_url)
58
  data = json.loads(response.read().decode('utf-8'))
59
  return data
60
 
 
61
  def build_main_table(data):
62
  df = pd.DataFrame(data['globalData']['OverallTable'])
 
 
63
  models_data = data['models']
64
  df['OpenSource'] = df['model'].apply(
65
  lambda x: 'Yes' if models_data[x]['release'] == 'OpenSource' else 'No'
66
  )
 
 
67
  df['Rank'] = df['Average'].rank(ascending=False, method='min').astype(int)
68
+
69
  columns = {
70
+ 'Rank': 'Rank', 'model': 'Model', 'org': 'Organization', 'num': 'Parameters',
71
+ 'OpenSource': 'OpenSource', 'Average': 'Average Score', 'BBH': 'BBH',
72
+ 'Math-500': 'Math-500', 'AIME': 'AIME', 'MMLU-Pro': 'MMLU-Pro',
73
+ 'LiveCodeBench': 'LiveCodeBench', 'HumanEval': 'HumanEval',
74
+ 'GQPA-Diamond': 'GQPA-Diamond', 'IFEval': 'IFEval',
 
 
 
 
 
 
 
 
 
75
  }
76
  df = df[list(columns.keys())].rename(columns=columns)
77
  return df
78
 
 
79
  def filter_table(df, size_ranges, model_types):
80
  filtered_df = df.copy()
81
+
 
82
  if size_ranges:
 
83
  def get_size_in_B(param):
84
  if param == 'N/A':
85
  return None
 
87
  return float(param.replace('B', ''))
88
  except:
89
  return None
90
+
91
+ filtered_df['size_in_B'] = filtered_df['Parameters'].apply(get_size_in_B)
 
 
 
92
  mask = pd.Series(False, index=filtered_df.index)
93
+
94
  for size_range in size_ranges:
95
  if size_range == '<10B':
96
+ mask |= (filtered_df['size_in_B'] < 10) & (filtered_df['size_in_B'].notna())
 
 
97
  elif size_range == '10B-70B':
98
+ mask |= (filtered_df['size_in_B'] >= 10) & (filtered_df['size_in_B'] < 70)
 
 
99
  elif size_range == '>70B':
100
  mask |= filtered_df['size_in_B'] >= 70
101
  elif size_range == 'Unknown':
102
  mask |= filtered_df['size_in_B'].isna()
103
+
104
  filtered_df = filtered_df[mask]
105
  filtered_df.drop('size_in_B', axis=1, inplace=True)
106
+
 
107
  if model_types:
108
  type_mask = pd.Series(False, index=filtered_df.index)
109
  for model_type in model_types:
 
112
  elif model_type == 'OpenSource':
113
  type_mask |= filtered_df['OpenSource'] == 'Yes'
114
  filtered_df = filtered_df[type_mask]
115
+
116
  return filtered_df
117
 
 
118
  def calculate_column_widths(df):
 
119
  column_widths = []
 
120
  for column in df.columns:
 
121
  header_length = len(str(column))
122
  max_content_length = df[column].astype(str).map(len).max()
 
 
 
 
 
123
  width = max(header_length * 10, max_content_length * 8) + 20
124
+ width = max(160, min(400, width))
 
 
 
 
 
 
125
  column_widths.append(width)
 
126
  return column_widths
127
 
128
# Mutable holder for the leaderboard table currently in use.
# `current_df` starts as None and is assigned the DataFrame built by
# build_main_table inside create_interface's load/refresh callbacks.
+ class DataState:
129
+ def __init__(self):
130
+ self.current_df = None
131
+
132
+ data_state = DataState()
133
 
134
  def create_interface():
135
+ empty_df = pd.DataFrame(columns=[
136
+ 'Rank', 'Model', 'Organization', 'Parameters', 'OpenSource',
137
+ 'Average Score', 'BBH', 'Math-500', 'AIME', 'MMLU-Pro',
138
+ 'LiveCodeBench', 'HumanEval', 'GQPA-Diamond', 'IFEval'
139
+ ])
140
 
141
+ def load_initial_data():
142
+ try:
143
+ data_url, update_time = get_latest_data()
144
+ data = load_data(data_url)
145
+ new_df = build_main_table(data)
146
+ data_state.current_df = new_df
147
+ filtered_df = filter_table(new_df, MODEL_SIZE, MODEL_TYPE)
148
+ return get_leaderboard_title(update_time), filtered_df.sort_values("Average Score", ascending=False)
149
+ except Exception as e:
150
+ print(f"Error loading initial data: {e}")
151
+ return "# CompassAcademic Leaderboard (Error loading data)", empty_df
152
+
153
+ def refresh_data():
154
+ try:
155
+ data_url, update_time = get_latest_data()
156
+ data = load_data(data_url)
157
+ new_df = build_main_table(data)
158
+ data_state.current_df = new_df
159
+ filtered_df = filter_table(new_df, MODEL_SIZE, MODEL_TYPE)
160
+ return get_leaderboard_title(update_time), filtered_df.sort_values("Average Score", ascending=False)
161
+ except Exception as e:
162
+ print(f"Error refreshing data: {e}")
163
+ return None, None
164
+
165
+ def auto_refresh():
166
+ """Single refresh function for automatic updates"""
167
+ title, data = refresh_data()
168
+ status = f"Last auto update: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
169
+ if title and data is not None:
170
+ return title, data, status
171
+ return None, None, None
172
+
173
+ def update_table(size_ranges, model_types):
174
+ if data_state.current_df is None:
175
+ return empty_df
176
+ filtered_df = filter_table(data_state.current_df, size_ranges, model_types)
177
+ return filtered_df.sort_values("Average Score", ascending=False)
178
+
179
+ initial_title, initial_data = load_initial_data()
180
+
181
  with gr.Blocks() as demo:
182
+ title_comp = gr.Markdown(initial_title)
183
+
184
  with gr.Tabs() as tabs:
185
  with gr.TabItem("🏅 Main Leaderboard", elem_id='main'):
186
  gr.Markdown(MAIN_LEADERBOARD_DESCRIPTION)
187
+
188
  with gr.Row():
189
  with gr.Column():
190
  size_filter = gr.CheckboxGroup(
 
200
  label='Model Type',
201
  interactive=True,
202
  )
203
+
204
  with gr.Column():
205
  table = gr.DataFrame(
206
+ value=initial_data,
207
  interactive=False,
208
+ wrap=False,
209
+ column_widths=calculate_column_widths(initial_data),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
  )
211
+
212
+ refresh_button = gr.Button("Refresh Data")
213
+ update_status = gr.Markdown("Last update: " + datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
214
+
215
+ def refresh_and_update():
216
+ title, data = refresh_data()
217
+ status = f"Last manual update: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
218
+ return title, data, status
219
+
220
+ refresh_button.click(
221
+ fn=refresh_and_update,
222
+ outputs=[title_comp, table, update_status],
223
+ )
224
+
225
+ # Set up automatic refresh
226
+ demo.load(
227
+ fn=auto_refresh,
228
+ outputs=[title_comp, table, update_status],
229
+ every=21600 # Try to refresh every 6 hours
230
+ )
231
+
232
  size_filter.change(
233
  fn=update_table,
234
  inputs=[size_filter, type_filter],
235
  outputs=table,
236
  )
237
+
238
  type_filter.change(
239
  fn=update_table,
240
  inputs=[size_filter, type_filter],
241
  outputs=table,
242
  )
243
 
 
 
 
244
  with gr.Row():
245
  with gr.Accordion("Citation", open=False):
246
  citation_button = gr.Textbox(
 
251
 
252
  return demo
253
 
 
254
  if __name__ == '__main__':
255
  demo = create_interface()
256
+ demo.queue()
257
+ demo.launch(server_name='0.0.0.0')