anonymous-researcher912 committed on
Commit
968f8be
·
1 Parent(s): 3dc6935

update new data

Browse files
app.py CHANGED
@@ -1,13 +1,20 @@
1
  import streamlit as st
2
  import datasets
 
 
 
 
 
3
 
4
  humaneval_v_data = datasets.load_from_disk("humaneval_v_test_hf")
 
 
5
  st.set_page_config(layout="wide", page_title="HumanEval-V Viewer")
6
 
7
- st.markdown("> <i>This is a viewer for the HumanEval-V benchmark, which consists of 108 coding tasks. Use the navigation buttons or enter an index to browse the tasks. If you encounter any issues, we encourage you to start a discussion [here](https://huggingface.co/datasets/HumanEval-V/HumanEval-V-Benchmark/discussions)</i>.", unsafe_allow_html=True)
8
  st.markdown("---")
9
 
10
- max_index = 108
11
 
12
  # Initialize session state for index if not present
13
  if 'index' not in st.session_state:
@@ -23,38 +30,51 @@ with buttons[1]:
23
  index_input = st.number_input(
24
  f"Go to index (1-{max_index}):",
25
  min_value=1,
26
- max_value=108,
27
  value=st.session_state.index,
28
  key="index_input",
29
  help="Enter an index and jump to that index.",
30
  step=1
31
  )
32
 
33
- coding_task = humaneval_v_data[index_input-1]
34
  qid = coding_task["qid"]
35
- image = coding_task["image"]
 
36
  function_signature = coding_task["function_signature"]
37
  ground_truth = coding_task["ground_truth_solution"]
38
  test_script = coding_task["test_script"]
 
 
39
 
40
  upper_columns = st.columns([2, 7])
41
  with upper_columns[0]:
42
  st.markdown(f"### Question ID: {qid}")
43
- st.image(image, use_column_width=True)
44
- st.markdown("---")
 
 
 
 
 
45
  with upper_columns[1]:
46
  st.markdown(f"### Function Signature:")
47
  st.markdown(f"")
48
  st.markdown(f"""```python
49
  {function_signature}
50
  ```""")
51
- st.markdown(f"### Test Script:")
52
  st.markdown(f"")
53
- st.markdown(f"""```python
54
- {test_script}
55
  ```""")
56
  st.markdown(f"### Ground Truth Solution:")
57
  st.markdown(f"")
58
  st.markdown(f"""```python
59
  {ground_truth}
 
 
 
 
 
60
  ```""")
 
1
  import streamlit as st
2
  import datasets
3
+ import json
4
+
5
+ def load_json(file_path):
6
+ with open(file_path, "r") as f:
7
+ return json.load(f)
8
 
9
  humaneval_v_data = datasets.load_from_disk("humaneval_v_test_hf")
10
+ idx_mapping = load_json("idx_mapping.json")
11
+
12
  st.set_page_config(layout="wide", page_title="HumanEval-V Viewer")
13
 
14
+ st.markdown("> <i>This is a viewer for the **HumanEval-V** benchmark, which includes 253 coding tasks. Use the navigation buttons or enter an index to browse through the tasks. Please note that image loading may take a moment after switching to the next task. If you encounter any issues or have questions, feel free to start a discussion [here](https://huggingface.co/datasets/HumanEval-V/HumanEval-V-Benchmark/discussions)</i>.", unsafe_allow_html=True)
15
  st.markdown("---")
16
 
17
+ max_index = 253
18
 
19
  # Initialize session state for index if not present
20
  if 'index' not in st.session_state:
 
30
  index_input = st.number_input(
31
  f"Go to index (1-{max_index}):",
32
  min_value=1,
33
+ max_value=max_index,
34
  value=st.session_state.index,
35
  key="index_input",
36
  help="Enter an index and jump to that index.",
37
  step=1
38
  )
39
 
40
+ coding_task = humaneval_v_data[idx_mapping[str(index_input-1)]]
41
  qid = coding_task["qid"]
42
+ diagram = coding_task["diagram"]
43
+ diagram_description = coding_task["ground_truth_diagram_description"]
44
  function_signature = coding_task["function_signature"]
45
  ground_truth = coding_task["ground_truth_solution"]
46
  test_script = coding_task["test_script"]
47
+ task_type = coding_task["task_type"]
48
+ capability_aspects = coding_task["capability_aspects"]
49
 
50
  upper_columns = st.columns([2, 7])
51
  with upper_columns[0]:
52
  st.markdown(f"### Question ID: {qid}")
53
+ st.image(diagram, use_column_width=True)
54
+ st.markdown(f"")
55
+ st.markdown(f"### Task Type: {task_type}")
56
+ st.markdown(f"")
57
+ st.markdown(f"### Capability Aspects:")
58
+ st.markdown(f"")
59
+ st.json(capability_aspects)
60
  with upper_columns[1]:
61
  st.markdown(f"### Function Signature:")
62
  st.markdown(f"")
63
  st.markdown(f"""```python
64
  {function_signature}
65
  ```""")
66
+ st.markdown(f"### Ground Truth Diagram Description:")
67
  st.markdown(f"")
68
+ st.markdown(f"""```markdown
69
+ {diagram_description}
70
  ```""")
71
  st.markdown(f"### Ground Truth Solution:")
72
  st.markdown(f"")
73
  st.markdown(f"""```python
74
  {ground_truth}
75
+ ```""")
76
+ st.markdown(f"### Test Script:")
77
+ st.markdown(f"")
78
+ st.markdown(f"""```python
79
+ {test_script}
80
  ```""")
humaneval_v_test_hf/data-00000-of-00001.arrow CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:774e4cacfc259917fb5a5e8433e6cacbcac01063cb30fd3560170b3a0a9fa76e
3
- size 12842912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4de3ea7bbf661db191acd796966a23ffa9e4cd7c57510709013266da0b715110
3
+ size 32923016
humaneval_v_test_hf/dataset_info.json CHANGED
@@ -3,15 +3,15 @@
3
  "citation": "",
4
  "config_name": "default",
5
  "dataset_name": "human_eval-v-benchmark",
6
- "dataset_size": 12841384,
7
  "description": "",
8
  "download_checksums": {
9
- "hf://datasets/HumanEval-V/HumanEval-V-Benchmark@50af2be232641ca618f6aecce901ca5e5a83b20e/data/test-00000-of-00001.parquet": {
10
- "num_bytes": 12571814,
11
  "checksum": null
12
  }
13
  },
14
- "download_size": 12571814,
15
  "features": {
16
  "qid": {
17
  "dtype": "string",
@@ -21,7 +21,7 @@
21
  "dtype": "string",
22
  "_type": "Value"
23
  },
24
- "image_description": {
25
  "dtype": "string",
26
  "_type": "Value"
27
  },
@@ -33,18 +33,73 @@
33
  "dtype": "string",
34
  "_type": "Value"
35
  },
36
- "image": {
37
  "_type": "Image"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  }
39
  },
40
  "homepage": "",
41
  "license": "",
42
- "size_in_bytes": 25413198,
43
  "splits": {
44
  "test": {
45
  "name": "test",
46
- "num_bytes": 12841384,
47
- "num_examples": 108,
48
  "dataset_name": "human_eval-v-benchmark"
49
  }
50
  },
 
3
  "citation": "",
4
  "config_name": "default",
5
  "dataset_name": "human_eval-v-benchmark",
6
+ "dataset_size": 32918942,
7
  "description": "",
8
  "download_checksums": {
9
+ "hf://datasets/HumanEval-V/HumanEval-V-Benchmark@8a3465c8cdcd25f65b3e13651e21cef9406e1799/data/test-00000-of-00001.parquet": {
10
+ "num_bytes": 32012630,
11
  "checksum": null
12
  }
13
  },
14
+ "download_size": 32012630,
15
  "features": {
16
  "qid": {
17
  "dtype": "string",
 
21
  "dtype": "string",
22
  "_type": "Value"
23
  },
24
+ "ground_truth_diagram_description": {
25
  "dtype": "string",
26
  "_type": "Value"
27
  },
 
33
  "dtype": "string",
34
  "_type": "Value"
35
  },
36
+ "diagram": {
37
  "_type": "Image"
38
+ },
39
+ "capability_aspects": {
40
+ "Common Sense": {
41
+ "feature": {
42
+ "dtype": "string",
43
+ "_type": "Value"
44
+ },
45
+ "_type": "Sequence"
46
+ },
47
+ "Data Structures": {
48
+ "feature": {
49
+ "dtype": "string",
50
+ "_type": "Value"
51
+ },
52
+ "_type": "Sequence"
53
+ },
54
+ "Dynamic Patterns": {
55
+ "feature": {
56
+ "dtype": "string",
57
+ "_type": "Value"
58
+ },
59
+ "_type": "Sequence"
60
+ },
61
+ "Geometric Objects": {
62
+ "feature": {
63
+ "dtype": "string",
64
+ "_type": "Value"
65
+ },
66
+ "_type": "Sequence"
67
+ },
68
+ "Mathematical Operations": {
69
+ "feature": {
70
+ "dtype": "string",
71
+ "_type": "Value"
72
+ },
73
+ "_type": "Sequence"
74
+ },
75
+ "Spatial Transformations": {
76
+ "feature": {
77
+ "dtype": "string",
78
+ "_type": "Value"
79
+ },
80
+ "_type": "Sequence"
81
+ },
82
+ "Topological Relations": {
83
+ "feature": {
84
+ "dtype": "string",
85
+ "_type": "Value"
86
+ },
87
+ "_type": "Sequence"
88
+ }
89
+ },
90
+ "task_type": {
91
+ "dtype": "string",
92
+ "_type": "Value"
93
  }
94
  },
95
  "homepage": "",
96
  "license": "",
97
+ "size_in_bytes": 64931572,
98
  "splits": {
99
  "test": {
100
  "name": "test",
101
+ "num_bytes": 32918942,
102
+ "num_examples": 253,
103
  "dataset_name": "human_eval-v-benchmark"
104
  }
105
  },
humaneval_v_test_hf/state.json CHANGED
@@ -4,7 +4,7 @@
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
- "_fingerprint": "d8ffc8935ede93f4",
8
  "_format_columns": null,
9
  "_format_kwargs": {},
10
  "_format_type": null,
 
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
+ "_fingerprint": "4c03438fb70814f3",
8
  "_format_columns": null,
9
  "_format_kwargs": {},
10
  "_format_type": null,
idx_mapping.json ADDED
@@ -0,0 +1,255 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "0": 0,
3
+ "1": 1,
4
+ "2": 2,
5
+ "27": 3,
6
+ "28": 4,
7
+ "29": 5,
8
+ "250": 6,
9
+ "251": 7,
10
+ "252": 8,
11
+ "30": 9,
12
+ "31": 10,
13
+ "32": 11,
14
+ "33": 12,
15
+ "34": 13,
16
+ "35": 14,
17
+ "36": 15,
18
+ "37": 16,
19
+ "38": 17,
20
+ "39": 18,
21
+ "40": 19,
22
+ "41": 20,
23
+ "42": 21,
24
+ "43": 22,
25
+ "44": 23,
26
+ "45": 24,
27
+ "46": 25,
28
+ "47": 26,
29
+ "48": 27,
30
+ "49": 28,
31
+ "50": 29,
32
+ "51": 30,
33
+ "52": 31,
34
+ "3": 32,
35
+ "4": 33,
36
+ "5": 34,
37
+ "53": 35,
38
+ "54": 36,
39
+ "55": 37,
40
+ "56": 38,
41
+ "57": 39,
42
+ "58": 40,
43
+ "59": 41,
44
+ "60": 42,
45
+ "61": 43,
46
+ "62": 44,
47
+ "63": 45,
48
+ "64": 46,
49
+ "65": 47,
50
+ "66": 48,
51
+ "67": 49,
52
+ "68": 50,
53
+ "69": 51,
54
+ "70": 52,
55
+ "71": 53,
56
+ "72": 54,
57
+ "73": 55,
58
+ "74": 56,
59
+ "75": 57,
60
+ "76": 58,
61
+ "6": 59,
62
+ "7": 60,
63
+ "8": 61,
64
+ "77": 62,
65
+ "78": 63,
66
+ "79": 64,
67
+ "80": 65,
68
+ "81": 66,
69
+ "82": 67,
70
+ "83": 68,
71
+ "84": 69,
72
+ "85": 70,
73
+ "86": 71,
74
+ "87": 72,
75
+ "88": 73,
76
+ "89": 74,
77
+ "90": 75,
78
+ "91": 76,
79
+ "92": 77,
80
+ "93": 78,
81
+ "94": 79,
82
+ "95": 80,
83
+ "96": 81,
84
+ "97": 82,
85
+ "98": 83,
86
+ "99": 84,
87
+ "100": 85,
88
+ "101": 86,
89
+ "102": 87,
90
+ "103": 88,
91
+ "104": 89,
92
+ "9": 90,
93
+ "10": 91,
94
+ "11": 92,
95
+ "105": 93,
96
+ "106": 94,
97
+ "107": 95,
98
+ "108": 96,
99
+ "109": 97,
100
+ "110": 98,
101
+ "111": 99,
102
+ "112": 100,
103
+ "113": 101,
104
+ "114": 102,
105
+ "115": 103,
106
+ "116": 104,
107
+ "117": 105,
108
+ "118": 106,
109
+ "119": 107,
110
+ "120": 108,
111
+ "121": 109,
112
+ "122": 110,
113
+ "123": 111,
114
+ "124": 112,
115
+ "125": 113,
116
+ "126": 114,
117
+ "12": 115,
118
+ "13": 116,
119
+ "14": 117,
120
+ "127": 118,
121
+ "128": 119,
122
+ "129": 120,
123
+ "130": 121,
124
+ "131": 122,
125
+ "132": 123,
126
+ "133": 124,
127
+ "134": 125,
128
+ "135": 126,
129
+ "136": 127,
130
+ "137": 128,
131
+ "138": 129,
132
+ "139": 130,
133
+ "140": 131,
134
+ "141": 132,
135
+ "142": 133,
136
+ "143": 134,
137
+ "144": 135,
138
+ "145": 136,
139
+ "146": 137,
140
+ "15": 138,
141
+ "16": 139,
142
+ "17": 140,
143
+ "147": 141,
144
+ "148": 142,
145
+ "149": 143,
146
+ "150": 144,
147
+ "151": 145,
148
+ "152": 146,
149
+ "153": 147,
150
+ "154": 148,
151
+ "155": 149,
152
+ "156": 150,
153
+ "157": 151,
154
+ "158": 152,
155
+ "159": 153,
156
+ "160": 154,
157
+ "161": 155,
158
+ "162": 156,
159
+ "163": 157,
160
+ "164": 158,
161
+ "165": 159,
162
+ "166": 160,
163
+ "167": 161,
164
+ "168": 162,
165
+ "169": 163,
166
+ "170": 164,
167
+ "171": 165,
168
+ "172": 166,
169
+ "18": 167,
170
+ "19": 168,
171
+ "20": 169,
172
+ "173": 170,
173
+ "174": 171,
174
+ "175": 172,
175
+ "176": 173,
176
+ "177": 174,
177
+ "178": 175,
178
+ "179": 176,
179
+ "180": 177,
180
+ "181": 178,
181
+ "182": 179,
182
+ "183": 180,
183
+ "184": 181,
184
+ "185": 182,
185
+ "186": 183,
186
+ "187": 184,
187
+ "188": 185,
188
+ "189": 186,
189
+ "190": 187,
190
+ "191": 188,
191
+ "192": 189,
192
+ "193": 190,
193
+ "194": 191,
194
+ "195": 192,
195
+ "196": 193,
196
+ "197": 194,
197
+ "198": 195,
198
+ "199": 196,
199
+ "21": 197,
200
+ "22": 198,
201
+ "23": 199,
202
+ "200": 200,
203
+ "201": 201,
204
+ "202": 202,
205
+ "203": 203,
206
+ "204": 204,
207
+ "205": 205,
208
+ "206": 206,
209
+ "207": 207,
210
+ "208": 208,
211
+ "209": 209,
212
+ "210": 210,
213
+ "211": 211,
214
+ "212": 212,
215
+ "213": 213,
216
+ "214": 214,
217
+ "215": 215,
218
+ "216": 216,
219
+ "217": 217,
220
+ "218": 218,
221
+ "219": 219,
222
+ "220": 220,
223
+ "221": 221,
224
+ "222": 222,
225
+ "223": 223,
226
+ "24": 224,
227
+ "25": 225,
228
+ "26": 226,
229
+ "224": 227,
230
+ "225": 228,
231
+ "226": 229,
232
+ "227": 230,
233
+ "228": 231,
234
+ "229": 232,
235
+ "230": 233,
236
+ "231": 234,
237
+ "232": 235,
238
+ "233": 236,
239
+ "234": 237,
240
+ "235": 238,
241
+ "236": 239,
242
+ "237": 240,
243
+ "238": 241,
244
+ "239": 242,
245
+ "240": 243,
246
+ "241": 244,
247
+ "242": 245,
248
+ "243": 246,
249
+ "244": 247,
250
+ "245": 248,
251
+ "246": 249,
252
+ "247": 250,
253
+ "248": 251,
254
+ "249": 252
255
+ }
sort_id.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import datasets
2
+ import json
3
+
4
+ def get_float_qid(qid):
5
+ return float(qid[1:].replace('-', '.'))
6
+
7
+ humaneval_v_data = datasets.load_from_disk("humaneval_v_test_hf")
8
+
9
+ qid_to_idx_mapping = {x["qid"]: idx for idx, x in enumerate(humaneval_v_data)}
10
+
11
+ reranked_data = sorted(humaneval_v_data, key=lambda x: get_float_qid(x["qid"]))
12
+
13
+ id_idx_mappping = {x["qid"]: idx for idx, x in enumerate(reranked_data)}
14
+
15
+
16
+ old_to_new_id = {id_idx_mappping[x["qid"]]: qid_to_idx_mapping[x["qid"]] for x in humaneval_v_data}
17
+
18
+ print(json.dumps(old_to_new_id, indent=4))