Commit
·
9cd050d
1
Parent(s):
7e7f798
update for 2025
Browse files
- example_notebook.ipynb +0 -0
- script.py +33 -35
example_notebook.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
script.py
CHANGED
@@ -1,58 +1,56 @@
|
|
1 |
### This is an example of the script that will be run in the test environment.
|
2 |
-
|
3 |
-
### They are between '''---compulsory---''' comments.
|
4 |
### You can change the rest of the code to define and test your solution.
|
5 |
### However, you should not change the signature of the provided function.
|
6 |
-
### The script
|
7 |
### You can use any additional files and subdirectories to organize your code.
|
8 |
|
9 |
-
'''---compulsory---'''
|
10 |
-
import hoho; hoho.setup() # YOU MUST CALL hoho.setup() BEFORE ANYTHING ELSE
|
11 |
-
'''---compulsory---'''
|
12 |
-
|
13 |
from pathlib import Path
|
14 |
from tqdm import tqdm
|
15 |
import pandas as pd
|
16 |
import numpy as np
|
17 |
-
|
|
|
18 |
|
19 |
def empty_solution(sample):
|
20 |
'''Return a minimal valid solution, i.e. 2 vertices and 1 edge.'''
|
21 |
return np.zeros((2,3)), [(0, 1)]
|
22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
if __name__ == "__main__":
|
25 |
print ("------------ Loading dataset------------ ")
|
26 |
-
|
27 |
-
|
28 |
-
# by default it is usually better to use `get_dataset()` like this
|
29 |
-
#
|
30 |
-
# dataset = hoho.get_dataset(split='all')
|
31 |
-
#
|
32 |
-
# but in this case (because we don't do anything with the sample
|
33 |
-
# anyway) we set `decode=None`. We can set the `split` argument
|
34 |
-
# to 'train' or 'val' ('all' defaults back to 'train') if we are
|
35 |
-
# testing ourselves locally.
|
36 |
-
#
|
37 |
-
# dataset = hoho.get_dataset(split='val', decode=None)
|
38 |
-
#
|
39 |
-
# On the test server *`split` must be set to 'all'*
|
40 |
-
# to compute both the public and private leaderboards.
|
41 |
-
#
|
42 |
-
dataset = hoho.get_dataset(split='all', decode=None)
|
43 |
|
44 |
print('------------ Now you can do your solution ---------------')
|
45 |
solution = []
|
46 |
-
for
|
47 |
-
|
48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
|
50 |
-
solution.append({
|
51 |
-
'__key__': sample['__key__'],
|
52 |
-
'wf_vertices': pred_vertices.tolist(),
|
53 |
-
'wf_edges': pred_edges
|
54 |
-
})
|
55 |
print('------------ Saving results ---------------')
|
56 |
-
sub = pd.DataFrame(solution, columns=["
|
57 |
-
sub.to_parquet(
|
58 |
print("------------ Done ------------ ")
|
|
|
1 |
### This is an example of the script that will be run in the test environment.
|
2 |
+
|
|
|
3 |
### You can change the rest of the code to define and test your solution.
|
4 |
### However, you should not change the signature of the provided function.
|
5 |
+
### The script saves a "submission.parquet" file in the current directory.
|
6 |
### You can use any additional files and subdirectories to organize your code.
|
7 |
|
|
|
|
|
|
|
|
|
8 |
from pathlib import Path
|
9 |
from tqdm import tqdm
|
10 |
import pandas as pd
|
11 |
import numpy as np
|
12 |
+
from datasets import load_dataset
|
13 |
+
from typing import Dict
|
14 |
|
15 |
def empty_solution(sample):
|
16 |
'''Return a minimal valid solution, i.e. 2 vertices and 1 edge.'''
|
17 |
return np.zeros((2,3)), [(0, 1)]
|
18 |
|
19 |
+
class Sample(Dict):
|
20 |
+
def pick_repr_data(self, x):
|
21 |
+
if hasattr(x, 'shape'):
|
22 |
+
return x.shape
|
23 |
+
if isinstance(x, (str, float, int)):
|
24 |
+
return x
|
25 |
+
if isinstance(x, list):
|
26 |
+
return [type(x[0])] if len(x) > 0 else []
|
27 |
+
return type(x)
|
28 |
+
|
29 |
+
def __repr__(self):
|
30 |
+
# return str({k: v.shape if hasattr(v, 'shape') else [type(v[0])] if isinstance(v, list) else type(v) for k,v in self.items()})
|
31 |
+
return str({k: self.pick_repr_data(v) for k,v in self.items()})
|
32 |
+
|
33 |
|
34 |
if __name__ == "__main__":
|
35 |
print ("------------ Loading dataset------------ ")
|
36 |
+
dataset = load_dataset("usm3d/hoho25k_test_x", trust_remote_code=True)
|
37 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
print('------------ Now you can do your solution ---------------')
|
40 |
solution = []
|
41 |
+
for subset_name in dataset:
|
42 |
+
for i, sample in enumerate(tqdm(dataset[subset_name])):
|
43 |
+
# replace this with your solution
|
44 |
+
print(Sample(sample), flush=True)
|
45 |
+
print('------')
|
46 |
+
pred_vertices, pred_edges = empty_solution(sample)
|
47 |
+
solution.append({
|
48 |
+
'order_id': sample['order_id'],
|
49 |
+
'wf_vertices': pred_vertices.tolist(),
|
50 |
+
'wf_edges': pred_edges
|
51 |
+
})
|
52 |
|
|
|
|
|
|
|
|
|
|
|
53 |
print('------------ Saving results ---------------')
|
54 |
+
sub = pd.DataFrame(solution, columns=["order_id", "wf_vertices", "wf_edges"])
|
55 |
+
sub.to_parquet("submission.parquet")
|
56 |
print("------------ Done ------------ ")
|