jacklangerman committed on
Commit
9cd050d
·
1 Parent(s): 7e7f798

update for 2025

Browse files
Files changed (2) hide show
  1. example_notebook.ipynb +0 -0
  2. script.py +33 -35
example_notebook.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
script.py CHANGED
@@ -1,58 +1,56 @@
1
  ### This is example of the script that will be run in the test environment.
2
- ### Some parts of the code are compulsory and you should NOT CHANGE THEM.
3
- ### They are between '''---compulsory---''' comments.
4
  ### You can change the rest of the code to define and test your solution.
5
  ### However, you should not change the signature of the provided function.
6
- ### The script would save "submission.parquet" file in the current directory.
7
  ### You can use any additional files and subdirectories to organize your code.
8
 
9
- '''---compulsory---'''
10
- import hoho; hoho.setup() # YOU MUST CALL hoho.setup() BEFORE ANYTHING ELSE
11
- '''---compulsory---'''
12
-
13
  from pathlib import Path
14
  from tqdm import tqdm
15
  import pandas as pd
16
  import numpy as np
17
-
 
18
 
19
  def empty_solution(sample):
20
  '''Return a minimal valid solution, i.e. 2 vertices and 1 edge.'''
21
  return np.zeros((2,3)), [(0, 1)]
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  if __name__ == "__main__":
25
  print ("------------ Loading dataset------------ ")
26
- params = hoho.get_params()
27
-
28
- # by default it is usually better to use `get_dataset()` like this
29
- #
30
- # dataset = hoho.get_dataset(split='all')
31
- #
32
- # but in this case (because we don't do anything with the sample
33
- # anyway) we set `decode=None`. We can set the `split` argument
34
- # to 'train' or 'val' ('all' defaults back to 'train') if we are
35
- # testing ourselves locally.
36
- #
37
- # dataset = hoho.get_dataset(split='val', decode=None)
38
- #
39
- # On the test server *`split` must be set to 'all'*
40
- # to compute both the public and private leaderboards.
41
- #
42
- dataset = hoho.get_dataset(split='all', decode=None)
43
 
44
  print('------------ Now you can do your solution ---------------')
45
  solution = []
46
- for i, sample in enumerate(tqdm(dataset)):
47
- # replace this with your solution
48
- pred_vertices, pred_edges = empty_solution(sample)
 
 
 
 
 
 
 
 
49
 
50
- solution.append({
51
- '__key__': sample['__key__'],
52
- 'wf_vertices': pred_vertices.tolist(),
53
- 'wf_edges': pred_edges
54
- })
55
  print('------------ Saving results ---------------')
56
- sub = pd.DataFrame(solution, columns=["__key__", "wf_vertices", "wf_edges"])
57
- sub.to_parquet(Path(params['output_path']) / "submission.parquet")
58
  print("------------ Done ------------ ")
 
1
  ### This is example of the script that will be run in the test environment.
2
+
 
3
  ### You can change the rest of the code to define and test your solution.
4
  ### However, you should not change the signature of the provided function.
5
+ ### The script saves "submission.parquet" file in the current directory.
6
  ### You can use any additional files and subdirectories to organize your code.
7
 
 
 
 
 
8
  from pathlib import Path
9
  from tqdm import tqdm
10
  import pandas as pd
11
  import numpy as np
12
+ from datasets import load_dataset
13
+ from typing import Dict
14
 
15
  def empty_solution(sample):
16
  '''Return a minimal valid solution, i.e. 2 vertices and 1 edge.'''
17
  return np.zeros((2,3)), [(0, 1)]
18
 
19
+ class Sample(Dict):
20
+ def pick_repr_data(self, x):
21
+ if hasattr(x, 'shape'):
22
+ return x.shape
23
+ if isinstance(x, (str, float, int)):
24
+ return x
25
+ if isinstance(x, list):
26
+ return [type(x[0])] if len(x) > 0 else []
27
+ return type(x)
28
+
29
+ def __repr__(self):
30
+ # return str({k: v.shape if hasattr(v, 'shape') else [type(v[0])] if isinstance(v, list) else type(v) for k,v in self.items()})
31
+ return str({k: self.pick_repr_data(v) for k,v in self.items()})
32
+
33
 
34
  if __name__ == "__main__":
35
  print ("------------ Loading dataset------------ ")
36
+ dataset = load_dataset("usm3d/hoho25k_test_x", trust_remote_code=True)
37
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
  print('------------ Now you can do your solution ---------------')
40
  solution = []
41
+ for subset_name in dataset:
42
+ for i, sample in enumerate(tqdm(dataset[subset_name])):
43
+ # replace this with your solution
44
+ print(Sample(sample), flush=True)
45
+ print('------')
46
+ pred_vertices, pred_edges = empty_solution(sample)
47
+ solution.append({
48
+ 'order_id': sample['order_id'],
49
+ 'wf_vertices': pred_vertices.tolist(),
50
+ 'wf_edges': pred_edges
51
+ })
52
 
 
 
 
 
 
53
  print('------------ Saving results ---------------')
54
+ sub = pd.DataFrame(solution, columns=["order_id", "wf_vertices", "wf_edges"])
55
+ sub.to_parquet("submission.parquet")
56
  print("------------ Done ------------ ")