jacklangerman committed on
Commit
1ccead8
·
1 Parent(s): 2e4af42
Files changed (1) hide show
  1. script.py +23 -2
script.py CHANGED
@@ -40,10 +40,31 @@ if __name__ == "__main__":
40
  print(params)
41
  import os
42
 
43
- print(os.system('pwd'))
44
  print(os.system('ls -lahtr'))
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
- dataset = load_dataset(params['dataset'], trust_remote_code=True, use_auth_token=params['token'])
 
 
 
 
 
 
 
 
 
47
  print(dataset, flush=True)
48
  # dataset = load_dataset('webdataset', data_files={)
49
 
 
40
  print(params)
41
  import os
42
 
43
+ print('pwd:', os.system('pwd'))
44
  print(os.system('ls -lahtr'))
45
+ print(os.system('ls -lahtr /tmp/data/'))
46
+
47
+
48
+ data_path_test_server = Path('/tmp/data')
49
+ data_path_local = Path().home() / '.cache/huggingface/datasets/usm3d___hoho25k_test_x/'
50
+
51
+ if data_path_test_server.exists():
52
+ data_path = data_path_test_server
53
+ else:
54
+ data_path = data_path_local
55
+
56
+ print([str(p) for p in data_path.rglob('*validation*.arrow')])
57
 
58
+ # dataset = load_dataset(params['dataset'], trust_remote_code=True, use_auth_token=params['token'])
59
+ dataset = load_dataset(
60
+ "arrow",
61
+ data_files={
62
+ "validation": [str(p) for p in data_path.rglob('*validation*.arrow')],
63
+ "test": [str(p) for p in data_path.rglob('*test*.arrow')],
64
+ },
65
+ trust_remote_code=True,
66
+ # streaming=True
67
+ )
68
  print(dataset, flush=True)
69
  # dataset = load_dataset('webdataset', data_files={)
70