File size: 942 Bytes
6fc683c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import argparse
import json
import random

def sample_lines(lines, sample_ratio, seed=0):
    """Return a reproducible random subset of *lines*.

    A copy of *lines* is shuffled with the module-level ``random`` generator
    seeded with *seed*, and the first ``int(len(lines) * sample_ratio)``
    entries of the shuffled copy are returned. The input list is not mutated.

    Args:
        lines: Data lines to sample from (header already removed).
        sample_ratio: Fraction of lines to keep. Values above 1 keep
            every line.
        seed: Seed passed to ``random.seed`` so repeated runs pick the
            same subset.

    Returns:
        A new list holding the sampled lines in shuffled order.

    Raises:
        ValueError: If *sample_ratio* is negative. A negative ratio would
            otherwise turn into a negative slice bound and silently keep
            "all but the last k" lines.
    """
    if sample_ratio < 0:
        raise ValueError(f"sample_ratio must be non-negative, got {sample_ratio}")

    shuffled = list(lines)
    random.seed(seed)
    random.shuffle(shuffled)
    return shuffled[: int(len(shuffled) * sample_ratio)]


def main(argv=None):
    """Command-line entry point: down-sample a file that has a header line.

    Reads ``--input_path``, keeps its first line as the header, and writes
    the header followed by a seeded random ``--sample_ratio`` fraction of
    the remaining lines to ``--output_path``.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument(
        "--input_path",
        default=None,
        type=str,
        required=True,
        help="input xnli file",
    )
    parser.add_argument(
        "--output_path",
        default=None,
        type=str,
        required=True,
        help="output xnli file",
    )
    parser.add_argument(
        "--sample_ratio",
        default=None,
        type=float,
        required=True,
        help="sample ratio",
    )

    args = parser.parse_args(argv)
    if args.sample_ratio < 0:
        parser.error("--sample_ratio must be non-negative")

    # Explicit encoding keeps the result independent of the platform locale;
    # the context manager guarantees the handle is closed.
    with open(args.input_path, "r", encoding="utf-8") as fin:
        lines = fin.readlines()

    if not lines:
        parser.error(f"input file is empty: {args.input_path}")

    # A final line without a newline would be glued to whatever record
    # follows it once the rows are shuffled, so terminate it first.
    if not lines[-1].endswith("\n"):
        lines[-1] += "\n"

    head, *rows = lines
    sampled = sample_lines(rows, args.sample_ratio)

    with open(args.output_path, "w", encoding="utf-8") as fout:
        fout.write(head)
        fout.writelines(sampled)


if __name__ == "__main__":
    main()