File size: 690 Bytes
2c8dc05
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import os, shutil
from uparser import wordparse
from joblib import Parallel, delayed
from tqdm import tqdm

# Worker count; not referenced anywhere in the visible part of this script.
num_jobs = 20
# Directory whose entries are read as input files.
infolder = 'Original'
# Directory that receives the generated `.words` files.
outfolder = 'Words'


def _recreate_empty(path):
    """Remove *path* (and everything under it) if present, then create it."""
    if os.path.exists(path):
        shutil.rmtree(path)
    os.mkdir(path)


# Every run starts from an empty output directory, so results from a
# previous run never mix with the new ones.
for fdr in (outfolder,):
    _recreate_empty(fdr)

# Convert every entry of `infolder` into a one-word-per-line `.words` file in
# `outfolder`, keeping only the first tab-separated column of each input line.
flist = os.listdir(infolder)
for fname in flist:
    # Surrounding whitespace is stripped from the whole line *before* it is
    # split on tabs, so a blank line contributes an empty word.
    # NOTE(review): the file is opened with the platform default encoding --
    # confirm that matches the input data.
    with open(os.path.join(infolder, fname), 'r') as f:
        words = [line.strip().split('\t')[0] for line in f]

    # Output stem: the second '_'-separated piece of the file name, cut at
    # its first '.'.  A name without '_' raises IndexError here, and two
    # inputs that share the same stem overwrite each other's output.
    fout = fname.split('_')[1].split('.')[0]
    print(fout)

    with open(os.path.join(outfolder, fout + '.words'), 'w') as f:
        f.writelines(w + '\n' for w in words)