# File size: 690 Bytes (revision 2c8dc05)
import os, shutil
from uparser import wordparse
from joblib import Parallel, delayed
from tqdm import tqdm
num_jobs = 20

infolder = 'Original'
outfolder = 'Words'


def _reset_dir(path):
    """Remove ``path`` (and everything in it) if it exists, then recreate it empty."""
    if os.path.exists(path):
        shutil.rmtree(path)
    os.mkdir(path)


def _first_fields(lines):
    """Return the first tab-separated field of every line in ``lines``.

    Each line is stripped of surrounding whitespace before it is split, so a
    blank line contributes an empty string.
    """
    return [line.strip().split('\t')[0] for line in lines]


def _output_stem(fname):
    """Derive the output base name from an input file name.

    The stem is the second ``_``-separated piece of ``fname``, cut at its
    first ``.``.  A name that contains no underscore falls back to its first
    (and only) piece instead of raising ``IndexError``.
    """
    pieces = fname.split('_')
    tagged = pieces[1] if len(pieces) > 1 else pieces[0]
    return tagged.split('.')[0]


def main(in_dir=infolder, out_dir=outfolder, encoding=None):
    """Write the first column of every file in ``in_dir`` to ``out_dir``.

    ``out_dir`` is deleted and recreated first, so earlier results are
    discarded.  For each regular file in ``in_dir`` the first tab-separated
    field of every line is written, one per line, to
    ``<out_dir>/<stem>.words``; the stem is printed as each file is handled.
    Two inputs that map to the same stem overwrite each other.

    Args:
        in_dir: Folder holding the tab-separated input files.
        out_dir: Folder that receives the ``.words`` files.
        encoding: Text encoding used for reading and writing.  ``None``
            keeps the platform default.
    """
    # Reset the output folder before touching the input, so a run never
    # mixes fresh files with leftovers from a previous one.
    _reset_dir(out_dir)

    # os.listdir returns entries in arbitrary order; sorting makes the
    # processing (and printing) order reproducible.
    for fname in sorted(os.listdir(in_dir)):
        src_path = os.path.join(in_dir, fname)
        if not os.path.isfile(src_path):
            # Sub-directories cannot be opened as text files; skip them.
            continue

        with open(src_path, 'r', encoding=encoding) as src:
            words = _first_fields(src)

        stem = _output_stem(fname)
        print(stem)

        dst_path = os.path.join(out_dir, f'{stem}.words')
        with open(dst_path, 'w', encoding=encoding) as dst:
            dst.writelines(f'{word}\n' for word in words)


if __name__ == '__main__':
    main()