"""Build a filename -> caption JSON file for the locally available SBU images.

The script pairs each line of the SBU URL list with the same-numbered line of
the SBU caption list, keeps only the entries whose image file is present in
the images directory, and writes the resulting mapping as JSON.
"""
import json
import os
import sys
from glob import glob

URLS_PATH = 'dataset/SBU_captioned_photo_dataset_urls.txt'
CAPTIONS_PATH = 'dataset/SBU_captioned_photo_dataset_captions.txt'
IMAGES_PATTERN = 'images/*'
OUTPUT_PATH = 'annotations/subcaption.json'


def load_captions(urls_path, captions_path):
    """Return a dict mapping each URL's final path component to its caption.

    Line *k* of ``urls_path`` is paired with line *k* of ``captions_path``;
    surrounding whitespace is stripped from both. If the files differ in
    length, the extra lines of the longer one are ignored. When two URLs
    share a file name, the later caption wins.
    """
    with open(urls_path, 'r') as url_file, \
            open(captions_path, 'r') as caption_file:
        return {
            url.strip().split('/')[-1]: caption.strip()
            for url, caption in zip(url_file, caption_file)
        }


def main(urls_path=URLS_PATH, captions_path=CAPTIONS_PATH,
         images_pattern=IMAGES_PATTERN, output_path=OUTPUT_PATH):
    """Write the caption annotations for every image matched on disk.

    Args:
        urls_path: Text file with one image URL per line.
        captions_path: Text file with one caption per line, aligned with
            ``urls_path``.
        images_pattern: Glob pattern selecting the image files to annotate.
        output_path: Destination of the JSON mapping; its parent directory
            is created when missing.

    Returns:
        The ``{file name: caption}`` dict that was written to ``output_path``.
    """
    name2cap = load_captions(urls_path, captions_path)

    data_list = {}
    skipped = []
    for image_path in glob(images_pattern):
        # basename() honours the platform separator, unlike split('/').
        image_name = os.path.basename(image_path)
        if image_name in name2cap:
            data_list[image_name] = name2cap[image_name]
        else:
            # A file without a URL entry must not abort the whole run.
            skipped.append(image_name)

    if skipped:
        print('skipped %d file(s) without a caption entry' % len(skipped),
              file=sys.stderr)

    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    # The context manager guarantees the JSON is flushed and the handle closed.
    with open(output_path, 'w') as fp:
        json.dump(data_list, fp)

    print(len(data_list))
    return data_list


if __name__ == '__main__':
    main()