Spaces:
Build error
Build error
import json | |
import ijson | |
import random | |
from tqdm import tqdm | |
def _describe_card(item):
    """Return the multi-line text description for one parsed card record.

    The description always contains ``Title``, ``Type`` and ``Desc`` lines;
    ``Cost`` and ``Colors`` lines are included only when the corresponding
    value under ``item["details"]`` is truthy. Missing values on the
    always-present lines are rendered as the string ``None``.

    A record whose ``details`` entry is absent or null is treated as having
    no details instead of raising ``AttributeError``.
    """
    details = item.get("details") or {}
    parts = [f'Title: {item.get("name")}\n']
    mana_cost = details.get("mana_cost")
    if mana_cost:
        parts.append(f'Cost: {mana_cost}\n')
    colors = details.get("colors")
    if colors:
        parts.append(f'Colors: {colors}\n')
    parts.append(f'Type: {details.get("type_line")}\n')
    parts.append(f'Desc: {details.get("oracle_text")}')
    return ''.join(parts)


# One record per card that has an image: {'uid', 'sentence', 'image'}.
samples = []
# Number of array items read from the input file (including skipped ones);
# stays 0 when the array is empty.
idx = 0
# Binary mode: ijson parses bytes, and the context manager guarantees the
# handle is closed even if parsing or record building raises.
with open('all-cards-clean.json', 'rb') as file:
    # Stream the top-level JSON array one element at a time instead of
    # loading the whole file into memory.
    array_items = ijson.items(file, 'item')
    for idx, item in enumerate(array_items, start=1):
        # Cards without an image cannot take part in image/text pairs.
        if item.get('img') is None:
            continue
        desc_l = _describe_card(item)
        samples.append({'uid': item.get('id'), 'sentence': desc_l, 'image': item.get('img')})
# Fixed seed so the generated pairing is reproducible between runs.
random.seed(420)

# For every sample, draw 100 partners (with replacement, so repeats and
# self-pairs are possible) and emit one pair record per draw.
data = []
for left in samples:
    partners = random.choices(samples, k=100)
    data.extend(
        {
            'uuid': f'{left.get("uid")}_{right.get("uid")}',
            'sentence_1': left.get('sentence'),
            'sentence_2': right.get('sentence'),
            'image_1': left.get('image'),
            'image_2': right.get('image'),
        }
        for right in partners
    )
# Persist all generated pairs as a single JSON array.
with open('dataset.json', 'w') as out_file:
    json.dump(data, fp=out_file)

# Summary: number of pair records written and number of source cards used.
print(f'Generated {len(data)} samples from {len(samples)} elements.')