File size: 1,348 Bytes
25e2635
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import json
import ijson
import random
from tqdm import tqdm


# Build one sample per card that has an image: a unique id, a text
# description assembled from the card's fields, and the image reference.
samples = []

# Open in binary mode: ijson parses bytes, and handing it a text-mode file
# forces an extra str -> bytes conversion (and a deprecation warning in recent
# ijson releases). The `with` block guarantees the handle is closed even if
# parsing or formatting raises part-way through.
with open('all-cards-clean.json', 'rb') as file:
    # Stream the elements of the top-level JSON array one at a time instead
    # of loading the whole document into memory.
    for item in ijson.items(file, 'item'):
        # Cards without an image cannot be used; skip them.
        if item.get('img') is None:
            continue

        # Look the nested mapping up once; tolerate a missing/null "details"
        # entry instead of failing with AttributeError on `.get`.
        details = item.get('details') or {}
        mana_cost = details.get('mana_cost')
        colors = details.get('colors')

        # "Cost" and "Colors" lines are emitted only when the value is truthy;
        # "Type" and "Desc" are always emitted (rendering None if absent).
        desc_l = (
            f'Title: {item.get("name")}\n'
            + (f'Cost: {mana_cost}\n' if mana_cost else '')
            + (f'Colors: {colors}\n' if colors else '')
            + f'Type: {details.get("type_line")}\n'
            + f'Desc: {details.get("oracle_text")}'
        )
        samples.append({
            'uid': item.get('id'),
            'sentence': desc_l,
            'image': item.get('img'),
        })

# Fixed seed so the generated pairing is reproducible across runs.
random.seed(420)

# For every sample (the "left" side), draw 100 partners from the full sample
# list with replacement and emit one record per (left, right) pair. The draw
# happens once per left sample, in list order, so the sequence of random
# numbers consumed matches a plain nested for-loop.
data = [
    {
        'uuid': f'{left.get("uid")}_{right.get("uid")}',
        'sentence_1': left.get('sentence'),
        'sentence_2': right.get('sentence'),
        'image_1': left.get('image'),
        'image_2': right.get('image'),
    }
    for left in samples
    for right in random.choices(samples, k=100)
]

# Serialize every generated pair into a single JSON array on disk.
with open('dataset.json', 'w') as out_file:
    json.dump(data, out_file)

# Report how many pairs were produced and from how many source samples.
pair_count = len(data)
source_count = len(samples)
print(f'Generated {pair_count} samples from {source_count} elements.')