File size: 2,484 Bytes
ced4316
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import json
import os
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Any

from jsonschema import validate

# TODO: load from file
# JSON Schema describing the serialised form of a ``RawPaper`` record.
# No "required" list is declared, so the schema only constrains the types of
# the keys that are present.
schema = {
    "title": "RawPaper",
    "type": "object",
    "properties": {
        "paper_uuid": {"type": "string"},
        "name": {"type": "string"},
        "collection_id": {"type": "string"},
        "collection_acronym": {"type": "string"},
        "volume_id": {"type": "string"},
        "booktitle": {"type": "string"},
        "paper_id": {"type": "integer"},
        "year": {"type": ["integer", "null"]},
        "paper_title": {"type": "string"},
        "authors": {
            "type": "array",
            "items": {
                "type": "object",
                # "properties" is the keyword that constrains an object's
                # members; "items" only applies to arrays, so using it here
                # left "first"/"last" completely unchecked.
                "properties": {
                    "first": {"type": ["string", "null"]},
                    "last": {"type": ["string", "null"]},
                },
            },
        },
        "abstract": {"type": ["string", "null"]},
        # NOTE(review): RawPaper.url is annotated ``str | None`` but null is
        # rejected here -- confirm which side is intended.
        "url": {"type": "string"},
        # NOTE(review): RawPaper.bibkey is annotated ``str`` but null is
        # accepted here -- confirm which side is intended.
        "bibkey": {"type": ["string", "null"]},
        "doi": {"type": ["string", "null"]},
        "fulltext": {
            "type": ["object", "null"],
            # Every key of the object maps to a list of strings.
            "patternProperties": {"^.*$": {"type": "array", "items": {"type": "string"}}},
        },
    },
}

assert isinstance(schema, dict)


@dataclass
class RawPaper:
    paper_uuid: str
    name: str

    collection_id: str
    collection_acronym: str
    volume_id: str
    booktitle: str
    paper_id: int
    year: int | None

    paper_title: str
    authors: list[dict[str, str | None]]
    abstract: str | None
    url: str | None
    bibkey: str
    doi: str | None
    fulltext: dict[str, list[str]] | None

    @classmethod
    def load_from_json(cls, fpath: str | Path) -> "RawPaper":
        fpath = fpath if not isinstance(fpath, Path) else str(fpath)
        # return cls(**sienna.load(fpath))
        with open(fpath, "r") as f:
            data = cls(**json.load(f))
        return data

    def get_fname(self) -> str:
        return f"{self.name}.json"

    def dumps(self) -> dict[str, Any]:
        return asdict(self)

    def validate(self) -> None:
        validate(self.dumps(), schema=schema)

    def save(self, odir: str) -> None:
        self.validate()
        if not os.path.exists(odir):
            os.makedirs(odir, exist_ok=True)
        opath = os.path.join(odir, self.get_fname())
        with open(opath, "w") as f:
            f.write(json.dumps(self.dumps(), indent=2))