|
import json |
|
import os |
|
from dataclasses import asdict, dataclass |
|
from pathlib import Path |
|
from typing import Any |
|
|
|
from jsonschema import validate |
|
|
|
|
|
# JSON Schema describing the dict form of a ``RawPaper`` (see ``RawPaper.dumps``).
#
# NOTE(review): two entries disagree with the ``RawPaper`` field annotations —
# ``url`` is a plain string here but ``str | None`` on the dataclass, while
# ``bibkey`` is nullable here but ``str`` on the dataclass. Left unchanged
# because it is unclear which side is intended — TODO confirm.
schema = {
    "title": "RawPaper",
    "type": "object",
    "properties": {
        "paper_uuid": {"type": "string"},
        "name": {"type": "string"},
        "collection_id": {"type": "string"},
        "collection_acronym": {"type": "string"},
        "volume_id": {"type": "string"},
        "booktitle": {"type": "string"},
        "paper_id": {"type": "integer"},
        "year": {"type": ["integer", "null"]},
        "paper_title": {"type": "string"},
        "authors": {
            "type": "array",
            "items": {
                "type": "object",
                # Object members are constrained with "properties"; the
                # "items" keyword only applies to arrays, so using it here
                # left "first"/"last" completely unchecked.
                "properties": {
                    "first": {"type": ["string", "null"]},
                    "last": {"type": ["string", "null"]},
                },
            },
        },
        "abstract": {"type": ["string", "null"]},
        "url": {"type": "string"},
        "bibkey": {"type": ["string", "null"]},
        "doi": {"type": ["string", "null"]},
        "fulltext": {
            "type": ["object", "null"],
            # Every key of the object maps to a list of strings.
            "patternProperties": {"^.*$": {"type": "array", "items": {"type": "string"}}},
        },
    },
}

assert isinstance(schema, dict)
|
|
|
|
|
@dataclass |
|
class RawPaper: |
|
paper_uuid: str |
|
name: str |
|
|
|
collection_id: str |
|
collection_acronym: str |
|
volume_id: str |
|
booktitle: str |
|
paper_id: int |
|
year: int | None |
|
|
|
paper_title: str |
|
authors: list[dict[str, str | None]] |
|
abstract: str | None |
|
url: str | None |
|
bibkey: str |
|
doi: str | None |
|
fulltext: dict[str, list[str]] | None |
|
|
|
@classmethod |
|
def load_from_json(cls, fpath: str | Path) -> "RawPaper": |
|
fpath = fpath if not isinstance(fpath, Path) else str(fpath) |
|
|
|
with open(fpath, "r") as f: |
|
data = cls(**json.load(f)) |
|
return data |
|
|
|
def get_fname(self) -> str: |
|
return f"{self.name}.json" |
|
|
|
def dumps(self) -> dict[str, Any]: |
|
return asdict(self) |
|
|
|
def validate(self) -> None: |
|
validate(self.dumps(), schema=schema) |
|
|
|
def save(self, odir: str) -> None: |
|
self.validate() |
|
if not os.path.exists(odir): |
|
os.makedirs(odir, exist_ok=True) |
|
opath = os.path.join(odir, self.get_fname()) |
|
with open(opath, "w") as f: |
|
f.write(json.dumps(self.dumps(), indent=2)) |
|
|