import json
import random
from pathlib import Path

import spacy
from spacy.training.example import Example

# Load the pretrained German pipeline that serves as the starting point.
nlp = spacy.load("de_core_news_sm")

# Read the annotated examples. The path is relative, so the file is looked up
# one directory above the current working directory.
with open("../annotations.json", encoding="utf-8") as annotations_file:
    TRAINING_DATA = json.loads(annotations_file.read())

# Fetch the pipeline's NER component, creating it first if the loaded
# pipeline does not ship one.
if "ner" in nlp.pipe_names:
    ner = nlp.get_pipe("ner")
else:
    # add_pipe() builds the component from its factory name and returns the
    # instance that is actually part of the pipeline, so the labels registered
    # on `ner` below reach the component that gets trained. A separate
    # create_pipe() call would hand back a detached component that the
    # pipeline never uses.
    # NOTE(review): a freshly added component has no weights yet and must be
    # initialized before it can be updated - confirm the training step does so.
    ner = nlp.add_pipe("ner", last=True)

# Register the custom entity labels on the NER component.
for label in ("START_DATE", "END_DATE", "DATE", "OTHER"):
    ner.add_label(label)

# Turn every (text, annotations) pair into a spaCy Example: the text is only
# tokenized via make_doc() and then paired with its gold-standard annotations.
examples = [
    Example.from_dict(nlp.make_doc(text), annotations)
    for text, annotations in TRAINING_DATA
]

# Fine-tune the pretrained pipeline. resume_training() keeps the weights that
# were loaded from disk; begin_training() is spaCy v3's deprecated alias for
# initialize(), which would reset them before the first update.
optimizer = nlp.resume_training()

# Only the entity recognizer is trained: the annotations carry entity spans
# only, so every other component stays disabled for the duration of the loop
# and is restored automatically when the block exits.
with nlp.select_pipes(enable="ner"):
    for epoch in range(30):
        print(f"Epoch {epoch + 1}")
        losses = {}

        # Present the examples in a new random order every epoch.
        random.shuffle(examples)

        for example in examples:
            # One example per update step. The optimizer is passed explicitly
            # so the one created above is the one applying the gradients.
            nlp.update([example], drop=0.5, sgd=optimizer, losses=losses)

        # Losses accumulated per component over the whole epoch.
        print(losses)

# Persist the fine-tuned pipeline. Create the target directory together with
# any missing parents up front, so that a missing "models" directory cannot
# make the save fail after training has already finished.
output_dir = Path("models/date_model")
output_dir.mkdir(parents=True, exist_ok=True)
nlp.to_disk(output_dir)