# extract dates and times (commit 83f1514)
# import spacy
# from spacy import displacy
# from spacy.training import Example
# from spacy.tokens import Span
#
#
# nlp = spacy.blank("de")  # blank German model
# config = {"moves": None}
# parser = nlp.add_pipe("parser", config=config)
#
# # Define custom labels
# for label in ["ROOT", "nsubj", "obl", "det", "case", "punct"]:
#     parser.add_label(label)
#
# # Build the training data
# TRAIN_DATA = [("Die Veranstaltung beginnt am 24.12.2025 um 16:00.",
#                {"heads": [1, 2, 2, 4, 2, 6, 2, 2],
#                 "deps": ["det", "nsubj", "ROOT", "case", "obl", "case", "obl", "punct"]})]
#
# # Run the training loop
# optimizer = nlp.initialize()
# for epoch in range(50):  # more epochs for better results
#     for text, annotations in TRAIN_DATA:
#         doc = nlp.make_doc(text)
#         example = Example.from_dict(doc, annotations)
#         nlp.update([example], drop=0.1, losses={})
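# # Sanity check (sketch, ours): token-level annotations must line up with the
# # tokenizer's output, or Example.from_dict raises. The sentence above yields
# # 8 tokens, so "heads" and "deps" need 8 entries each:
# # assert len(annotations["heads"]) == len(annotations["deps"]) == len(nlp.make_doc(text))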
#
import spacy
from spacy.language import Language

nlp = spacy.load("de_core_news_lg")

@Language.component("custom_dependency_parser")
def custom_dependency_parser(doc):
    for token in doc:
        # If a token is a date/time expression, re-attach it to the nearest
        # governing verb (walking up the tree via ancestors instead of through
        # token.children, where a verb is virtually never found)
        if token.ent_type_ in ["TIME", "DATE"]:
            for ancestor in token.ancestors:
                if ancestor.pos_ == "VERB":
                    token.head = ancestor
                    token.dep_ = "time_modifier"
                    break
    return doc

# Add the custom parser right after the standard parser
nlp.add_pipe("custom_dependency_parser", after="parser")
doc = nlp("Die Veranstaltung beginnt am 24.12.2025 um 16:00, der Einlass startet ab 15:00. Beginn: 20:00, Ende: 21:00, Einlass: 19:00")
sub_sentences = []
current_sentence = []
for token in doc:
# Haupt-Subjekt identifizieren
if token.dep_ == "sb":
if current_sentence: # Falls bereits Tokens gesammelt wurden, speichere den bisherigen Satz
sub_sentences.append(" ".join([t for t in current_sentence]))
current_sentence = [token.text] # Starte einen neuen Satz
else:
current_sentence.append(token.text)
# Letzten Satz hinzufügen
if current_sentence:
sub_sentences.append(" ".join([t for t in current_sentence]))
# Ausgabe der getrennten Sätze
for i, sentence in enumerate(sub_sentences):
print(f"Satz {i+1}: {sentence}")
# Allowed dependency types (TIGER labels: subject, accusative object, dative object, noun kernel)
allowed_deps = {"sb", "oa", "da", "nk"}
allowed_pos = {"NOUN", "PROPN"}  # nouns & proper nouns

# Filtered tokens
filtered_tokens = [token for token in doc if token.dep_ in allowed_deps or token.pos_ in allowed_pos]
print(filtered_tokens)
# Print the dependencies and store the (token, relation, head) triples
relations = []
for token in doc:
    relations.append((token.text, token.dep_, token.head.text))
    print(f"{token.text} --({token.dep_})--> {token.head.text}")