File size: 3,980 Bytes
da88570
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
import json

import spacy


def convert_to_spacy_annotations(data):
    """Convert labelled text records into spaCy-style training tuples.

    Each entity's character offsets are found by searching for its surface
    string in the record's text. When the same surface string is listed more
    than once for a record, successive listings are matched to successive
    (non-overlapping) occurrences in the text instead of all pointing at the
    first one.

    Args:
        data: Iterable of dicts, each with a ``"text"`` string and an
            ``"entities"`` list of dicts carrying ``"data"`` (the entity's
            surface string) and ``"label"``.

    Returns:
        A list of ``(text, {"entities": [(start, end, label), ...]})`` tuples
        where ``start``/``end`` are character offsets into ``text`` (``end``
        exclusive). Entities whose surface string is empty or cannot be found
        (any more) in the text are omitted.

    Raises:
        KeyError: If a record or entity lacks one of the expected keys.
    """
    spacy_annotations = []

    for item in data:
        text = item["text"]
        entities = []
        # Per surface string: offset at which the next search must resume, so
        # a repeated entity maps to the next occurrence rather than the first.
        resume_at = {}

        for entity in item["entities"]:
            surface = entity["data"]
            if not surface:
                # str.find("") returns 0, which would yield a zero-width span.
                continue

            start_idx = text.find(surface, resume_at.get(surface, 0))
            if start_idx == -1:
                continue

            end_idx = start_idx + len(surface)
            resume_at[surface] = end_idx
            entities.append((start_idx, end_idx, entity["label"]))

        spacy_annotations.append((text, {"entities": entities}))

    return spacy_annotations


# Example JSON-style data.
# Each record holds a "text" string plus an "entities" list; every entity
# gives the exact substring ("data") to locate in the text and its "label".
# Two labels occur below: EVENT_DATE (single date) and EVENT_DATE_RANGE
# (two dates joined by a separator).
data = [
  {
    "text": "Im Rahmen der Jubiläumstour zu Ehren von Lutz Kayser (\"50 Jahre OTRAG – Oribital Transport und Raketen AG\") machen Film und Regisseur am 05.12.2024 im Technik-Salon Station in der TIB in Hannover.",
    "entities": [{ "data": "05.12.2024", "label": "EVENT_DATE" }]
  },
  {
    "text": "Wann? 05.12.2024, 19:00-21:00",
    "entities": [{ "data": "05.12.2024", "label": "EVENT_DATE" }]
  },
  {
    # Two separate dates in one sentence, each labelled individually.
    "text": "Der siebte Workshop Retrodigitalisierung findet am 20.03.2025 und 21.03.2025 bei ZB MED – Informationszentrum Lebenswissenschaften in Köln statt.",
    "entities": [
      { "data": "20.03.2025", "label": "EVENT_DATE" },
      { "data": "21.03.2025", "label": "EVENT_DATE" }
    ]
  },
  {
    "text": "Wann? 20.03.2025 - 21.03.2025",
    "entities": [{ "data": "20.03.2025 - 21.03.2025", "label": "EVENT_DATE_RANGE" }]
  },
  {
    "text": "Die 18. ACM International Conference on Web Search and Data Mining (WSDM 2025) wird vom 10.03.2025 - 14.03.2025 in Hannover stattfinden.",
    "entities": [{ "data": "10.03.2025 - 14.03.2025", "label": "EVENT_DATE_RANGE" }]
  },
  {
    "text": "So. 08.12.2024 12:15 - 13:15 CET",
    "entities": [{ "data": "08.12.2024", "label": "EVENT_DATE" }]
  },
  {
    "text": "24.12.2025 um 16:00",
    "entities": [{ "data": "24.12.2025", "label": "EVENT_DATE" }]
  },
  {
    "text": "07.01.2025",
    "entities": [{ "data": "07.01.2025", "label": "EVENT_DATE" }]
  },
  {
    "text": "Am 01.07.2025",
    "entities": [{ "data": "01.07.2025", "label": "EVENT_DATE" }]
  },
  {
    "text": "Wann? 07.11.2024 - 09.03.2025",
    "entities": [{ "data": "07.11.2024 - 09.03.2025", "label": "EVENT_DATE_RANGE" }]
  },
  {
    "text": "01.11.2024 - 09.03.2025",
    "entities": [{ "data": "01.11.2024 - 09.03.2025", "label": "EVENT_DATE_RANGE" }]
  },
  {
    # NOTE(review): the second date (26.12.2024) precedes the first
    # (01.09.2025) — confirm whether this reversed range is intentional.
    "text": "01.09.2025 - 26.12.2024",
    "entities": [{ "data": "01.09.2025 - 26.12.2024", "label": "EVENT_DATE_RANGE" }]
  },
  {
    "text": "Premiere am 01.12.2024",
    "entities": [
      {
        "data": "01.12.2024",
        "label": "EVENT_DATE"
      }
    ]
  },
  {
    "text": "01.11.2025 ab 16:00",
    "entities": [
      {
        "data": "01.11.2025",
        "label": "EVENT_DATE"
      }
    ]
  },
  {
    "text": "15.01.2025 ab 18:00",
    "entities": [
      {
        "data": "15.01.2025",
        "label": "EVENT_DATE"
      }
    ]
  },
  {
    "text": "01.12.2025 ab 14:00-15:00",
    "entities": [
      {
        "data": "01.12.2025",
        "label": "EVENT_DATE"
      }
    ]
  },
  {
    "text": "18.01.2025 ab 11:00-18:00",
    "entities": [
      {
        "data": "18.01.2025",
        "label": "EVENT_DATE"
      }
    ]
  },
  {
    # This range is separated by an en dash (–), not the hyphen used above.
    "text": "01.12.2024 – 08.12.2024",
    "entities": [
      {
        "data": "01.12.2024 – 08.12.2024",
        "label": "EVENT_DATE_RANGE"
      }
    ]
  },
  {
    # Weekdays and times only: a record with no entities at all.
    "text": "Freitag 16:00-20:00 / Samstag 11:00-20:00 / Sonntag 11:00-18:00",
    "entities": []
  },
  {
    "text": "So, 15.12.2024 Beginn: 15:00 Einlass: 14:30",
    "entities": [
      {
        "data": "15.12.2024",
        "label": "EVENT_DATE"
      }
    ]
  }
]






# Convert the sample records into the spaCy annotation format.
annotations = convert_to_spacy_annotations(data)

# Serialise once so the saved file and the console output are the same text.
serialized = json.dumps(annotations, ensure_ascii=False, indent=4)

# Save the JSON one directory above the current working directory.
with open("../annotations.json", "w", encoding="utf-8") as f:
    f.write(serialized)

# Echo the result for a quick visual check.
print(serialized)