File size: 3,542 Bytes
da88570
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import psutil
from transformers import pipeline
from abc import ABC

# German event-category names. Order is kept exactly as originally written
# (note that "Neueröffnung" is not in alphabetical position).
# NOTE(review): nothing in the visible code reads this list — confirm usage.
categories = [
    "Ausstellung", "Charity-Event", "Comedy", "Dinner-Show",
    "Dokumentation", "Neueröffnung", "Familienveranstaltung", "Feier",
    "Fest", "Filmfestival", "Filmvorführung", "Gaming",
    "Gesprächsabend", "Gottesdienst", "Infoveranstaltung", "Kabarett",
    "Kinderprogramm", "Kochkurs", "Konferenz", "Konzert",
    "Kulturabend", "Kunst", "Lesung", "Markt",
    "Messe", "Modenschau", "Museum", "Musical",
    "Musik", "Online-Kurs", "Oper", "Outdoor",
    "Party", "Performance", "Religiöse Veranstaltung", "Seminar",
    "Sport", "Startup-Event", "Tanz", "Tech-Event",
    "Theater", "Volksfest", "Vortrag", "Wanderung",
    "Webinar", "Workshop",
]

# Price-related labels (price, free admission, tickets, other).
# NOTE(review): not referenced by the visible code — confirm usage.
price = ["Preis", "Eintritt frei", "Tickets", "Andere"]

# Labels for headings (title, dates, location, admission, tickets,
# organizer information, miscellaneous).
# NOTE(review): not referenced by the visible code — confirm usage.
title = [
    "Titel", "Termine", "Ort", "Eintritt",
    "Tickets", "Veranstalterinformation", "Sonstiges",
]

# Labels for paragraphs (description, location, time, ticket, admission,
# miscellaneous).
# NOTE(review): not referenced by the visible code — confirm usage.
paragraph = [
    "Beschreibung", "Ort", "Zeit",
    "Ticket", "Eintritt", "Sonstiges",
]



class ClassifierMode(ABC):
    """Pairs a set of candidate labels with a hypothesis template.

    Both values are stored at construction time and exposed through
    read-only properties (no setters are defined). The class derives from
    ``ABC`` but declares no abstract methods, so it can still be
    instantiated directly.
    """

    def __init__(self, labels, hypothesis_template):
        """Store the labels and the hypothesis template unchanged."""
        self._hypothesis_template = hypothesis_template
        self._labels = labels

    @property
    def hypothesis_template(self):
        """The hypothesis template given to the constructor."""
        return self._hypothesis_template

    @property
    def labels(self):
        """The very same labels object given to the constructor (not a copy)."""
        return self._labels

class CategoryMode(ClassifierMode):
    """Preset mode for classifying an event by category."""

    def __init__(self):
        """Configure three fixed category labels and the category template."""
        # NOTE(review): only these three labels are used; the module-level
        # `categories` list is not consulted here — confirm this is intended.
        category_labels = ["Sport", "Musik", "Kunst"]
        template = "Kategorie der Veranstaltung ist {}."
        super().__init__(labels=category_labels, hypothesis_template=template)

class PriceMode(ClassifierMode):
    """Preset mode with price-level labels (free, cheap, expensive)."""

    def __init__(self):
        """Configure the three fixed price labels and their template."""
        price_labels = ["kostenlos", "günstig", "teuer"]
        template = "Der Text enthält Informationen zu {}."
        super().__init__(labels=price_labels, hypothesis_template=template)

class TitleMode(ClassifierMode):
    """Preset mode distinguishing an event title from a section title."""

    def __init__(self):
        """Configure the two fixed heading labels and their template."""
        heading_labels = ["Veranstaltungstitel", "Abschnittstitel"]
        # This template has no trailing period, unlike the other presets;
        # it is reproduced exactly as written.
        template = "Die Überschrift ist ein {}"
        super().__init__(labels=heading_labels, hypothesis_template=template)

class ParagraphMode(ClassifierMode):
    """Preset mode labelling a text as introduction, main part or conclusion."""

    def __init__(self):
        """Configure the three fixed paragraph labels and their template."""
        section_labels = ["Einleitung", "Hauptteil", "Schluss"]
        template = "Der Text ist {} einer Veranstaltung."
        super().__init__(labels=section_labels, hypothesis_template=template)

class CustomMode(ClassifierMode):
    """Mode whose labels and hypothesis template are supplied by the caller."""

    def __init__(self, labels, hypothesis_template):
        """Validate and store caller-provided labels and template.

        Raises:
            ValueError: If ``labels`` or ``hypothesis_template`` is falsy
                (for example ``None``, an empty list or an empty string).
        """
        both_given = labels and hypothesis_template
        if not both_given:
            raise ValueError("Labels und Hypothesis Template dürfen nicht leer sein!")
        super().__init__(labels, hypothesis_template)


class Prediction:
    """A single classification result: one label and its score."""

    def __init__(self, label: str, score: float):
        """Store the label and score as plain public attributes."""
        self.label: str = label
        self.score: float = score

    def __repr__(self) -> str:
        """Return a readable form so results are inspectable in logs and REPLs."""
        return f"{type(self).__name__}(label={self.label!r}, score={self.score!r})"

class ZeroShotClassifier:
    """Zero-shot text classifier using the ``Sahajtomar/German_Zeroshot`` model."""

    def __init__(self):
        """Create the ``zero-shot-classification`` pipeline once per instance."""
        self.classifier = pipeline(
            task="zero-shot-classification",
            model="Sahajtomar/German_Zeroshot")

    def classify(self, text, mode: ClassifierMode):
        """Classify ``text`` against the labels of ``mode``.

        Args:
            text: Input passed unchanged to the pipeline. The result is
                indexed as a single mapping with ``"labels"`` and
                ``"scores"``, so this presumably expects one string —
                confirm before passing a batch.
            mode: Supplies the candidate labels and the hypothesis template.

        Returns:
            A list of ``Prediction`` objects, one per returned label,
            sorted by score from highest to lowest. No score threshold is
            applied; every label is included.
        """
        predictions = self.classifier(
            text, mode.labels, hypothesis_template=mode.hypothesis_template)

        # Pair each label with its score directly instead of indexing by position.
        result = [
            Prediction(label=label, score=score)
            for label, score in zip(predictions["labels"], predictions["scores"])
        ]
        return sorted(result, key=lambda x: x.score, reverse=True)