|
import psutil |
|
from transformers import pipeline |
|
from abc import ABC |
|
|
|
# German event-category names.
# NOTE(review): not referenced by the classifier modes visible in this file —
# confirm where this list is consumed.
categories = [
    "Ausstellung", "Charity-Event", "Comedy", "Dinner-Show",
    "Dokumentation", "Neueröffnung", "Familienveranstaltung", "Feier",
    "Fest", "Filmfestival", "Filmvorführung", "Gaming",
    "Gesprächsabend", "Gottesdienst", "Infoveranstaltung", "Kabarett",
    "Kinderprogramm", "Kochkurs", "Konferenz", "Konzert",
    "Kulturabend", "Kunst", "Lesung", "Markt",
    "Messe", "Modenschau", "Museum", "Musical",
    "Musik", "Online-Kurs", "Oper", "Outdoor",
    "Party", "Performance", "Religiöse Veranstaltung", "Seminar",
    "Sport", "Startup-Event", "Tanz", "Tech-Event",
    "Theater", "Volksfest", "Vortrag", "Wanderung",
    "Webinar", "Workshop",
]
|
|
|
# German price-related label names.
# NOTE(review): not referenced by the classifier modes visible in this file.
price = ["Preis", "Eintritt frei", "Tickets", "Andere"]
|
|
|
# German heading-type label names.
# NOTE(review): not referenced by the classifier modes visible in this file.
title = [
    "Titel", "Termine", "Ort", "Eintritt",
    "Tickets", "Veranstalterinformation", "Sonstiges",
]
|
|
|
# German paragraph-type label names.
# NOTE(review): not referenced by the classifier modes visible in this file.
paragraph = [
    "Beschreibung", "Ort", "Zeit",
    "Ticket", "Eintritt", "Sonstiges",
]
|
|
|
|
|
|
|
class ClassifierMode(ABC):
    """Pair a set of candidate labels with a hypothesis template.

    Both constructor arguments are stored unchanged on the instance and
    exposed through getter-only properties (no setters are defined).
    """

    def __init__(self, labels, hypothesis_template):
        """Store ``labels`` and ``hypothesis_template`` as given."""
        self._hypothesis_template = hypothesis_template
        self._labels = labels

    @property
    def hypothesis_template(self):
        """Return the hypothesis template passed to the constructor."""
        return self._hypothesis_template

    @property
    def labels(self):
        """Return the labels object passed to the constructor."""
        return self._labels
|
|
|
class CategoryMode(ClassifierMode):
    """Mode with the fixed labels Sport / Musik / Kunst for event categories."""

    def __init__(self):
        """Initialise the base class with the fixed labels and template."""
        category_labels = ["Sport", "Musik", "Kunst"]
        template = "Kategorie der Veranstaltung ist {}."
        super().__init__(category_labels, template)
|
|
|
class PriceMode(ClassifierMode):
    """Mode with the fixed labels kostenlos / günstig / teuer."""

    def __init__(self):
        """Initialise the base class with the fixed labels and template."""
        price_labels = ["kostenlos", "günstig", "teuer"]
        template = "Der Text enthält Informationen zu {}."
        super().__init__(price_labels, template)
|
|
|
class TitleMode(ClassifierMode):
    """Mode with the fixed labels Veranstaltungstitel / Abschnittstitel."""

    def __init__(self):
        """Initialise the base class with the fixed labels and template."""
        heading_labels = ["Veranstaltungstitel", "Abschnittstitel"]
        template = "Die Überschrift ist ein {}"
        super().__init__(heading_labels, template)
|
|
|
class ParagraphMode(ClassifierMode):
    """Mode with the fixed labels Einleitung / Hauptteil / Schluss."""

    def __init__(self):
        """Initialise the base class with the fixed labels and template."""
        section_labels = ["Einleitung", "Hauptteil", "Schluss"]
        template = "Der Text ist {} einer Veranstaltung."
        super().__init__(section_labels, template)
|
|
|
class CustomMode(ClassifierMode):
    """Mode whose labels and hypothesis template are supplied by the caller."""

    def __init__(self, labels, hypothesis_template):
        """Validate both arguments, then hand them to the base class.

        Raises:
            ValueError: if ``labels`` or ``hypothesis_template`` is falsy
                (e.g. ``None`` or empty).
        """
        both_given = labels and hypothesis_template
        if not both_given:
            raise ValueError("Labels und Hypothesis Template dürfen nicht leer sein!")
        super().__init__(labels, hypothesis_template)
|
|
|
|
|
class Prediction:
    """One classification result: a candidate label and the score assigned to it."""

    def __init__(self, label: str, score: float):
        """Store the label and its score as plain attributes."""
        self.label = label
        self.score = score

    def __repr__(self) -> str:
        # Without this, printing or logging a list of predictions shows only
        # opaque object addresses.
        return f"{type(self).__name__}(label={self.label!r}, score={self.score!r})"
|
|
|
class ZeroShotClassifier:
    """Zero-shot text classifier backed by a ``transformers`` pipeline.

    The pipeline is created once in ``__init__`` with the
    ``Sahajtomar/German_Zeroshot`` model and reused for every ``classify``
    call.
    """

    def __init__(self):
        """Create the zero-shot-classification pipeline."""
        self.classifier = pipeline(
            task="zero-shot-classification",
            model="Sahajtomar/German_Zeroshot",
        )

    @staticmethod
    def _rank(result):
        """Convert one pipeline result dict into predictions, best score first.

        ``result`` must provide parallel ``"labels"`` and ``"scores"``
        sequences.
        """
        ranked = [
            Prediction(label=label, score=score)
            for label, score in zip(result["labels"], result["scores"])
        ]
        ranked.sort(key=lambda prediction: prediction.score, reverse=True)
        return ranked

    def classify(self, text, mode: ClassifierMode):
        """Classify ``text`` against the labels of ``mode``.

        Args:
            text: Input passed straight to the pipeline.
            mode: Supplies the candidate ``labels`` and the
                ``hypothesis_template`` for the pipeline call.

        Returns:
            A list of ``Prediction`` objects sorted by descending score when
            the pipeline returns a single result dict. When the pipeline
            returns a list of result dicts (as the Hugging Face pipeline does
            for a list of input texts), a list with one such ranked list per
            result.
        """
        output = self.classifier(
            text,
            mode.labels,
            hypothesis_template=mode.hypothesis_template,
        )
        if isinstance(output, dict):
            return self._rank(output)
        # Batch input: previously this path failed with a TypeError because a
        # list was indexed with a string key.
        return [self._rank(item) for item in output]
|
|