# Hugging Face Space: adojode/event-data-extraction-playground (status: Running)
# File size: 3,542 Bytes
# da88570 (presumably the commit hash this copy was taken from)

import psutil
from transformers import pipeline
from abc import ABC

# German event-category names.
# NOTE(review): presumably intended as candidate labels for zero-shot
# classification, but no class visible in this module reads this list --
# confirm where it is consumed. The order is kept exactly as authored
# (it is not strictly alphabetical).
categories = [
    "Ausstellung", "Charity-Event", "Comedy", "Dinner-Show", "Dokumentation",
    "Neueröffnung", "Familienveranstaltung", "Feier", "Fest", "Filmfestival",
    "Filmvorführung", "Gaming", "Gesprächsabend", "Gottesdienst",
    "Infoveranstaltung", "Kabarett", "Kinderprogramm", "Kochkurs",
    "Konferenz", "Konzert", "Kulturabend", "Kunst", "Lesung", "Markt",
    "Messe", "Modenschau", "Museum", "Musical", "Musik", "Online-Kurs",
    "Oper", "Outdoor", "Party", "Performance", "Religiöse Veranstaltung",
    "Seminar", "Sport", "Startup-Event", "Tanz", "Tech-Event", "Theater",
    "Volksfest", "Vortrag", "Wanderung", "Webinar", "Workshop",
]

# German labels for price-related text: "Preis" (price), "Eintritt frei"
# (free admission), "Tickets", "Andere" (other).
# NOTE(review): not referenced by any class visible in this module --
# confirm where it is consumed.
price = ["Preis", "Eintritt frei", "Tickets", "Andere"]

# German labels for heading text: title, dates, location, admission,
# tickets, organizer information, miscellaneous.
# NOTE(review): not referenced by any class visible in this module --
# confirm where it is consumed.
title = [
    "Titel", "Termine", "Ort", "Eintritt",
    "Tickets", "Veranstalterinformation", "Sonstiges",
]

# German labels for paragraph text: description, location, time, ticket,
# admission, miscellaneous.
# NOTE(review): not referenced by any class visible in this module --
# confirm where it is consumed.
paragraph = [
    "Beschreibung", "Ort", "Zeit",
    "Ticket", "Eintritt", "Sonstiges",
]



class ClassifierMode(ABC):
    """Bundle of candidate labels and a hypothesis template.

    Derives from ``ABC`` but declares no abstract methods, so it can still
    be instantiated directly. Both values are stored as given (no copy, no
    validation) and exposed through read-only properties.

    Args:
        labels: Candidate labels handed to the classifier.
        hypothesis_template: Template string handed to the classifier
            alongside the labels.
    """

    def __init__(self, labels, hypothesis_template):
        # Underscore-prefixed storage; public access goes through the
        # getter-only properties below.
        self._labels, self._hypothesis_template = labels, hypothesis_template

    @property
    def hypothesis_template(self):
        """Return the hypothesis template passed to the constructor."""
        return self._hypothesis_template

    @property
    def labels(self):
        """Return the label collection passed to the constructor."""
        return self._labels

class CategoryMode(ClassifierMode):
    """Preset mode with three fixed event-category labels.

    NOTE(review): uses a hard-coded three-label set rather than the
    module-level ``categories`` list -- confirm this is intended.
    """

    def __init__(self):
        category_labels = ["Sport", "Musik", "Kunst"]
        # German: "Category of the event is {}."
        template = "Kategorie der Veranstaltung ist {}."
        super().__init__(labels=category_labels, hypothesis_template=template)

class PriceMode(ClassifierMode):
    """Preset mode with three fixed price labels (free, cheap, expensive).

    NOTE(review): uses its own label set rather than the module-level
    ``price`` list -- confirm this is intended.
    """

    def __init__(self):
        price_labels = ["kostenlos", "günstig", "teuer"]
        # German: "The text contains information about {}."
        template = "Der Text enthält Informationen zu {}."
        super().__init__(labels=price_labels, hypothesis_template=template)

class TitleMode(ClassifierMode):
    """Preset mode distinguishing an event title from a section title.

    NOTE(review): uses its own label set rather than the module-level
    ``title`` list -- confirm this is intended.
    """

    def __init__(self):
        heading_labels = ["Veranstaltungstitel", "Abschnittstitel"]
        # German: "The heading is a {}".
        # NOTE(review): unlike the other preset templates in this module,
        # this one has no trailing period -- confirm whether that is
        # deliberate before changing it.
        template = "Die Überschrift ist ein {}"
        super().__init__(labels=heading_labels, hypothesis_template=template)

class ParagraphMode(ClassifierMode):
    """Preset mode labelling text as introduction, main part, or conclusion.

    NOTE(review): uses its own label set rather than the module-level
    ``paragraph`` list -- confirm this is intended.
    """

    def __init__(self):
        section_labels = ["Einleitung", "Hauptteil", "Schluss"]
        # German: "The text is {} of an event."
        template = "Der Text ist {} einer Veranstaltung."
        super().__init__(labels=section_labels, hypothesis_template=template)

class CustomMode(ClassifierMode):
    """Mode built from caller-supplied labels and hypothesis template.

    Args:
        labels: Candidate labels; must be truthy (e.g. a non-empty list).
        hypothesis_template: Template string; must be truthy (non-empty).

    Raises:
        ValueError: If either argument is falsy.
    """

    def __init__(self, labels, hypothesis_template):
        # Both inputs must be present; ``labels`` is checked first and a
        # falsy value short-circuits the second check.
        if not (labels and hypothesis_template):
            raise ValueError("Labels und Hypothesis Template dürfen nicht leer sein!")
        super().__init__(labels, hypothesis_template)


class Prediction:
    """A single classification result: one label and its score.

    Args:
        label: The label this result refers to.
        score: The score assigned to that label.
    """

    def __init__(self, label: str, score: float):
        self.label = label
        self.score = score

    def __repr__(self) -> str:
        # Readable form for logs and debugging; without it a list of
        # results prints as opaque "<Prediction object at 0x...>" entries.
        return (
            f"{type(self).__name__}"
            f"(label={self.label!r}, score={self.score!r})"
        )

class ZeroShotClassifier:
    """Zero-shot text classifier wrapping a ``transformers`` pipeline.

    The constructor creates a ``zero-shot-classification`` pipeline for the
    ``Sahajtomar/German_Zeroshot`` model and keeps it on ``self.classifier``.
    """

    def __init__(self):
        self.classifier = pipeline(
            task="zero-shot-classification",
            model="Sahajtomar/German_Zeroshot",
        )

    def classify(self, text, mode: ClassifierMode):
        """Classify ``text`` against the labels of ``mode``.

        Args:
            text: Input forwarded unchanged to the pipeline.
            mode: Supplies ``labels`` and ``hypothesis_template`` for the
                pipeline call.

        Returns:
            A list of ``Prediction`` objects, one per entry in the
            pipeline result's ``"labels"``, ordered by score from highest
            to lowest. No score threshold is applied.
        """
        raw = self.classifier(
            text,
            mode.labels,
            hypothesis_template=mode.hypothesis_template,
        )

        # The result is read as a mapping whose "labels" and "scores"
        # entries line up position by position.
        label_names = raw["labels"]
        label_scores = raw["scores"]

        ranked = []
        for position, name in enumerate(label_names):
            ranked.append(Prediction(label=name, score=label_scores[position]))

        # Stable in-place sort, best score first.
        ranked.sort(key=lambda item: item.score, reverse=True)
        return ranked