|
import streamlit as st |
|
|
|
from src.nlp.experimental.textclassification.classify_title import train_data |
|
from src.nlp.playground.pipelines.event_data_extractor import EventDataExtractor |
|
from src.persistence.db import init_db |
|
from src.utils.Event import Event, Schedule |
|
from src.utils.apis.googlemaps_api import GoogleMapsAPI |
|
from src.utils.helpers import normalize_data |
|
|
|
# Title column of `train_data`; records whose text contains one of these
# titles are excluded when the test set is assembled in `init_db_entries`.
_TITLE_COLUMN = "Veranstaltungstitel"
filter_data = train_data[_TITLE_COLUMN]
|
|
|
|
|
@st.cache_resource
def init_connection():
    """Create the database handle via ``init_db`` and return it.

    The function is wrapped in ``st.cache_resource``, so Streamlit reuses the
    returned object across script reruns instead of calling ``init_db`` again.
    """
    connection = init_db()
    return connection
|
|
|
|
|
@st.cache_resource
def init_event_data_extractor():
    """Build an ``EventDataExtractor`` and return it.

    The function is wrapped in ``st.cache_resource``, so Streamlit reuses the
    same extractor instance across script reruns.
    """
    extractor = EventDataExtractor()
    return extractor
|
|
|
|
|
@st.cache_data
def init_db_entries(limit=10):
    """Load the final ``EventDetail`` records that are used as test data.

    Records whose ``data`` text contains any title from ``filter_data`` are
    dropped. The number of remaining records is written to the page and at
    most ``limit`` of them are returned.

    Args:
        limit: Maximum number of records to return; ``None`` returns all
            remaining records. Defaults to 10, the previously hard-coded size.

    Returns:
        A list of the fetched records, restricted by the query projection to
        the fields ``_id``, ``url``, ``data``, ``html`` and ``information``.
    """
    st.info("Fetching data")
    query = {"final": True, "class": "EventDetail"}
    projection = {"_id": 1, "url": 1, "data": 1, "html": 1, "information": 1}
    records = list(db.event_urls.find(query, projection))
    st.info("Fetched data")

    # Prepare the titles once instead of re-iterating the source column for
    # every record. A non-string entry (e.g. a missing value) would make the
    # substring test raise, and an empty title is contained in every text and
    # would drop all records, so both are skipped.
    known_titles = [
        title for title in filter_data if isinstance(title, str) and title
    ]

    filtered_elements = []
    for record in records:
        # "data" may be absent or stored as None; treat both as empty text.
        text = record.get("data") or ""
        if not any(title in text for title in known_titles):
            filtered_elements.append(record)

    st.write(f"{len(filtered_elements)} Testdatensätze in der Datenbank")
    return filtered_elements[:limit]
|
|
|
@st.cache_resource
def init_google_maps_api():
    """Build a ``GoogleMapsAPI`` client and return it.

    The function is wrapped in ``st.cache_resource``, so Streamlit reuses the
    same client instance across script reruns.
    """
    maps_client = GoogleMapsAPI()
    return maps_client
|
|
|
def event_similarity(actual, predicted):
    """Return the percentage of compared event fields that are equal.

    The attributes ``title``, ``schedule``, ``prices``, ``address`` and
    ``organizers`` are read from both objects and compared with ``==``; every
    field carries the same weight.

    Args:
        actual: Object holding the expected field values.
        predicted: Object holding the extracted field values.

    Returns:
        The share of equal fields as a float percentage (0.0 to 100.0).
    """
    compared_fields = ("title", "schedule", "prices", "address", "organizers")

    # Read all value pairs first, then compare them.
    value_pairs = [
        (getattr(actual, field), getattr(predicted, field))
        for field in compared_fields
    ]

    equal_count = 0
    for expected, extracted in value_pairs:
        if expected == extracted:
            equal_count += 1

    return (equal_count / len(value_pairs)) * 100
|
|
|
# Cached resources and test records used by this page.
db = init_connection()
google_maps_api = init_google_maps_api()

event_data_extractor = init_event_data_extractor()
elements = init_db_entries()

start_tests = st.button("Starte Tests")
if start_tests:
    for el in elements:
        # Expected values stored with the record. Resolve the nested lookup
        # once and tolerate a missing or None "information"/"actual" level.
        information = el.get("information") or {}
        actual = information.get("actual") or {}

        actual_event = Event()
        actual_event.url = el.get("url")
        actual_event.title = actual.get("title", "")
        actual_event.organizers = actual.get("organizers", [])
        actual_event.categories = actual.get("categories", [])
        actual_event.locations = actual.get("locations", [])
        actual_event.prices = actual.get("prices", [])

        actual_event.address = (actual.get("address") or {}).get("formatted")

        actual_event.schedule = [
            Schedule(
                entry.get("start_date"),
                entry.get("end_date"),
                entry.get("start_time"),
                entry.get("end_time"),
                entry.get("admittance_time"),
            )
            for entry in actual.get("dates") or []
        ]

        # `init_db_entries` treats "data" as optional, so do the same here:
        # indexing el["data"] would abort the whole run on a record without
        # text, because these calls sit outside the try block below.
        data = el.get("data") or ""

        # NOTE(review): the flattened source carried no indentation; the two
        # containers are assumed to be siblings, with the remaining output
        # outside of them. Confirm against the intended page layout.
        with st.container(border=True):
            st.markdown(data)
        with st.container(border=True):
            preprocessed_md = normalize_data(data)
            st.markdown(preprocessed_md)
        st.write(actual_event)

        # Page-level boundary: report a failing extraction for this record
        # and continue with the next one.
        try:
            predicted_event = event_data_extractor.extract(data)
            st.write(predicted_event)
            st.info(f"Ähnlichkeit der Ergebnisse: {event_similarity(actual_event, predicted_event)}")
        except Exception as e:
            st.error(f"Fehler bei der Verarbeitung: {e}")
|
|