File size: 4,324 Bytes
67e898c 58c260c 6fc4565 67e898c 6fc4565 0194ddd 6fc4565 0194ddd 67e898c 0194ddd 67e898c 6fc4565 0194ddd 6fc4565 0194ddd 67e898c 0194ddd 67e898c 6fc4565 67e898c 58c260c 6fc4565 67e898c 6fc4565 67e898c 6fc4565 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
import streamlit as st
from src.nlp.experimental.textclassification.classify_title import train_data
from src.nlp.playground.pipelines.event_data_extractor import EventDataExtractor
from src.persistence.db import init_db
from src.utils.Event import Event, Schedule
from src.utils.apis.googlemaps_api import GoogleMapsAPI
from src.utils.helpers import normalize_data
# Event titles (column "Veranstaltungstitel") taken from the title classifier's
# training data. init_db_entries() uses them as an exclusion list: any record
# whose "data" text contains one of these titles is dropped.
# NOTE(review): presumably this keeps examples the classifier was trained on
# out of the test set -- confirm.
filter_data = train_data["Veranstaltungstitel"]
@st.cache_resource
def init_connection():
    """Return the database handle produced by ``init_db``.

    The function is decorated with ``st.cache_resource``, Streamlit's cache
    for shared resources, so the handle is meant to be reused instead of
    being re-created on every script rerun.
    """
    connection = init_db()
    return connection
@st.cache_resource
def init_event_data_extractor():
    """Return an ``EventDataExtractor`` constructed with default arguments.

    Decorated with ``st.cache_resource`` so the extractor instance is meant
    to be reused across script reruns rather than rebuilt each time.
    """
    extractor = EventDataExtractor()
    return extractor
@st.cache_data
def init_db_entries():
    """Load the test records and return the first ten that pass the filter.

    Queries ``db.event_urls`` for documents with ``final == True`` and
    ``class == "EventDetail"``, keeps only those whose ``data`` text contains
    none of the entries in ``filter_data``, and writes progress messages and
    the number of remaining records to the page.

    Note that the reported count covers all remaining records, while only the
    first ten of them are returned. ``db`` is the module-level handle that is
    assigned further down in this script.

    Returns:
        A list with at most ten record dicts.
    """
    st.info("Fetching data")
    query = {"final": True, "class": "EventDetail"}
    projection = {"_id": 1, "url": 1, "data": 1, "html": 1, "information": 1}
    records = list(db.event_urls.find(query, projection))
    st.info("Fetched data")

    # A record survives only if no filter entry occurs in its "data" text;
    # records without a "data" key are treated as having empty text.
    remaining = [
        record
        for record in records
        if not any(title in record.get("data", "") for title in filter_data)
    ]

    st.write(f"{len(remaining)} Testdatensätze in der Datenbank")
    return remaining[:10]
@st.cache_resource
def init_google_maps_api():
    """Return a ``GoogleMapsAPI`` instance constructed with default arguments.

    Decorated with ``st.cache_resource`` so the client is meant to be reused
    across script reruns rather than rebuilt each time.
    """
    client = GoogleMapsAPI()
    return client
def event_similarity(actual, predicted):
    """Return the percentage of compared attributes on which two events agree.

    The attributes ``title``, ``schedule``, ``prices``, ``address`` and
    ``organizers`` are read from both objects and compared pairwise with
    ``==``; every attribute carries the same weight.

    Args:
        actual: Object holding the expected attribute values.
        predicted: Object holding the extracted attribute values.

    Returns:
        A float between 0.0 and 100.0.
    """
    # Attributes that take part in the comparison.
    compared_fields = ("title", "schedule", "prices", "address", "organizers")

    # Read every attribute pair up front, before any comparison runs.
    pairs = [
        (getattr(actual, field), getattr(predicted, field))
        for field in compared_fields
    ]

    # Count the pairs whose values compare equal.
    matches = 0
    for expected, extracted in pairs:
        if expected == extracted:
            matches += 1

    # Express the share of matching attributes as a percentage.
    return (matches / len(pairs)) * 100
# Module-level handles used by the code below. Both factory functions are
# wrapped in st.cache_resource where they are defined.
db = init_connection()
# NOTE(review): google_maps_api is not referenced anywhere else in the visible
# part of this file -- confirm whether it is still needed.
google_maps_api = init_google_maps_api()
# Earlier st.session_state-based initialisation, kept for reference only.
# init_db_entries() and init_event_data_extractor() now perform the same steps.
# if "elements" not in st.session_state:
# st.info("Fetching data")
# elements = list(db.event_urls.find({"final":True, "class": "EventDetail"},{"_id":1, "url":1, "data":1, "html":1, "information":1}))
# st.info("Fetched data")
# filtered_elements = []
# for el in elements:
# if all(f not in el.get("data", "") for f in filter_data):
# filtered_elements.append(el)
# st.session_state.elements = filtered_elements
# st.write(f"{len(filtered_elements)} Testdatensätze in der Datenbank")
# if "event_data_extractor" not in st.session_state:
# st.info("Initialisiere Extractor Pipeline")
# st.session_state.event_data_extractor = EventDataExtractor()
event_data_extractor = init_event_data_extractor()
# init_db_entries() returns at most ten filtered records.
elements = init_db_entries()
# Per the Streamlit documentation, st.button returns True only on the script
# rerun that was triggered by clicking the button.
start_tests = st.button("Starte Tests")
if start_tests:
    # For every test record: build the expected event from the stored
    # annotation, show the raw and the normalized text, run the extraction
    # pipeline and report how many attributes match.
    for el in elements:
        # The loader tolerates records without a "data" field, so guard here:
        # one such record must not abort the whole run with a KeyError.
        raw_data = el.get("data")
        if raw_data is None:
            st.warning(f"Keine Daten für {el.get('url')} vorhanden - Eintrag wird übersprungen")
            continue

        # Expected values live under information.actual. Look the dict up
        # once; `or {}` also covers fields that are stored as None.
        actual = (el.get("information") or {}).get("actual") or {}

        actual_event = Event()
        actual_event.url = el.get("url")
        actual_event.title = actual.get("title", "")
        actual_event.organizers = actual.get("organizers", [])
        actual_event.categories = actual.get("categories", [])
        actual_event.locations = actual.get("locations", [])
        actual_event.prices = actual.get("prices", [])
        # Only the formatted address string is kept for the comparison.
        actual_event.address = (actual.get("address") or {}).get("formatted")
        actual_event.schedule = [
            Schedule(
                date.get("start_date"),
                date.get("end_date"),
                date.get("start_time"),
                date.get("end_time"),
                date.get("admittance_time"),
            )
            for date in actual.get("dates") or []
        ]

        # Raw text as stored in the database.
        with st.container(border=True):
            st.markdown(raw_data)
        # The same text after normalize_data().
        with st.container(border=True):
            preprocessed_md = normalize_data(raw_data)
            st.markdown(preprocessed_md)
        st.write(actual_event)

        # Deliberately broad catch: a failure on one record is shown on the
        # page and the loop continues with the next record.
        try:
            predicted_event = event_data_extractor.extract(raw_data)
            st.write(predicted_event)
            st.info(f"Ähnlichkeit der Ergebnisse: {event_similarity(actual_event, predicted_event)}")
        except Exception as e:
            st.error(f"Fehler bei der Verarbeitung: {e}")
|