manaviel85370
committed on
Commit
·
5f8d317
1
Parent(s):
da88570
reactivate maps crawling
Browse files- pages/1_Get_Urls.py +27 -25
pages/1_Get_Urls.py
CHANGED
@@ -74,8 +74,7 @@ st.write("""
|
|
74 |
Wenn **"Google Maps Ergebnisse finden"** aktiviert ist, werden bei den Stadtportalen zusätzlich noch neue Veranstaltungsorte gesucht.""")
|
75 |
with st.form("Crawler Settings"):
|
76 |
count = st.number_input("Wie viele URLs sollen gecrawled werden?", step=1)
|
77 |
-
maps = st.checkbox("Google Maps Ergebnisse finden"
|
78 |
-
st.info("Aktuell können keine neuen Start-URLs generiert werden. Billing für GCP fehlt.")
|
79 |
# Every form must have a submit button.
|
80 |
submitted = st.form_submit_button("Starte Crawler")
|
81 |
if submitted:
|
@@ -88,29 +87,32 @@ with st.form("Crawler Settings"):
|
|
88 |
print(item)
|
89 |
if "maps_searches" not in item or "maps_searches" in item and type_id not in item["maps_searches"]:
|
90 |
st.info(f"Suche Maps Ergebnisse für {type_id} in {item['meta']['location']}")
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
"
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
|
|
|
|
|
|
114 |
|
115 |
if "maps_searches" in item:
|
116 |
maps_searches = item["maps_searches"]
|
|
|
74 |
Wenn **"Google Maps Ergebnisse finden"** aktiviert ist, werden bei den Stadtportalen zusätzlich noch neue Veranstaltungsorte gesucht.""")
|
75 |
with st.form("Crawler Settings"):
|
76 |
count = st.number_input("Wie viele URLs sollen gecrawled werden?", step=1)
|
77 |
+
maps = st.checkbox("Google Maps Ergebnisse finden")
|
|
|
78 |
# Every form must have a submit button.
|
79 |
submitted = st.form_submit_button("Starte Crawler")
|
80 |
if submitted:
|
|
|
87 |
print(item)
|
88 |
if "maps_searches" not in item or "maps_searches" in item and type_id not in item["maps_searches"]:
|
89 |
st.info(f"Suche Maps Ergebnisse für {type_id} in {item['meta']['location']}")
|
90 |
+
try:
|
91 |
+
maps_results = get_maps_results(type_id, item["meta"]["location"])
|
92 |
+
if maps_results:
|
93 |
+
new_elements = []
|
94 |
+
with st.expander("Maps Ergebnisse"):
|
95 |
+
for result in maps_results:
|
96 |
+
if result.website_uri \
|
97 |
+
and "facebook" not in result.website_uri \
|
98 |
+
and "instagram" not in result.website_uri \
|
99 |
+
and "tiktok" not in result.website_uri \
|
100 |
+
and result.website_uri not in [e["url"] for e in new_elements]:
|
101 |
+
element = {
|
102 |
+
"url_type": type_id,
|
103 |
+
"url": result.website_uri,
|
104 |
+
"meta":{
|
105 |
+
"website_host": result.display_name.text,
|
106 |
+
"location": result.formatted_address.split(", ")[1],
|
107 |
+
"address": result.formatted_address,
|
108 |
+
"maps_types": list(result.types)
|
109 |
+
}}
|
110 |
+
st.write(f"{element['meta']['website_host']} - {element['url']}")
|
111 |
+
new_elements.append(element)
|
112 |
+
if new_elements:
|
113 |
+
db.unsorted_urls.insert_many(new_elements)
|
114 |
+
except Exception as e:
|
115 |
+
st.error(e)
|
116 |
|
117 |
if "maps_searches" in item:
|
118 |
maps_searches = item["maps_searches"]
|