Commit 5f8d317 by manaviel85370
Parent(s): da88570

reactivate maps crawling

Files changed (1)
  1. pages/1_Get_Urls.py +27 -25
pages/1_Get_Urls.py CHANGED
@@ -74,8 +74,7 @@ st.write("""
 Wenn **"Google Maps Ergebnisse finden"** aktiviert ist, werden bei den Stadtportalen zusätzlich noch neue Veranstaltungsorte gesucht.""")
 with st.form("Crawler Settings"):
     count = st.number_input("Wie viele URLs sollen gecrawled werden?", step=1)
-    maps = st.checkbox("Google Maps Ergebnisse finden",disabled=True)
-    st.info("Aktuell können keine neuen Start-URLs generiert werden. Billing für GCP fehlt.")
+    maps = st.checkbox("Google Maps Ergebnisse finden")
     # Every form must have a submit button.
     submitted = st.form_submit_button("Starte Crawler")
     if submitted:
@@ -88,29 +87,32 @@ with st.form("Crawler Settings"):
                 print(item)
                 if "maps_searches" not in item or "maps_searches" in item and type_id not in item["maps_searches"]:
                     st.info(f"Suche Maps Ergebnisse für {type_id} in {item['meta']['location']}")
-                    maps_results = get_maps_results(type_id, item["meta"]["location"])
-                    if maps_results:
-                        new_elements = []
-                        with st.expander("Maps Ergebnisse"):
-                            for result in maps_results:
-                                if result.website_uri \
-                                        and "facebook" not in result.website_uri \
-                                        and "instagram" not in result.website_uri \
-                                        and "tiktok" not in result.website_uri \
-                                        and result.website_uri not in [e["url"] for e in new_elements]:
-                                    element = {
-                                        "url_type": type_id,
-                                        "url": result.website_uri,
-                                        "meta":{
-                                            "website_host": result.display_name.text,
-                                            "location": result.formatted_address.split(", ")[1],
-                                            "address": result.formatted_address,
-                                            "maps_types": list(result.types)
-                                        }}
-                                    st.write(f"{element['meta']['website_host']} - {element['url']}")
-                                    new_elements.append(element)
-                        if new_elements:
-                            db.unsorted_urls.insert_many(new_elements)
+                    try:
+                        maps_results = get_maps_results(type_id, item["meta"]["location"])
+                        if maps_results:
+                            new_elements = []
+                            with st.expander("Maps Ergebnisse"):
+                                for result in maps_results:
+                                    if result.website_uri \
+                                            and "facebook" not in result.website_uri \
+                                            and "instagram" not in result.website_uri \
+                                            and "tiktok" not in result.website_uri \
+                                            and result.website_uri not in [e["url"] for e in new_elements]:
+                                        element = {
+                                            "url_type": type_id,
+                                            "url": result.website_uri,
+                                            "meta":{
+                                                "website_host": result.display_name.text,
+                                                "location": result.formatted_address.split(", ")[1],
+                                                "address": result.formatted_address,
+                                                "maps_types": list(result.types)
+                                            }}
+                                        st.write(f"{element['meta']['website_host']} - {element['url']}")
+                                        new_elements.append(element)
+                            if new_elements:
+                                db.unsorted_urls.insert_many(new_elements)
+                    except Exception as e:
+                        st.error(e)
 
                 if "maps_searches" in item:
                     maps_searches = item["maps_searches"]
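
The re-enabled branch depends on get_maps_results(type_id, location), which is not part of this diff. The attributes read from each result (display_name.text, formatted_address, website_uri, types) match the Place message of the Places API (New) Python client (google-maps-places), so a minimal sketch of such a helper could look like the code below; the text-query format, field mask, and function body are assumptions for illustration, not code from this repository.

# Hypothetical sketch of get_maps_results, assuming the google-maps-places
# client (Places API "New") is used; query wording and field mask are guesses.
from google.maps import places_v1


def get_maps_results(type_id: str, location: str):
    """Run a Places text search for a venue type in a given city."""
    # Relies on Application Default Credentials for GCP auth.
    client = places_v1.PlacesClient()
    request = places_v1.SearchTextRequest(
        text_query=f"{type_id} in {location}",  # assumed query format
    )
    # The field mask limits the response to the attributes the crawler reads.
    field_mask = (
        "places.displayName,places.formattedAddress,"
        "places.websiteUri,places.types"
    )
    response = client.search_text(
        request=request, metadata=[("x-goog-fieldmask", field_mask)]
    )
    return list(response.places)

With a response of that shape, the filter in the diff (skipping Facebook, Instagram, and TikTok links and deduplicating on website_uri) operates directly on the returned Place objects, and the broad except Exception around the call surfaces quota or billing errors in the Streamlit UI instead of aborting the crawl.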