Add error handling for URL requests in crawl function
source/safe.py +9 -5
@@ -28,11 +28,15 @@ def crawl(delta):
         else:
             category_url = f"https://www.safe.gov.cn/safe/zcfgjd/index_{i}.html"
         i = i + 1
-        req = urllib.request.urlopen(category_url)
-        text = req.read()
-        html_text = text.decode("utf-8")
-        page = etree.HTML(html_text)
-        articlelist = page.xpath("//div[contains(@class, 'list_conr')]/ul/li")
+        try:
+            req = urllib.request.urlopen(category_url)
+            text = req.read()
+            html_text = text.decode("utf-8")
+            page = etree.HTML(html_text)
+            articlelist = page.xpath("//div[contains(@class, 'list_conr')]/ul/li")
+        except urllib.error.URLError as error:
+            logger.error(error)
+            continue
         for article in articlelist:
             if isinstance(article, etree._Element):
                 subelement = etree.tostring(article).decode()
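For context, a minimal self-contained sketch of how the patched crawl loop might look. Only the try/except block and the statements it wraps come from this change; the pagination loop, the `delta` bound, the `results` accumulator, the imports, and the `logger` setup are assumptions filled in for illustration and are not shown in the diff.

import logging
import urllib.error
import urllib.request

from lxml import etree

logger = logging.getLogger(__name__)


def crawl(delta):
    # Assumed pagination scheme: page 0 is index.html, later pages index_{i}.html.
    results = []
    i = 0
    while i <= delta:
        if i == 0:
            category_url = "https://www.safe.gov.cn/safe/zcfgjd/index.html"
        else:
            category_url = f"https://www.safe.gov.cn/safe/zcfgjd/index_{i}.html"
        i = i + 1
        try:
            req = urllib.request.urlopen(category_url)
            text = req.read()
            html_text = text.decode("utf-8")
            page = etree.HTML(html_text)
            articlelist = page.xpath("//div[contains(@class, 'list_conr')]/ul/li")
        except urllib.error.URLError as error:
            # On a network failure, log it and skip this index page instead of
            # letting the exception abort the whole crawl.
            logger.error(error)
            continue
        for article in articlelist:
            if isinstance(article, etree._Element):
                subelement = etree.tostring(article).decode()
                results.append(subelement)
    return results

Catching urllib.error.URLError also covers HTTPError (its subclass), so both connection failures and bad HTTP status codes are logged and skipped; because i is incremented before the request, the continue cannot retry the same page forever.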