gavinzli committed on
Commit 94ba329 · 1 Parent(s): b1f8d8a

Add error handling for URL requests in crawl function

Files changed (1)
  1. source/safe.py +9 -5
source/safe.py CHANGED
@@ -28,11 +28,15 @@ def crawl(delta):
         else:
             category_url = f"https://www.safe.gov.cn/safe/zcfgjd/index_{i}.html"
         i = i + 1
-        req = urllib.request.urlopen(category_url)
-        text = req.read()
-        html_text = text.decode("utf-8")
-        page = etree.HTML(html_text)
-        articlelist = page.xpath("//div[contains(@class, 'list_conr')]/ul/li")
+        try:
+            req = urllib.request.urlopen(category_url)
+            text = req.read()
+            html_text = text.decode("utf-8")
+            page = etree.HTML(html_text)
+            articlelist = page.xpath("//div[contains(@class, 'list_conr')]/ul/li")
+        except urllib.error.URLError as error:
+            logger.error(error)
+            continue
         for article in articlelist:
             if isinstance(article, etree._Element):
                 subelement = etree.tostring(article).decode()
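For context, here is a minimal standalone sketch of what the patched block does, with the fetch-and-parse step pulled into a helper. fetch_article_list is a hypothetical name introduced only for illustration, and logger is assumed to be a module-level logging.Logger as implied by the diff. Note that urllib.error.HTTPError is a subclass of urllib.error.URLError, so this handler also catches HTTP status errors, not just connection failures:

import logging
import urllib.error
import urllib.request

from lxml import etree

logger = logging.getLogger(__name__)

def fetch_article_list(category_url):
    """Hypothetical helper mirroring the patched block in crawl().

    Fetches a category page, parses it, and returns the article <li>
    nodes. On a network failure it logs the error and returns None,
    which has the same effect as the `continue` in the diff: the
    caller skips this page instead of crashing.
    """
    try:
        req = urllib.request.urlopen(category_url)
        html_text = req.read().decode("utf-8")
        page = etree.HTML(html_text)
        return page.xpath("//div[contains(@class, 'list_conr')]/ul/li")
    except urllib.error.URLError as error:
        logger.error(error)
        return None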