gavinzli committed on
Commit
b1f8d8a
·
1 Parent(s): 85f3efe

Handle IncompleteRead exception in crawl function for improved error handling

Browse files
Files changed (1) hide show
  1. source/stats.py +2 -1
source/stats.py CHANGED
@@ -1,6 +1,7 @@
1
  """Module to crawl the website 'https://www.stats.gov.cn' to fetch and process articles."""
2
  import time
3
  import urllib.request
 
4
  from datetime import datetime, timedelta
5
 
6
  from lxml import etree
@@ -38,7 +39,7 @@ def crawl(delta):
38
  html_text = text.decode("utf-8")
39
  page = etree.HTML(html_text)
40
  articlelist = page.xpath("//div[contains(@class, 'list-content')]/ul/li")
41
- except urllib.error.URLError as error:
42
  logger.info(error)
43
  continue
44
  for article in articlelist:
 
1
  """Module to crawl the website 'https://www.stats.gov.cn' to fetch and process articles."""
2
  import time
3
  import urllib.request
4
+ import http.client
5
  from datetime import datetime, timedelta
6
 
7
  from lxml import etree
 
39
  html_text = text.decode("utf-8")
40
  page = etree.HTML(html_text)
41
  articlelist = page.xpath("//div[contains(@class, 'list-content')]/ul/li")
42
+ except (urllib.error.URLError, http.client.IncompleteRead) as error:
43
  logger.info(error)
44
  continue
45
  for article in articlelist: