Handle IncompleteRead exception in crawl function for improved error handling
Changed file: source/stats.py (+2 -1)
@@ -1,6 +1,7 @@
 """Module to crawl the website 'https://www.stats.gov.cn' to fetch and process articles."""
 import time
 import urllib.request
+import http.client
 from datetime import datetime, timedelta

 from lxml import etree
@@ -38,7 +39,7 @@ def crawl(delta):
 html_text = text.decode("utf-8")
 page = etree.HTML(html_text)
 articlelist = page.xpath("//div[contains(@class, 'list-content')]/ul/li")
-except urllib.error.URLError as error:
+except (urllib.error.URLError, http.client.IncompleteRead) as error:
 logger.info(error)
 continue
 for article in articlelist: