Handle IncompleteRead exception in crawl function for improved error handling
Changed file: source/stats.py (+2 -1)
@@ -1,6 +1,7 @@
 """Module to crawl the website 'https://www.stats.gov.cn' to fetch and process articles."""
 import time
 import urllib.request
+import http.client
 from datetime import datetime, timedelta

 from lxml import etree
@@ -38,7 +39,7 @@ def crawl(delta):
 html_text = text.decode("utf-8")
 page = etree.HTML(html_text)
 articlelist = page.xpath("//div[contains(@class, 'list-content')]/ul/li")
-except urllib.error.URLError as error:
+except (urllib.error.URLError, http.client.IncompleteRead) as error:
 logger.info(error)
 continue
 for article in articlelist: