Add error handling for URL requests in crawl function
source/safe.py +9 -5
@@ -28,11 +28,15 @@ def crawl(delta):
         else:
             category_url = f"https://www.safe.gov.cn/safe/zcfgjd/index_{i}.html"
         i = i + 1
-        req = urllib.request.urlopen(category_url)
-        text = req.read()
-        html_text = text.decode("utf-8")
-        page = etree.HTML(html_text)
-        articlelist = page.xpath("//div[contains(@class, 'list_conr')]/ul/li")
+        try:
+            req = urllib.request.urlopen(category_url)
+            text = req.read()
+            html_text = text.decode("utf-8")
+            page = etree.HTML(html_text)
+            articlelist = page.xpath("//div[contains(@class, 'list_conr')]/ul/li")
+        except urllib.error.URLError as error:
+            logger.error(error)
+            continue
         for article in articlelist:
             if isinstance(article, etree._Element):
                 subelement = etree.tostring(article).decode()
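For context, a minimal self-contained sketch of how the patched crawl loop might look. Only the try/except block and the statements it wraps come from this change; the pagination loop, the `delta` bound, the `results` accumulator, the imports, and the `logger` setup are assumptions filled in for illustration and are not shown in the diff.

import logging
import urllib.error
import urllib.request

from lxml import etree

logger = logging.getLogger(__name__)


def crawl(delta):
    # Assumed pagination scheme: page 0 is index.html, later pages index_{i}.html.
    results = []
    i = 0
    while i <= delta:
        if i == 0:
            category_url = "https://www.safe.gov.cn/safe/zcfgjd/index.html"
        else:
            category_url = f"https://www.safe.gov.cn/safe/zcfgjd/index_{i}.html"
        i = i + 1
        try:
            req = urllib.request.urlopen(category_url)
            text = req.read()
            html_text = text.decode("utf-8")
            page = etree.HTML(html_text)
            articlelist = page.xpath("//div[contains(@class, 'list_conr')]/ul/li")
        except urllib.error.URLError as error:
            # On a network failure, log it and skip this index page instead of
            # letting the exception abort the whole crawl.
            logger.error(error)
            continue
        for article in articlelist:
            if isinstance(article, etree._Element):
                subelement = etree.tostring(article).decode()
                results.append(subelement)
    return results

Catching urllib.error.URLError also covers HTTPError (its subclass), so both connection failures and bad HTTP status codes are logged and skipped; because i is incremented before the request, the continue cannot retry the same page forever.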