gavinzli committed
Commit 4a26d9b · 1 Parent(s): b6a6edc

Handle TimeoutError in crawl function to improve error handling

Files changed (2)
  1. source/safe.py +1 -1
  2. source/stats.py +1 -1
source/safe.py CHANGED
@@ -34,7 +34,7 @@ def crawl(delta):
             html_text = text.decode("utf-8")
             page = etree.HTML(html_text)
             articlelist = page.xpath("//div[contains(@class, 'list_conr')]/ul/li")
-        except urllib.error.URLError as error:
+        except (urllib.error.URLError, TimeoutError) as error:
             logger.error(error)
             continue
         for article in articlelist:
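
Why the extra exception type: urllib.request.urlopen(url, timeout=...) wraps a timeout during the initial connection in urllib.error.URLError, but a timeout while reading the response body propagates as a bare TimeoutError (socket.timeout has been an alias of TimeoutError since Python 3.10), so it would escape the old except clause and abort the crawl. A minimal sketch of the pattern, assuming placeholder names (fetch, url) and a 10-second timeout not taken from this repo:

import logging
import urllib.error
import urllib.request

logger = logging.getLogger(__name__)

def fetch(url: str) -> bytes | None:
    try:
        # A connect timeout arrives wrapped as URLError(socket.timeout),
        # but a timeout during read() propagates as a bare TimeoutError.
        with urllib.request.urlopen(url, timeout=10) as response:
            return response.read()
    except (urllib.error.URLError, TimeoutError) as error:
        # Without TimeoutError here, a slow read would crash the loop
        # instead of being logged and skipped.
        logger.error(error)
        return None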
source/stats.py CHANGED
@@ -42,7 +42,7 @@ def crawl(delta):
             html_text = text.decode("utf-8")
             page = etree.HTML(html_text)
             articlelist = page.xpath("//div[contains(@class, 'list-content')]/ul/li")
-        except (urllib.error.URLError, http.client.IncompleteRead) as error:
+        except (urllib.error.URLError, http.client.IncompleteRead, TimeoutError) as error:
             logger.info(error)
             if retries > 0:
                 time.sleep(5)  # Wait for 5 seconds before retrying
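
The stats.py handler retries rather than skipping, but the hunk ends at the sleep, so the enclosing loop is not visible in this diff. A plausible retry wrapper consistent with the visible retries / time.sleep(5) lines might look like the sketch below; the function name, URL parameter, timeout, and default retry count are assumptions, not the repo's actual code:

import http.client
import logging
import time
import urllib.error
import urllib.request

logger = logging.getLogger(__name__)

def fetch_with_retries(url: str, retries: int = 3) -> bytes | None:
    # One initial attempt plus up to `retries` more, pausing between attempts.
    while retries >= 0:
        try:
            with urllib.request.urlopen(url, timeout=10) as response:
                return response.read()
        except (urllib.error.URLError, http.client.IncompleteRead, TimeoutError) as error:
            logger.info(error)
            if retries > 0:
                time.sleep(5)  # Wait for 5 seconds before retrying
            retries -= 1
    return None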