gavinzli committed
Commit 4a26d9b · 1 Parent(s): b6a6edc

Handle TimeoutError in crawl function to improve error handling

Files changed (2)
  1. source/safe.py +1 -1
  2. source/stats.py +1 -1
source/safe.py CHANGED
@@ -34,7 +34,7 @@ def crawl(delta):
             html_text = text.decode("utf-8")
             page = etree.HTML(html_text)
             articlelist = page.xpath("//div[contains(@class, 'list_conr')]/ul/li")
-        except urllib.error.URLError as error:
+        except (urllib.error.URLError, TimeoutError) as error:
             logger.error(error)
             continue
         for article in articlelist:
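
Why the extra exception type: urllib.request.urlopen(url, timeout=...) wraps a timeout during the initial connection in urllib.error.URLError, but a timeout while reading the response body propagates as a bare TimeoutError (socket.timeout has been an alias of TimeoutError since Python 3.10), so it would escape the old except clause and abort the crawl. A minimal sketch of the pattern, assuming placeholder names (fetch, url) and a 10-second timeout not taken from this repo:

import logging
import urllib.error
import urllib.request

logger = logging.getLogger(__name__)

def fetch(url: str) -> bytes | None:
    try:
        # A connect timeout arrives wrapped as URLError(socket.timeout),
        # but a timeout during read() propagates as a bare TimeoutError.
        with urllib.request.urlopen(url, timeout=10) as response:
            return response.read()
    except (urllib.error.URLError, TimeoutError) as error:
        # Without TimeoutError here, a slow read would crash the loop
        # instead of being logged and skipped.
        logger.error(error)
        return None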
source/stats.py CHANGED
@@ -42,7 +42,7 @@ def crawl(delta):
             html_text = text.decode("utf-8")
             page = etree.HTML(html_text)
             articlelist = page.xpath("//div[contains(@class, 'list-content')]/ul/li")
-        except (urllib.error.URLError, http.client.IncompleteRead) as error:
+        except (urllib.error.URLError, http.client.IncompleteRead, TimeoutError) as error:
             logger.info(error)
             if retries > 0:
                 time.sleep(5)  # Wait for 5 seconds before retrying
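
The stats.py handler retries rather than skipping, but the hunk ends at the sleep, so the enclosing loop is not visible in this diff. A plausible retry wrapper consistent with the visible retries / time.sleep(5) lines might look like the sketch below; the function name, URL parameter, timeout, and default retry count are assumptions, not the repo's actual code:

import http.client
import logging
import time
import urllib.error
import urllib.request

logger = logging.getLogger(__name__)

def fetch_with_retries(url: str, retries: int = 3) -> bytes | None:
    # One initial attempt plus up to `retries` more, pausing between attempts.
    while retries >= 0:
        try:
            with urllib.request.urlopen(url, timeout=10) as response:
                return response.read()
        except (urllib.error.URLError, http.client.IncompleteRead, TimeoutError) as error:
            logger.info(error)
            if retries > 0:
                time.sleep(5)  # Wait for 5 seconds before retrying
            retries -= 1
    return None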