Muhammad Abdur Rahman Saad committed
Commit 4259f95 · 1 Parent(s): 91fadcf

fix logging issue

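The change below swaps the standard-library logging calls in each crawler for Prefect's run logger, so log lines from these @task-decorated functions are captured in the Prefect flow-run logs rather than going only to the root logger. A minimal sketch of the pattern, assuming Prefect 2.x (task name and message are illustrative):

from prefect import task, get_run_logger

@task
def crawl(delta):
    # get_run_logger() is only valid inside a flow or task run; the logger
    # it returns forwards records to the Prefect backend and the UI.
    logger = get_run_logger()
    logger.info("starting crawl, delta=%s", delta)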
source/cbirc.py CHANGED
@@ -1,10 +1,9 @@
 """Module to crawl the website 'https://www.cbirc.gov.cn' to fetch and process articles."""
 import json
-import logging
 import time
 import uuid
 from datetime import datetime, timedelta
-from prefect import task
+from prefect import task, get_run_logger
 
 from controllers.summarizer import summarize
 from controllers.utils import (
@@ -29,7 +28,8 @@ def crawl(delta):
     Raises:
         None
     """
-    logging.info("cbirc.gov.cn")
+    logger = get_run_logger()
+    logger.info("cbirc.gov.cn")
     i = 1
     while i > -1:
         category_url= f"https://www.cbirc.gov.cn/cn/static/data/DocInfo/SelectDocByItemIdAndChild/data_itemId=917,pageIndex={i},pageSize=18.json"
@@ -73,4 +73,4 @@ def crawl(delta):
             article['subtitle'] = summarize(article['content'])
             update_content(article)
         except Exception as error:
-            logging.error(error)
+            logger.error(error)
source/csrc.py CHANGED
@@ -1,12 +1,11 @@
 """Module to crawl the website 'https://www.csrc.gov.cn' to fetch and process articles."""
 import json
-import logging
 import time
 import urllib.request
 import uuid
 from datetime import datetime, timedelta
 
-from prefect import task
+from prefect import task, get_run_logger
 from lxml import etree
 
 from controllers.summarizer import summarize
@@ -33,7 +32,8 @@ def crawl(delta):
     Raises:
         None
     """
-    logging.info("csrc.gov.cn")
+    logger = get_run_logger()
+    logger.info("csrc.gov.cn")
     i = 1
     while i > -1:
         try:
@@ -42,7 +42,7 @@ def crawl(delta):
             else:
                 category_url = f"http://www.csrc.gov.cn/csrc/c100039/common_list_{i}.shtml"
             i = i + 1
-            logging.info(category_url)
+            logger.info(category_url)
             req = urllib.request.urlopen(category_url)
             text = req.read()
             html_text = text.decode("utf-8")
@@ -66,20 +66,20 @@ def crawl(delta):
                     article = {}
                     url = "http://www.csrc.gov.cn" + url
                     article['category'] = "Policy Interpretation"
-                    logging.info(url)
+                    logger.info(url)
                     crawl_by_url(url, article)
                 except Exception as error:
-                    logging.error(error)
+                    logger.error(error)
         except Exception as error:
             i = -1
-            logging.error(error)
+            logger.error(error)
 
     i = 1
     while i > -1:
         category_url = f"http://www.csrc.gov.cn/searchList/a1a078ee0bc54721ab6b148884c784a8?_isAgg=true&_isJson=true&_pageSize=18&_template=index&_rangeTimeGte=&_channelName=&page={i}"
         i = i + 1
         try:
-            logging.info(category_url)
+            logger.info(category_url)
             content = fetch_url(category_url)
             if content is None:
                 i = -1
@@ -87,7 +87,7 @@ def crawl(delta):
             reportinfo = json.loads(content)
             if len(reportinfo['data']['results']) == 0:
                 i = -1
-            logging.info(len(reportinfo['data']['results']))
+            logger.info(len(reportinfo['data']['results']))
             for article in reportinfo['data']['results']:
                 parsed_datetime = datetime.strptime(
                     time.strftime(
@@ -122,8 +122,8 @@ def crawl(delta):
                 article['id'] = uuid.uuid5(
                     uuid.NAMESPACE_OID,
                     article['titleCN'] + article['publishDate'])
-                logging.info(article)
+                logger.info(article)
                 # update_content(article)
         except Exception as error:
             i = -1
-            logging.error(error)
+            logger.error(error)
source/eastmoney.py CHANGED
@@ -1,12 +1,11 @@
 """Module to crawl the website 'eastmoney.com' to fetch and process articles."""
 import json
-import logging
 import urllib.request
 import uuid
 from datetime import datetime, timedelta
 from urllib.parse import urlparse
 
-from prefect import task
+from prefect import task, get_run_logger
 from lxml import etree
 
 from controllers.summarizer import summarize
@@ -55,7 +54,7 @@ def _crawl(url, article):
         article['site'] = translate(article['orgSName'])
     else:
         article['site'] = translate(article['orgName'])
-    logging.info(article)
+    print(f'INFO - {article}')
     article['titleCN'] = article['title']
     article['title'] = translate(article['title'])
     article['author'] = translate(article['researcher'])
@@ -68,7 +67,7 @@ def _crawl(url, article):
     for element in contentcn.split("\n"):
         contenteng += translate(element) + '\n'
     article['content'] = repr(contenteng)[1:-1].strip()
-    logging.info(article)
+    print(f'INFO - {article}')
     article['subtitle'] = summarize(article['content'])
     article['authorid'] = uuid.uuid5(uuid.NAMESPACE_OID, article['author'])
     article['publishDate'] = datemodifier(
@@ -77,7 +76,7 @@ def _crawl(url, article):
         article['titleCN'] + article['publishDate'])
     article['sentimentScore'], article[
         'sentimentLabel'] = sentiment_computation(contentcn.replace("\n", ""))
-    logging.info(article)
+    print(f'INFO - {article}')
     extract_reference(article)
     update_content(article)
 
@@ -95,7 +94,8 @@ def crawl(delta):
     Raises:
         None
     """
-    logging.info("data.eastmoney.com")
+    logger = get_run_logger()
+    logger.info("data.eastmoney.com")
     today = datetime.today().strftime('%Y-%m-%d')
     i = 0
     while i > -1:
@@ -123,8 +123,8 @@ def crawl(delta):
                     url = f"https://data.eastmoney.com/report/zw_macresearch.jshtml?encodeUrl={article['encodeUrl']}"
                     _crawl(url, article)
                 except Exception as error:
-                    logging.error(error)
+                    logger.error(error)
             else:
                 i = -1
         else:
-            logging.error("Failed to fetch URL: %s", category_url)
+            logger.error("Failed to fetch URL: %s", category_url)
source/gov.py CHANGED
@@ -1,11 +1,10 @@
 """Module to crawl the website 'https://www.gov.cn' to fetch and process articles."""
-import logging
 import time
 import urllib.request
 from datetime import datetime, timedelta
 
 from lxml import etree
-from prefect import task
+from prefect import task, get_run_logger
 
 from controllers.utils import crawl_by_url
 
@@ -20,7 +19,8 @@ def crawl(delta):
     Returns:
         None
     """
-    logging.info("gov.cn")
+    logger = get_run_logger()
+    logger.info("gov.cn")
     i = 0
     while i > -1:
         if i == 0:
@@ -53,7 +53,7 @@ def crawl(delta):
                 article['category'] = "Policy Interpretation"
                 crawl_by_url(url, article)
             except Exception as error:
-                logging.error(error)
+                logger.error(error)
     i = 0
     while i > -1:
         if i == 0:
@@ -86,4 +86,4 @@ def crawl(delta):
                 article['site'] = "State Council of China"
                 crawl_by_url(url, article)
             except Exception as error:
-                logging.error(error)
+                logger.error(error)
source/mof.py CHANGED
@@ -1,11 +1,10 @@
 """Module to crawl the website 'https://www.mof.gov.cn' to fetch and process articles."""
-import logging
 import time
 import urllib.request
 from datetime import datetime, timedelta
 
 from lxml import etree
-from prefect import task
+from prefect import task, get_run_logger
 
 from controllers.utils import crawl_by_url
 
@@ -20,7 +19,8 @@ def crawl(delta):
     Returns:
         None
     """
-    logging.info("mof.gov.cn")
+    logger = get_run_logger()
+    logger.info("mof.gov.cn")
     i = 0
     while i > -1:
         if i == 0:
@@ -56,7 +56,7 @@ def crawl(delta):
                 article['category'] = "Financial News"
                 crawl_by_url(url, article)
             except Exception as error:
-                logging.error(error)
+                logger.error(error)
 
     i = 0
     while i > -1:
@@ -91,4 +91,4 @@ def crawl(delta):
                 article['category'] = "Policy Interpretation"
                 crawl_by_url(url, article)
             except Exception as error:
-                logging.error(error)
+                logger.error(error)
source/mofcom.py CHANGED
@@ -1,11 +1,10 @@
 """Module to crawl the website 'https://www.mofcom.gov.cn' to fetch and process articles."""
-import logging
 import time
 import urllib.request
 from datetime import datetime, timedelta
 
 from lxml import etree
-from prefect import task
+from prefect import task, get_run_logger
 
 from controllers.utils import crawl_by_url
 
@@ -20,7 +19,8 @@ def crawl(delta):
     Returns:
         None
     """
-    logging.info("mofcom.gov.cn")
+    logger = get_run_logger()
+    logger.info("mofcom.gov.cn")
     categories = ['jdzhsw', 'jdgnmy', 'jddwmy', 'jdtzhz']
     for category in categories:
         i = 1
@@ -60,7 +60,7 @@ def crawl(delta):
                     article['category'] = "Policy Release"
                     crawl_by_url(url, article)
                 except Exception as error:
-                    logging.error(error)
+                    logger.error(error)
             except Exception as error:
                 i = -1
-                logging.error(error)
+                logger.error(error)
source/ndrc.py CHANGED
@@ -1,11 +1,10 @@
 """Module to crawl the website 'https://www.ndrc.gov.cn' to fetch and process articles."""
-import logging
 import time
 import urllib.request
 from datetime import datetime, timedelta
 
 from lxml import etree
-from prefect import task
+from prefect import task, get_run_logger
 
 from controllers.utils import crawl_by_url
 
@@ -23,7 +22,8 @@ def crawl(delta):
     Raises:
         None
     """
-    logging.info("ndrc.gov.cn")
+    logger = get_run_logger()
+    logger.info("ndrc.gov.cn")
     i = 0
     while i > -1:
         if i == 0:
@@ -65,4 +65,4 @@ def crawl(delta):
                 article['category'] = "Policy Interpretation"
                 crawl_by_url(url, article)
             except Exception as error:
-                logging.error(error)
+                logger.error(error)
source/safe.py CHANGED
@@ -1,11 +1,10 @@
 """Module to crawl the website 'https://www.safe.gov.cn' to fetch and process articles."""
-import logging
 import time
 import urllib.request
 from datetime import datetime, timedelta
 
 from lxml import etree
-from prefect import task
+from prefect import task, get_run_logger
 
 from controllers.utils import crawl_by_url
 
@@ -20,7 +19,8 @@ def crawl(delta):
     Returns:
         None
     """
-    logging.info("safe.gov.cn")
+    logger = get_run_logger()
+    logger.info("safe.gov.cn")
     i = 1
     while i > -1:
         if i == 1:
@@ -52,7 +52,7 @@ def crawl(delta):
                 article['category'] = "Policy Interpretation"
                 crawl_by_url(url, article)
             except Exception as error:
-                logging.error(error)
+                logger.error(error)
 
     i = 1
    while i > -1:
@@ -85,4 +85,4 @@ def crawl(delta):
                 article['category'] = "Data Interpretation"
                 crawl_by_url(url, article)
             except Exception as error:
-                logging.error(error)
+                logger.error(error)
source/stats.py CHANGED
@@ -1,11 +1,10 @@
 """Module to crawl the website 'https://www.stats.gov.cn' to fetch and process articles."""
-import logging
 import time
 import urllib.request
 from datetime import datetime, timedelta
 
 from lxml import etree
-from prefect import task
+from prefect import task, get_run_logger
 
 from controllers.utils import crawl_by_url, encode
 
@@ -23,7 +22,8 @@ def crawl(delta):
     Raises:
         None
     """
-    logging.info("stats.gov.hk")
+    logger = get_run_logger()
+    logger.info("stats.gov.hk")
     i = 0
     while i > -1:
         if i == 0:
@@ -55,4 +55,4 @@ def crawl(delta):
                 article['category'] = "Data Interpretation"
                 crawl_by_url(url, article)
             except Exception as error:
-                logging.info(error)
+                logger.info(error)