gavinzli committed on
Commit
1c87e0d
·
1 Parent(s): 23b7557

Handle missing reference ID by setting it to None in article data and refactor URL construction for clarity

Browse files
Files changed (1) hide show
  1. source/eastmoney.py +4 -2
source/eastmoney.py CHANGED
@@ -94,6 +94,8 @@ def _crawl(url, article, retries=3):
94
  reference_id = extract_reference(article)
95
  if reference_id:
96
  article['referenceid'] = reference_id
 
 
97
  update_content(article)
98
 
99
  @task(name = "Data Collection - eastmoney", log_prints = True)
@@ -138,8 +140,8 @@ def crawl(delta):
138
  i = i + 1
139
  for article in reportinfo['data']:
140
  try:
141
- domain = "https://data.eastmoney.com"
142
- url = f"{domain}/report/zw_macresearch.jshtml?encodeUrl={article['encodeUrl']}"
143
  _crawl(url, article)
144
  except (urllib.error.URLError, json.JSONDecodeError, KeyError) as error:
145
  logger.error(error)
 
94
  reference_id = extract_reference(article)
95
  if reference_id:
96
  article['referenceid'] = reference_id
97
+ else:
98
+ article['referenceid'] = None
99
  update_content(article)
100
 
101
  @task(name = "Data Collection - eastmoney", log_prints = True)
 
140
  i = i + 1
141
  for article in reportinfo['data']:
142
  try:
143
+ link = "https://data.eastmoney.com/report/zw_macresearch.jshtml"
144
+ url = f"{link}?encodeUrl={article['encodeUrl']}"
145
  _crawl(url, article)
146
  except (urllib.error.URLError, json.JSONDecodeError, KeyError) as error:
147
  logger.error(error)