Handle missing reference ID by setting it to None in article data and refactor URL construction for clarity
source/eastmoney.py (+4 -2)
@@ -94,6 +94,8 @@ def _crawl(url, article, retries=3):
     reference_id = extract_reference(article)
     if reference_id:
         article['referenceid'] = reference_id
+    else:
+        article['referenceid'] = None
     update_content(article)
 
 @task(name = "Data Collection - eastmoney", log_prints = True)
@@ -138,8 +140,8 @@ def crawl(delta):
         i = i + 1
         for article in reportinfo['data']:
            try:
-
-                url = f"{
+                link = "https://data.eastmoney.com/report/zw_macresearch.jshtml"
+                url = f"{link}?encodeUrl={article['encodeUrl']}"
                 _crawl(url, article)
             except (urllib.error.URLError, json.JSONDecodeError, KeyError) as error:
                 logger.error(error)