OxbridgeEconomics
committed on
Commit
·
e1d71ff
1
Parent(s):
9f429a0
commit
Browse files
daily.py
CHANGED
@@ -354,32 +354,37 @@ for category in categories:
|
|
354 |
else:
|
355 |
URL = f"http://www.mofcom.gov.cn/article/zcjd/{category}/?{i}"
|
356 |
i = i + 1
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
|
378 |
-
|
379 |
-
|
380 |
-
|
381 |
-
|
382 |
-
|
|
|
|
|
|
|
|
|
|
|
383 |
|
384 |
print("ndrc.gov.cn")
|
385 |
i = 0
|
|
|
354 |
else:
|
355 |
URL = f"http://www.mofcom.gov.cn/article/zcjd/{category}/?{i}"
|
356 |
i = i + 1
|
357 |
+
try:
|
358 |
+
req = urllib.request.urlopen(URL)
|
359 |
+
text = req.read()
|
360 |
+
html_text = text.decode("utf-8")
|
361 |
+
page = etree.HTML(html_text)
|
362 |
+
articlelist = page.xpath("//section[contains(@class, 'listCon iListCon f-mt30')]/ul/li")
|
363 |
+
for article in articlelist:
|
364 |
+
if isinstance(article, etree._Element):
|
365 |
+
subelement = etree.tostring(article).decode()
|
366 |
+
subpage = etree.HTML(subelement)
|
367 |
+
date = subpage.xpath("//span/text()")[0]
|
368 |
+
parsed_datetime = datetime.strptime(time.strftime("%Y-%m-%d", time.strptime(date,"%Y-%m-%d %H:%M:%S")), "%Y-%m-%d")
|
369 |
+
if parsed_datetime < (datetime.today() - timedelta(days=DELTA)):
|
370 |
+
i = -1
|
371 |
+
else:
|
372 |
+
urls = subpage.xpath("//a/@href")
|
373 |
+
for url in urls:
|
374 |
+
try:
|
375 |
+
article = {}
|
376 |
+
if '/article/zcjd' in url:
|
377 |
+
url = "http://www.mofcom.gov.cn" + url
|
378 |
+
article['category']= "Policy Interpretation"
|
379 |
+
else:
|
380 |
+
article['category']= "Policy Release"
|
381 |
+
crawl(url, article)
|
382 |
+
except Exception as error:
|
383 |
+
print(error)
|
384 |
+
except Exception as error:
|
385 |
+
i = -1
|
386 |
+
print(error)
|
387 |
+
|
388 |
|
389 |
print("ndrc.gov.cn")
|
390 |
i = 0
|