OxbridgeEconomics
committed on
Commit
·
d3a786a
1
Parent(s):
2296ffd
commit
Browse files
ndrc.py
CHANGED
@@ -220,7 +220,7 @@ while i > -1:
|
|
220 |
article['originalContent'] = encode(page.xpath("//div[contains(@id, 'UCAP-CONTENT')]//p"))
|
221 |
content_eng = ''
|
222 |
for element in article['originalContent'].split("。"):
|
223 |
-
|
224 |
article['content'] = content_eng
|
225 |
article['site'] = "State Council"
|
226 |
article['originalSite'] = "国务院"
|
@@ -229,6 +229,24 @@ while i > -1:
|
|
229 |
article['url'] = url
|
230 |
article['category']= "Policy Release"
|
231 |
article['publishDate'] = datemodifier_gov(page.xpath("//meta[@name = 'firstpublishedtime']/@content")[0])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
232 |
else:
|
233 |
url = url.replace("../../", "https://www.ndrc.gov.cn/xxgk/jd/jd/")
|
234 |
url = url.replace("./", "https://www.ndrc.gov.cn/xxgk/jd/jd/")
|
|
|
220 |
article['originalContent'] = encode(page.xpath("//div[contains(@id, 'UCAP-CONTENT')]//p"))
|
221 |
content_eng = ''
|
222 |
for element in article['originalContent'].split("。"):
|
223 |
+
content_eng += translator.translate(element, dest='en').text + ' '
|
224 |
article['content'] = content_eng
|
225 |
article['site'] = "State Council"
|
226 |
article['originalSite'] = "国务院"
|
|
|
229 |
article['url'] = url
|
230 |
article['category']= "Policy Release"
|
231 |
article['publishDate'] = datemodifier_gov(page.xpath("//meta[@name = 'firstpublishedtime']/@content")[0])
|
232 |
+
elif "/zcfb/tz/" in url:
|
233 |
+
url = url.replace("../../zcfb/tz/", "https://www.ndrc.gov.cn/xxgk/zcfb/tz/")
|
234 |
+
req = urllib.request.urlopen(url)
|
235 |
+
text = req.read()
|
236 |
+
html_text = text.decode("utf-8")
|
237 |
+
page = etree.HTML(html_text)
|
238 |
+
article['originalContent'] = encode(page.xpath("//div[contains(@class, 'TRS_Editor')]//p"))
|
239 |
+
content_eng = ''
|
240 |
+
for element in article['originalContent'].split("。"):
|
241 |
+
content_eng += translator.translate(element, dest='en').text + ' '
|
242 |
+
article['content'] = content_eng
|
243 |
+
article['site'] = "National Development and Reform Commission"
|
244 |
+
article['originalSite'] = "国家发展和改革委员会"
|
245 |
+
article['originalTitle'] = page.xpath("//meta[@name = 'ArticleTitle']/@content")[0]
|
246 |
+
article['title'] = translator.translate(article['originalTitle'], dest='en').text
|
247 |
+
article['url'] = url
|
248 |
+
article['category']= "Policy Release"
|
249 |
+
article['publishDate'] = datemodifier(page.xpath("//meta[@name = 'PubDate']/@content")[0])
|
250 |
else:
|
251 |
url = url.replace("../../", "https://www.ndrc.gov.cn/xxgk/jd/jd/")
|
252 |
url = url.replace("./", "https://www.ndrc.gov.cn/xxgk/jd/jd/")
|