OxbridgeEconomics committed on
Commit
d3a786a
·
1 Parent(s): 2296ffd
Files changed (1) hide show
  1. ndrc.py +19 -1
ndrc.py CHANGED
@@ -220,7 +220,7 @@ while i > -1:
220
  article['originalContent'] = encode(page.xpath("//div[contains(@id, 'UCAP-CONTENT')]//p"))
221
  content_eng = ''
222
  for element in article['originalContent'].split("。"):
223
- content_eng += translator.translate(element, dest='en').text + ' '
224
  article['content'] = content_eng
225
  article['site'] = "State Council"
226
  article['originalSite'] = "国务院"
@@ -229,6 +229,24 @@ while i > -1:
229
  article['url'] = url
230
  article['category']= "Policy Release"
231
  article['publishDate'] = datemodifier_gov(page.xpath("//meta[@name = 'firstpublishedtime']/@content")[0])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
  else:
233
  url = url.replace("../../", "https://www.ndrc.gov.cn/xxgk/jd/jd/")
234
  url = url.replace("./", "https://www.ndrc.gov.cn/xxgk/jd/jd/")
 
220
  article['originalContent'] = encode(page.xpath("//div[contains(@id, 'UCAP-CONTENT')]//p"))
221
  content_eng = ''
222
  for element in article['originalContent'].split("。"):
223
+ content_eng += translator.translate(element, dest='en').text + ' '
224
  article['content'] = content_eng
225
  article['site'] = "State Council"
226
  article['originalSite'] = "国务院"
 
229
  article['url'] = url
230
  article['category']= "Policy Release"
231
  article['publishDate'] = datemodifier_gov(page.xpath("//meta[@name = 'firstpublishedtime']/@content")[0])
232
+ elif "/zcfb/tz/" in url:
233
+ url = url.replace("../../zcfb/tz/", "https://www.ndrc.gov.cn/xxgk/zcfb/tz/")
234
+ req = urllib.request.urlopen(url)
235
+ text = req.read()
236
+ html_text = text.decode("utf-8")
237
+ page = etree.HTML(html_text)
238
+ article['originalContent'] = encode(page.xpath("//div[contains(@class, 'TRS_Editor')]//p"))
239
+ content_eng = ''
240
+ for element in article['originalContent'].split("。"):
241
+ content_eng += translator.translate(element, dest='en').text + ' '
242
+ article['content'] = content_eng
243
+ article['site'] = "National Development and Reform Commission"
244
+ article['originalSite'] = "国家发展和改革委员会"
245
+ article['originalTitle'] = page.xpath("//meta[@name = 'ArticleTitle']/@content")[0]
246
+ article['title'] = translator.translate(article['originalTitle'], dest='en').text
247
+ article['url'] = url
248
+ article['category']= "Policy Release"
249
+ article['publishDate'] = datemodifier(page.xpath("//meta[@name = 'PubDate']/@content")[0])
250
  else:
251
  url = url.replace("../../", "https://www.ndrc.gov.cn/xxgk/jd/jd/")
252
  url = url.replace("./", "https://www.ndrc.gov.cn/xxgk/jd/jd/")