OxbridgeEconomics committed on
Commit
9176677
·
unverified ·
1 Parent(s): 54c7a12

Update pbc.py

Browse files
Files changed (1) hide show
  1. pbc.py +4 -11
pbc.py CHANGED
@@ -84,8 +84,6 @@ import boto3
84
  AWS_ACCESS_KEY_ID = "AKIAQFXZMGHQYXKWUDWR"
85
  AWS_SECRET_ACCESS_KEY = "D2A0IEVl5g3Ljbu0Y5iq9WuFETpDeoEpl69C+6xo"
86
 
87
- print(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
88
-
89
  def get_db_connection():
90
  """Get dynamoDB connection"""
91
  dynamodb = boto3.resource(
@@ -105,9 +103,9 @@ def upsert_content(report):
105
  'id': str(report['id']),
106
  'site': report['site'],
107
  'title': report['title'],
108
- 'originalSite': report['originalSite'],
109
- 'originalTitle': report['originalTitle'],
110
- 'originalContent': report['originalContent'],
111
  'category': report['category'],
112
  # 'author': report['author'],
113
  'content': report['content'],
@@ -132,7 +130,6 @@ for categoryu_url in categoryu_urls:
132
  for url in urls:
133
  try:
134
  url = "http://www.pbc.gov.cn" + url
135
- print(url)
136
  article = {}
137
  response = requests.get(url)
138
  response.encoding = 'utf-8'
@@ -142,18 +139,15 @@ for categoryu_url in categoryu_urls:
142
  for element in article['originalContent'].split("。"):
143
  content_eng += translator.translate(element, dest='en').text + ' '
144
  article['content'] = content_eng
145
- print(article['content'])
146
  article['site'] = "The People's Bank of China"
147
  article['originalSite'] = "中国人民银行"
148
  article['originalTitle'] = page.xpath("//title/text()")[0]
149
- print(article['originalTitle'])
150
  article['title'] = translator.translate(article['originalTitle'], dest='en').text
151
  article['url'] = url
152
  article['category']= "Policy Interpretation"
153
  article['publishDate'] = datemodifier(page.xpath("//meta[@name = '页面生成时间']/@content")[0])
154
  parsed_datetime = datetime.strptime(time.strftime("%Y-%m-%d", time.strptime(article['publishDate'],"%Y-%m-%d")), "%Y-%m-%d")
155
- if parsed_datetime < (datetime.today() - timedelta(days=180)):
156
- print(article['publishDate'])
157
  continue
158
  article['id'] = uuid.uuid5(uuid.NAMESPACE_OID, article['title']+article['publishDate'])
159
  label_dict = {
@@ -178,7 +172,6 @@ for categoryu_url in categoryu_urls:
178
  sentiment_score = sentiment_score + 0
179
  article['sentimentScore'] = sentiment_score
180
  article['sentimentLabel'] = label_dict[sentiment_label]
181
- print(article)
182
  upsert_content(article)
183
  except Exception as error:
184
  print(error)
 
84
  AWS_ACCESS_KEY_ID = "AKIAQFXZMGHQYXKWUDWR"
85
  AWS_SECRET_ACCESS_KEY = "D2A0IEVl5g3Ljbu0Y5iq9WuFETpDeoEpl69C+6xo"
86
 
 
 
87
  def get_db_connection():
88
  """Get dynamoDB connection"""
89
  dynamodb = boto3.resource(
 
103
  'id': str(report['id']),
104
  'site': report['site'],
105
  'title': report['title'],
106
+ # 'originalSite': report['originalSite'],
107
+ # 'originalTitle': report['originalTitle'],
108
+ # 'originalContent': report['originalContent'],
109
  'category': report['category'],
110
  # 'author': report['author'],
111
  'content': report['content'],
 
130
  for url in urls:
131
  try:
132
  url = "http://www.pbc.gov.cn" + url
 
133
  article = {}
134
  response = requests.get(url)
135
  response.encoding = 'utf-8'
 
139
  for element in article['originalContent'].split("。"):
140
  content_eng += translator.translate(element, dest='en').text + ' '
141
  article['content'] = content_eng
 
142
  article['site'] = "The People's Bank of China"
143
  article['originalSite'] = "中国人民银行"
144
  article['originalTitle'] = page.xpath("//title/text()")[0]
 
145
  article['title'] = translator.translate(article['originalTitle'], dest='en').text
146
  article['url'] = url
147
  article['category']= "Policy Interpretation"
148
  article['publishDate'] = datemodifier(page.xpath("//meta[@name = '页面生成时间']/@content")[0])
149
  parsed_datetime = datetime.strptime(time.strftime("%Y-%m-%d", time.strptime(article['publishDate'],"%Y-%m-%d")), "%Y-%m-%d")
150
+ if parsed_datetime < (datetime.today() - timedelta(days=183)):
 
151
  continue
152
  article['id'] = uuid.uuid5(uuid.NAMESPACE_OID, article['title']+article['publishDate'])
153
  label_dict = {
 
172
  sentiment_score = sentiment_score + 0
173
  article['sentimentScore'] = sentiment_score
174
  article['sentimentLabel'] = label_dict[sentiment_label]
 
175
  upsert_content(article)
176
  except Exception as error:
177
  print(error)