OxbridgeEconomics
committed on
Commit
·
d0ddd7b
1
Parent(s):
422b41b
commit
Browse files
utils.py
CHANGED
@@ -14,10 +14,10 @@ from googletrans import Translator
|
|
14 |
from transformers import pipeline
|
15 |
from PyPDF2 import PdfReader
|
16 |
|
17 |
-
|
18 |
-
|
19 |
-
AWS_ACCESS_KEY_ID="<REDACTED>"
|
20 |
-
AWS_SECRET_ACCESS_KEY="<REDACTED>"
|
21 |
|
22 |
analyzer = pipeline("sentiment-analysis", model="ProsusAI/finbert")
|
23 |
|
@@ -65,23 +65,6 @@ def encode(content):
|
|
65 |
text += line
|
66 |
return text
|
67 |
|
68 |
-
# def encode(content):
|
69 |
-
# """Encode Function"""
|
70 |
-
# text = ''
|
71 |
-
# for element in content:
|
72 |
-
# if isinstance(element, etree._Element):
|
73 |
-
# subelement = etree.tostring(element).decode()
|
74 |
-
# subpage = etree.HTML(subelement)
|
75 |
-
# tree = subpage.xpath('//text()')
|
76 |
-
# line = ''.join(translist(tree)).\
|
77 |
-
# replace('\n','').replace('\t','').replace('\r','').replace(' ','').strip()
|
78 |
-
# else:
|
79 |
-
# line = element
|
80 |
-
# text += line
|
81 |
-
# index = text.find('打印本页')
|
82 |
-
# if index != -1:
|
83 |
-
# text = text[:index]
|
84 |
-
|
85 |
def encode_content(content):
|
86 |
"""Encode Function"""
|
87 |
text = ''
|
@@ -205,7 +188,7 @@ def crawl(url, article):
|
|
205 |
def upsert_content(report):
|
206 |
"""Upsert the content records"""
|
207 |
dynamodb = get_db_connection()
|
208 |
-
table = dynamodb.Table('
|
209 |
# Define the item data
|
210 |
item = {
|
211 |
'id': str(report['id']),
|
|
|
14 |
from transformers import pipeline
|
15 |
from PyPDF2 import PdfReader
|
16 |
|
17 |
+
AWS_ACCESS_KEY_ID = os.environ['AWS_ACCESS_KEY_ID']
|
18 |
+
AWS_SECRET_ACCESS_KEY = os.environ['AWS_SECRET_ACCESS_KEY']
|
19 |
+
# AWS_ACCESS_KEY_ID="<REDACTED>"
|
20 |
+
# AWS_SECRET_ACCESS_KEY="<REDACTED>"
|
21 |
|
22 |
analyzer = pipeline("sentiment-analysis", model="ProsusAI/finbert")
|
23 |
|
|
|
65 |
text += line
|
66 |
return text
|
67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
def encode_content(content):
|
69 |
"""Encode Function"""
|
70 |
text = ''
|
|
|
188 |
def upsert_content(report):
|
189 |
"""Upsert the content records"""
|
190 |
dynamodb = get_db_connection()
|
191 |
+
table = dynamodb.Table('article_china')
|
192 |
# Define the item data
|
193 |
item = {
|
194 |
'id': str(report['id']),
|