OxbridgeEconomics committed
Commit · 270ad28 · Parent(s): c633e86

commit

Files changed:
- controllers/summarizer.py +0 -2
- controllers/utils.py +5 -5
- source/eastmoney.py +2 -1
controllers/summarizer.py CHANGED

@@ -27,7 +27,6 @@ def summarize(text):
     Raises:
         None
     """
-    logging.info(text)
     llm = AzureChatOpenAI(
         openai_api_version="2023-06-01-preview",
         azure_deployment="gpt-35-turbo",
@@ -51,7 +50,6 @@ def summarize(text):
         "Given the new context, refine the original summary, keep it less than 70 words, do not insert information not found in the text,"
         "Audience is potential reader who wants to know what is the article generally about"
         "If the context isn't useful, return the original summary.")
-    logging.info(refine_template)
     refine_prompt = PromptTemplate.from_template(refine_template)
     chain = load_summarize_chain(
         llm=llm,
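
For context, a minimal sketch of what summarize() plausibly looks like after this change, assuming LangChain's standard refine summarization chain. Only the Azure settings and the tail of refine_template come from the diff; the template head, the Document wrapping, and the invoke plumbing are illustrative assumptions.

    # Minimal sketch, assuming LangChain's refine chain. Only the Azure
    # settings and the prompt tail are taken from the diff; the template
    # head and the invoke plumbing are guesses.
    from langchain.chains.summarize import load_summarize_chain
    from langchain_core.documents import Document
    from langchain_core.prompts import PromptTemplate
    from langchain_openai import AzureChatOpenAI

    def summarize(text):
        """Refine-style summary of `text`, kept under 70 words."""
        llm = AzureChatOpenAI(
            openai_api_version="2023-06-01-preview",
            azure_deployment="gpt-35-turbo",
        )
        # The first two template lines are assumed; the diff shows only the tail.
        refine_template = (
            "We have provided an existing summary: {existing_answer}\n"
            "We have new context to consider:\n{text}\n"
            "Given the new context, refine the original summary, keep it less than 70 words, do not insert information not found in the text,"
            "Audience is potential reader who wants to know what is the article generally about"
            "If the context isn't useful, return the original summary."
        )
        refine_prompt = PromptTemplate.from_template(refine_template)
        chain = load_summarize_chain(
            llm=llm,
            chain_type="refine",
            refine_prompt=refine_prompt,
        )
        result = chain.invoke({"input_documents": [Document(page_content=text)]})
        return result["output_text"]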
controllers/utils.py CHANGED

@@ -223,7 +223,7 @@ def update_content(report):
     """
     dynamodb = get_client_connection()
     response = dynamodb.update_item(
-        TableName="
+        TableName="article_china_test",
         Key={
             'id': {
                 'S': str(report['id'])
@@ -293,7 +293,7 @@ def update_reference(report):
     """
     dynamodb = get_client_connection()
     response = dynamodb.update_item(
-        TableName="
+        TableName="reference_china_test",
         Key={
             'id': {
                 'S': str(report['refID'])
@@ -495,7 +495,7 @@ def extract_reference(row):
                         str(row['sourceID']) + str(row['referenceID']))
                 logging.info("%s - %s - %s - %s",
                              date, repr(title), row['sourceID'], row['referenceID'])
-
+                update_reference(row)
             else:
                 for title in reference_titles:
                     if 'split' in pattern:
@@ -531,7 +531,7 @@ def extract_reference(row):
                         str(row['sourceID']) + str(row['referenceID']))
                 logging.info("%s - %s - %s", repr(title), row['sourceID'],
                              row['referenceID'])
-
+                update_reference(row)
     except Exception as error:
         logging.error(error)

@@ -667,6 +667,6 @@ def crawl_by_url(url, article):
     article['id'] = uuid.uuid5(uuid.NAMESPACE_OID,
                                article['titleCN'] + article['publishDate'])
     logging.info("%s - %s", article['id'], article['site'])
-
+    update_content(article)

     data = download_files_from_s3('data')
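
For reference, a hedged sketch of the call shape update_content() now has, using boto3's low-level DynamoDB client. Only TableName and the 'id' key appear in the diff; get_client_connection(), the update expression, and the 'content' attribute are assumptions. update_reference() presumably mirrors this against reference_china_test, keyed on refID.

    # Hedged sketch of update_content() after this commit. Only TableName
    # and the 'id' key come from the diff; everything else is assumed.
    import boto3

    def get_client_connection():
        # Assumed helper: the client-style update_item call in the diff
        # implies a low-level DynamoDB client rather than a Table resource.
        return boto3.client("dynamodb")

    def update_content(report):
        """Upsert one article record into the hardcoded test table."""
        dynamodb = get_client_connection()
        return dynamodb.update_item(
            TableName="article_china_test",
            Key={"id": {"S": str(report["id"])}},
            # Illustrative only -- the real UpdateExpression is not in the diff.
            UpdateExpression="SET #c = :c",
            ExpressionAttributeNames={"#c": "content"},
            ExpressionAttributeValues={":c": {"S": report.get("content", "")}},
        )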
source/eastmoney.py CHANGED

@@ -17,6 +17,7 @@ from controllers.utils import (
     fetch_url,
     sentiment_computation,
     translate,
+    update_content
 )

 with open('xpath.json', 'r', encoding='UTF-8') as f:
@@ -74,7 +75,7 @@ def _crawl(url, article):
     article['sentimentScore'], article[
         'sentimentLabel'] = sentiment_computation(contentcn.replace("\n", ""))
     extract_reference(article)
-
+    update_content(article)
     logging.info(article)
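
Taken together, the tail of _crawl() now persists each article as soon as sentiment and references are attached, rather than only logging it. A condensed sketch, with the scraping above this point elided and the origin of contentcn assumed:

    # Condensed view of _crawl()'s tail after this commit; only the last
    # four statements mirror the diff, the rest is elided or assumed.
    import logging

    from controllers.utils import (
        extract_reference,
        sentiment_computation,
        update_content,
    )

    def _crawl(url, article):
        # ... page fetch, XPath extraction, and translation elided ...
        contentcn = article.get("contentCN", "")  # assumed source of contentcn
        article['sentimentScore'], article[
            'sentimentLabel'] = sentiment_computation(contentcn.replace("\n", ""))
        extract_reference(article)
        update_content(article)  # new in this commit: persist before logging
        logging.info(article)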