OxbridgeEconomics committed
Commit 270ad28 · 1 Parent(s): c633e86
controllers/summarizer.py CHANGED
@@ -27,7 +27,6 @@ def summarize(text):
     Raises:
         None
     """
-    logging.info(text)
     llm = AzureChatOpenAI(
         openai_api_version="2023-06-01-preview",
         azure_deployment="gpt-35-turbo",
@@ -51,7 +50,6 @@ def summarize(text):
         "Given the new context, refine the original summary, keep it less than 70 words, do not insert information not found in the text,"
         "Audience is potential reader who wants to know what is the article generally about"
         "If the context isn't useful, return the original summary.")
-    logging.info(refine_template)
     refine_prompt = PromptTemplate.from_template(refine_template)
     chain = load_summarize_chain(
         llm=llm,
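
For orientation, here is a minimal sketch of the refine-style summarization flow these hunks belong to. The deployment name, API version, and refine instructions come from the diff; the import paths, the chain_type="refine" argument, the {existing_answer}/{text} placeholders, and the invocation are assumptions about surrounding code this commit does not show.

# Sketch only: reconstructs the summarize() flow around these hunks.
# Import paths and chain wiring are assumptions; only the prompt text,
# deployment name, and API version appear in the diff itself.
from langchain.chains.summarize import load_summarize_chain
from langchain.prompts import PromptTemplate
from langchain.schema import Document
from langchain_openai import AzureChatOpenAI  # older releases: langchain.chat_models

def summarize(text):
    llm = AzureChatOpenAI(
        openai_api_version="2023-06-01-preview",
        azure_deployment="gpt-35-turbo",
    )
    refine_template = (
        "Original summary: {existing_answer}\n"  # assumed placeholders for the
        "New context: {text}\n"                  # refine chain's input variables
        "Given the new context, refine the original summary, keep it less than "
        "70 words, do not insert information not found in the text. "
        "Audience is a potential reader who wants to know what the article is "
        "generally about. If the context isn't useful, return the original summary."
    )
    refine_prompt = PromptTemplate.from_template(refine_template)
    chain = load_summarize_chain(
        llm=llm,
        chain_type="refine",  # assumed: a refine_prompt implies the refine chain
        refine_prompt=refine_prompt,
    )
    docs = [Document(page_content=text)]
    return chain.invoke({"input_documents": docs})["output_text"]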
controllers/utils.py CHANGED
@@ -223,7 +223,7 @@ def update_content(report):
     """
     dynamodb = get_client_connection()
     response = dynamodb.update_item(
-        TableName="article_china",
+        TableName="article_china_test",
         Key={
             'id': {
                 'S': str(report['id'])
@@ -293,7 +293,7 @@ def update_reference(report):
     """
     dynamodb = get_client_connection()
     response = dynamodb.update_item(
-        TableName="reference_china",
+        TableName="reference_china_test",
         Key={
             'id': {
                 'S': str(report['refID'])
@@ -495,7 +495,7 @@ def extract_reference(row):
                     str(row['sourceID']) + str(row['referenceID']))
                 logging.info("%s - %s - %s - %s",
                              date, repr(title), row['sourceID'], row['referenceID'])
-                # update_reference(row)
+                update_reference(row)
             else:
                 for title in reference_titles:
                     if 'split' in pattern:
@@ -531,7 +531,7 @@ def extract_reference(row):
                     str(row['sourceID']) + str(row['referenceID']))
                 logging.info("%s - %s - %s", repr(title), row['sourceID'],
                              row['referenceID'])
-                # update_reference(row)
+                update_reference(row)
     except Exception as error:
         logging.error(error)

@@ -667,6 +667,6 @@ def crawl_by_url(url, article):
     article['id'] = uuid.uuid5(uuid.NAMESPACE_OID,
                                article['titleCN'] + article['publishDate'])
     logging.info("%s - %s", article['id'], article['site'])
-    # update_content(article)
+    update_content(article)

     data = download_files_from_s3('data')
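
Both writers share the same boto3 update_item shape; the diff shows only the TableName and Key arguments. A minimal sketch of update_content under that assumption, with a hypothetical UpdateExpression standing in for whatever attributes the real code sets:

# Sketch only: TableName and Key come from the diff; the region, the
# UpdateExpression, and the attribute names are illustrative assumptions.
import boto3

def get_client_connection():
    return boto3.client("dynamodb", region_name="us-east-1")  # region assumed

def update_content(report):
    dynamodb = get_client_connection()
    return dynamodb.update_item(
        TableName="article_china_test",
        Key={"id": {"S": str(report["id"])}},
        UpdateExpression="SET content = :c",  # hypothetical attribute
        ExpressionAttributeValues={":c": {"S": report.get("content", "")}},
    )

update_reference follows the same pattern against reference_china_test, keyed on report['refID'].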
source/eastmoney.py CHANGED
@@ -17,6 +17,7 @@ from controllers.utils import (
     fetch_url,
     sentiment_computation,
     translate,
+    update_content
 )

 with open('xpath.json', 'r', encoding='UTF-8') as f:
@@ -74,7 +75,7 @@ def _crawl(url, article):
     article['sentimentScore'], article[
         'sentimentLabel'] = sentiment_computation(contentcn.replace("\n", ""))
     extract_reference(article)
-    # update_content(article)
+    update_content(article)
     logging.info(article)

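
Re-enabling update_content means every crawled article is now persisted. Since crawl_by_url derives the DynamoDB key deterministically with uuid.uuid5, re-crawling the same article rewrites the same item instead of creating a duplicate. A small self-contained illustration (field names from the diff; the wrapper function is hypothetical):

import uuid

def article_id(title_cn, publish_date):
    # uuid5 is deterministic: the same titleCN + publishDate always yields the
    # same id, so update_item on this key acts as an idempotent upsert.
    return str(uuid.uuid5(uuid.NAMESPACE_OID, title_cn + publish_date))

assert article_id("示例标题", "2024-01-01") == article_id("示例标题", "2024-01-01")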