OxbridgeEconomics committed
Commit · 270ad28 · Parent(s): c633e86

commit

Files changed:
- controllers/summarizer.py +0 -2
- controllers/utils.py +5 -5
- source/eastmoney.py +2 -1
controllers/summarizer.py CHANGED

@@ -27,7 +27,6 @@ def summarize(text):
     Raises:
         None
     """
-    logging.info(text)
     llm = AzureChatOpenAI(
         openai_api_version="2023-06-01-preview",
         azure_deployment="gpt-35-turbo",
@@ -51,7 +50,6 @@ def summarize(text):
         "Given the new context, refine the original summary, keep it less than 70 words, do not insert information not found in the text,"
         "Audience is potential reader who wants to know what is the article generally about"
         "If the context isn't useful, return the original summary.")
-    logging.info(refine_template)
     refine_prompt = PromptTemplate.from_template(refine_template)
     chain = load_summarize_chain(
         llm=llm,
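
For context, a minimal sketch of what summarize() plausibly looks like after this change, assuming LangChain's standard refine summarization chain. Only the Azure settings and the tail of refine_template come from the diff; the template head, the Document wrapping, and the invoke plumbing are illustrative assumptions.

    # Minimal sketch, assuming LangChain's refine chain. Only the Azure
    # settings and the prompt tail are taken from the diff; the template
    # head and the invoke plumbing are guesses.
    from langchain.chains.summarize import load_summarize_chain
    from langchain_core.documents import Document
    from langchain_core.prompts import PromptTemplate
    from langchain_openai import AzureChatOpenAI

    def summarize(text):
        """Refine-style summary of `text`, kept under 70 words."""
        llm = AzureChatOpenAI(
            openai_api_version="2023-06-01-preview",
            azure_deployment="gpt-35-turbo",
        )
        # The first two template lines are assumed; the diff shows only the tail.
        refine_template = (
            "We have provided an existing summary: {existing_answer}\n"
            "We have new context to consider:\n{text}\n"
            "Given the new context, refine the original summary, keep it less than 70 words, do not insert information not found in the text,"
            "Audience is potential reader who wants to know what is the article generally about"
            "If the context isn't useful, return the original summary."
        )
        refine_prompt = PromptTemplate.from_template(refine_template)
        chain = load_summarize_chain(
            llm=llm,
            chain_type="refine",
            refine_prompt=refine_prompt,
        )
        result = chain.invoke({"input_documents": [Document(page_content=text)]})
        return result["output_text"]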
controllers/utils.py CHANGED

@@ -223,7 +223,7 @@ def update_content(report):
     """
     dynamodb = get_client_connection()
     response = dynamodb.update_item(
-        TableName="
+        TableName="article_china_test",
         Key={
             'id': {
                 'S': str(report['id'])
@@ -293,7 +293,7 @@ def update_reference(report):
     """
     dynamodb = get_client_connection()
     response = dynamodb.update_item(
-        TableName="
+        TableName="reference_china_test",
         Key={
             'id': {
                 'S': str(report['refID'])
@@ -495,7 +495,7 @@ def extract_reference(row):
                         str(row['sourceID']) + str(row['referenceID']))
                 logging.info("%s - %s - %s - %s",
                              date, repr(title), row['sourceID'], row['referenceID'])
-
+                update_reference(row)
             else:
                 for title in reference_titles:
                     if 'split' in pattern:
@@ -531,7 +531,7 @@ def extract_reference(row):
                         str(row['sourceID']) + str(row['referenceID']))
                 logging.info("%s - %s - %s", repr(title), row['sourceID'],
                              row['referenceID'])
-
+                update_reference(row)
     except Exception as error:
         logging.error(error)

@@ -667,6 +667,6 @@ def crawl_by_url(url, article):
     article['id'] = uuid.uuid5(uuid.NAMESPACE_OID,
                                article['titleCN'] + article['publishDate'])
     logging.info("%s - %s", article['id'], article['site'])
-
+    update_content(article)

     data = download_files_from_s3('data')
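
For reference, a hedged sketch of the call shape update_content() now has, using boto3's low-level DynamoDB client. Only TableName and the 'id' key appear in the diff; get_client_connection(), the update expression, and the 'content' attribute are assumptions. update_reference() presumably mirrors this against reference_china_test, keyed on refID.

    # Hedged sketch of update_content() after this commit. Only TableName
    # and the 'id' key come from the diff; everything else is assumed.
    import boto3

    def get_client_connection():
        # Assumed helper: the client-style update_item call in the diff
        # implies a low-level DynamoDB client rather than a Table resource.
        return boto3.client("dynamodb")

    def update_content(report):
        """Upsert one article record into the hardcoded test table."""
        dynamodb = get_client_connection()
        return dynamodb.update_item(
            TableName="article_china_test",
            Key={"id": {"S": str(report["id"])}},
            # Illustrative only -- the real UpdateExpression is not in the diff.
            UpdateExpression="SET #c = :c",
            ExpressionAttributeNames={"#c": "content"},
            ExpressionAttributeValues={":c": {"S": report.get("content", "")}},
        )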
source/eastmoney.py CHANGED

@@ -17,6 +17,7 @@ from controllers.utils import (
     fetch_url,
     sentiment_computation,
     translate,
+    update_content
 )

 with open('xpath.json', 'r', encoding='UTF-8') as f:
@@ -74,7 +75,7 @@ def _crawl(url, article):
     article['sentimentScore'], article[
         'sentimentLabel'] = sentiment_computation(contentcn.replace("\n", ""))
     extract_reference(article)
-
+    update_content(article)
     logging.info(article)
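
Taken together, the tail of _crawl() now persists each article as soon as sentiment and references are attached, rather than only logging it. A condensed sketch, with the scraping above this point elided and the origin of contentcn assumed:

    # Condensed view of _crawl()'s tail after this commit; only the last
    # four statements mirror the diff, the rest is elided or assumed.
    import logging

    from controllers.utils import (
        extract_reference,
        sentiment_computation,
        update_content,
    )

    def _crawl(url, article):
        # ... page fetch, XPath extraction, and translation elided ...
        contentcn = article.get("contentCN", "")  # assumed source of contentcn
        article['sentimentScore'], article[
            'sentimentLabel'] = sentiment_computation(contentcn.replace("\n", ""))
        extract_reference(article)
        update_content(article)  # new in this commit: persist before logging
        logging.info(article)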