Replace logging with print statements for content update and reference extraction functions
Browse files - controllers/utils.py +4 -5
controllers/utils.py
CHANGED
@@ -164,7 +164,6 @@ def translate(text):
|
|
164 |
return GoogleTranslator(source='auto', target='en').translate(text)
|
165 |
|
166 |
|
167 |
-
|
168 |
def sentiment_computation(content):
|
169 |
"""
|
170 |
Compute the sentiment score and label for the given content.
|
@@ -223,7 +222,7 @@ def update_content(report):
|
|
223 |
Returns:
|
224 |
None
|
225 |
"""
|
226 |
-
|
227 |
dynamodb = get_client_connection()
|
228 |
response = dynamodb.update_item(
|
229 |
TableName="article_china",
|
@@ -282,7 +281,7 @@ def update_content(report):
|
|
282 |
}
|
283 |
})
|
284 |
vectorize(report)
|
285 |
-
|
286 |
|
287 |
|
288 |
def update_reference(report):
|
@@ -435,7 +434,7 @@ def extract_reference(row):
|
|
435 |
None
|
436 |
"""
|
437 |
try:
|
438 |
-
|
439 |
pattern = next(
|
440 |
(elem for elem in patterns if elem['site'] == row['site']), None)
|
441 |
extracted_text = extract_from_pdf_by_pattern(row['attachment'],
|
@@ -444,7 +443,7 @@ def extract_reference(row):
|
|
444 |
reference_dates = re.findall(pattern['date_regex'], extracted_text)
|
445 |
reference_titles = [s.replace(' ', '') for s in reference_titles]
|
446 |
reference_dates = [s.replace(' ', '') for s in reference_dates]
|
447 |
-
|
448 |
if 'remove' in pattern:
|
449 |
for remove_string in pattern['remove']:
|
450 |
reference_titles = [
|
|
|
164 |
return GoogleTranslator(source='auto', target='en').translate(text)
|
165 |
|
166 |
|
|
|
167 |
def sentiment_computation(content):
|
168 |
"""
|
169 |
Compute the sentiment score and label for the given content.
|
|
|
222 |
Returns:
|
223 |
None
|
224 |
"""
|
225 |
+
print("Updating content for %s", report['id'])
|
226 |
dynamodb = get_client_connection()
|
227 |
response = dynamodb.update_item(
|
228 |
TableName="article_china",
|
|
|
281 |
}
|
282 |
})
|
283 |
vectorize(report)
|
284 |
+
print(response)
|
285 |
|
286 |
|
287 |
def update_reference(report):
|
|
|
434 |
None
|
435 |
"""
|
436 |
try:
|
437 |
+
print("Extracting reference for %s", row['id'])
|
438 |
pattern = next(
|
439 |
(elem for elem in patterns if elem['site'] == row['site']), None)
|
440 |
extracted_text = extract_from_pdf_by_pattern(row['attachment'],
|
|
|
443 |
reference_dates = re.findall(pattern['date_regex'], extracted_text)
|
444 |
reference_titles = [s.replace(' ', '') for s in reference_titles]
|
445 |
reference_dates = [s.replace(' ', '') for s in reference_dates]
|
446 |
+
print("%s - %s", reference_dates, reference_titles)
|
447 |
if 'remove' in pattern:
|
448 |
for remove_string in pattern['remove']:
|
449 |
reference_titles = [
|