OxbridgeEconomics
committed on
Commit
·
ca144fd
1
Parent(s):
eaaa5dd
commit
Browse files
utils.py
CHANGED
@@ -120,10 +120,12 @@ def extract_reference(row):
|
|
120 |
reference_dates = re.findall(pattern['date_regex'], extracted_text)
|
121 |
reference_titles = [s.replace(' ', '') for s in reference_titles]
|
122 |
reference_dates = [s.replace(' ', '') for s in reference_dates]
|
|
|
123 |
if 'remove' in pattern:
|
124 |
for remove_string in pattern['remove']:
|
125 |
reference_titles = [s.replace(remove_string, '') for s in reference_titles]
|
126 |
for title, date in zip(reference_titles, reference_dates):
|
|
|
127 |
try:
|
128 |
date = datetime.strptime(date, pattern['date_format'])
|
129 |
except:
|
|
|
120 |
reference_dates = re.findall(pattern['date_regex'], extracted_text)
|
121 |
reference_titles = [s.replace(' ', '') for s in reference_titles]
|
122 |
reference_dates = [s.replace(' ', '') for s in reference_dates]
|
123 |
+
print(reference_dates, reference_titles)
|
124 |
if 'remove' in pattern:
|
125 |
for remove_string in pattern['remove']:
|
126 |
reference_titles = [s.replace(remove_string, '') for s in reference_titles]
|
127 |
for title, date in zip(reference_titles, reference_dates):
|
128 |
+
print(title, date)
|
129 |
try:
|
130 |
date = datetime.strptime(date, pattern['date_format'])
|
131 |
except:
|