OxbridgeEconomics committed on
Commit
ca144fd
·
1 Parent(s): eaaa5dd
Files changed (1) hide show
  1. utils.py +2 -0
utils.py CHANGED
@@ -120,10 +120,12 @@ def extract_reference(row):
120
  reference_dates = re.findall(pattern['date_regex'], extracted_text)
121
  reference_titles = [s.replace(' ', '') for s in reference_titles]
122
  reference_dates = [s.replace(' ', '') for s in reference_dates]
 
123
  if 'remove' in pattern:
124
  for remove_string in pattern['remove']:
125
  reference_titles = [s.replace(remove_string, '') for s in reference_titles]
126
  for title, date in zip(reference_titles, reference_dates):
 
127
  try:
128
  date = datetime.strptime(date, pattern['date_format'])
129
  except:
 
120
  reference_dates = re.findall(pattern['date_regex'], extracted_text)
121
  reference_titles = [s.replace(' ', '') for s in reference_titles]
122
  reference_dates = [s.replace(' ', '') for s in reference_dates]
123
+ print(reference_dates, reference_titles)
124
  if 'remove' in pattern:
125
  for remove_string in pattern['remove']:
126
  reference_titles = [s.replace(remove_string, '') for s in reference_titles]
127
  for title, date in zip(reference_titles, reference_dates):
128
+ print(title, date)
129
  try:
130
  date = datetime.strptime(date, pattern['date_format'])
131
  except: