OxbridgeEconomics committed on
Commit
19f7db5
·
1 Parent(s): fba27b9
Files changed (1) hide show
  1. utils.py +3 -0
utils.py CHANGED
@@ -114,8 +114,11 @@ def isnot_substring(list_a, string_to_check):
114
  return True
115
 
116
  def extract_reference(row):
 
 
117
  try:
118
  pattern = next((elem for elem in patterns if elem['site'] == row['site']), None)
 
119
  extracted_text = extract_from_pdf_by_pattern(row['attachment'],pattern)
120
  reference_titles = re.findall(pattern['article_regex'], extracted_text)
121
  reference_dates = re.findall(pattern['date_regex'], extracted_text)
 
114
  return True
115
 
116
  def extract_reference(row):
117
+ print(row['site'])
118
+ print(patterns)
119
  try:
120
  pattern = next((elem for elem in patterns if elem['site'] == row['site']), None)
121
+ print(pattern)
122
  extracted_text = extract_from_pdf_by_pattern(row['attachment'],pattern)
123
  reference_titles = re.findall(pattern['article_regex'], extracted_text)
124
  reference_dates = re.findall(pattern['date_regex'], extracted_text)