OxbridgeEconomics
committed on
Commit
·
19f7db5
1
Parent(s):
fba27b9
commit
Browse files
utils.py
CHANGED
@@ -114,8 +114,11 @@ def isnot_substring(list_a, string_to_check):
|
|
114 |
return True
|
115 |
|
116 |
def extract_reference(row):
|
|
|
|
|
117 |
try:
|
118 |
pattern = next((elem for elem in patterns if elem['site'] == row['site']), None)
|
|
|
119 |
extracted_text = extract_from_pdf_by_pattern(row['attachment'],pattern)
|
120 |
reference_titles = re.findall(pattern['article_regex'], extracted_text)
|
121 |
reference_dates = re.findall(pattern['date_regex'], extracted_text)
|
|
|
114 |
return True
|
115 |
|
116 |
def extract_reference(row):
|
117 |
+
print(row['site'])
|
118 |
+
print(patterns)
|
119 |
try:
|
120 |
pattern = next((elem for elem in patterns if elem['site'] == row['site']), None)
|
121 |
+
print(pattern)
|
122 |
extracted_text = extract_from_pdf_by_pattern(row['attachment'],pattern)
|
123 |
reference_titles = re.findall(pattern['article_regex'], extracted_text)
|
124 |
reference_dates = re.findall(pattern['date_regex'], extracted_text)
|