Spaces:
Build error
Build error
import datetime | |
import re | |
from pydantic import ( | |
BaseModel, | |
Field, | |
ValidationInfo, | |
computed_field, | |
model_validator, | |
ConfigDict | |
) | |
import pandas as pd | |
class UKBankAccountStatement(BaseModel):
    """Values extracted from a UK bank account statement plus validation output.

    The first group of fields holds the extracted statement data (all optional,
    defaulting to ``None``). The ``*_err_msgs`` fields hold the comma-joined
    failure messages written by the ``validate_*`` functions, and
    ``validation_policy_status_df`` collects one row per policy check with the
    columns ``Policy``, ``Value``, ``Status`` and ``Message``.
    """

    # pd.DataFrame is not a pydantic-native type, so arbitrary types are allowed.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    statement_start_date: datetime.date | None = Field(
        default=None,
        description="Digital Bank account statement period's start date in YYYY-MM-DD format",
        examples=["2025-01-01"],
    )
    statement_end_date: datetime.date | None = Field(
        default=None,
        description="Digital Bank account statement period's end date in YYYY-MM-DD format",
        examples=["2025-01-31"],
    )
    first_salary_deposit_date_present: int | datetime.date | None = Field(
        default=None,
        description=(
            "The day/date of the very first salary deposit line item present in"
            " the bank account statement. Value must be gte 1 & lte 31"
        ),
        examples=[
            "If first present salary deposit date is 2025-01-06, then 6 must be passed"
        ],
    )
    # Length bounds (4-50) are checked in validate_bank_name, not via Field constraints.
    bank_name: str | None = Field(
        default=None,
        description="Extracted bank name value, stripped of whitespaces at beginning & end",
        examples=["HSBC"],
    )
    # Length bounds (2-61) are checked in validate_full_name, not via Field constraints.
    full_name: str | None = Field(
        default=None,
        description="Applicant's full name. Must consist of at least 2 words, have length gte 2 & lte 61",
        examples=["Jodie Pippa"],
    )
    # The 8-digit format is checked in validate_account_number, e.g. 12345678.
    account_number: str | None = Field(
        default=None,
        description="UK Bank Account Statement's account number. Must be of 8 characters length only",
        examples=["12345678"],
    )
    # The xx-xx-xx format is checked in validate_sort_code, e.g. 20-00-00.
    sort_code: str | None = Field(
        default=None,
        description="UK Bank Account Sort Code. Must be of length 8 characters only. Format: xx-xx-xx",
        # `examples` takes a list of example values, like every other field here.
        examples=["20-00-00"],
    )
    # Disabled field, kept for reference:
    # is_salary_credit_consistent_across_months: bool = Field(
    #     default=False,
    #     description=(
    #         "If the bank account statement spans several months, sense check "
    #         "whether salary deposit amounts across months are consistent"
    #     ),
    #     examples=[True, False, None],
    # )

    # Comma-joined failure messages per check group; None means no failures.
    account_statement_date_err_msgs: str | None = None
    full_name_err_msgs: str | None = None
    bank_name_err_msgs: str | None = None
    account_number_err_msgs: str | None = None
    sort_code_err_msgs: str | None = None
    salary_deposit_err_msgs: str | None = None

    # The validate_* functions append rows to this frame in place, so every
    # instance is given its own fresh frame rather than a class-level default.
    validation_policy_status_df: pd.DataFrame = Field(
        default_factory=lambda: pd.DataFrame(
            columns=["Policy", "Value", "Status", "Message"]
        )
    )
def validate_full_name(cls, values, info: "ValidationInfo"):
    """Check the extracted full name and record the outcome of every policy.

    Four checks run in a fixed order: presence, length (2 to 61 characters),
    case-insensitive equality with ``application_summary_full_name`` taken from
    the validation context, and a minimum of two words. Each check appends one
    row to ``values.validation_policy_status_df``.

    Args:
        cls: Unused.
        values: Object exposing ``full_name``, ``full_name_err_msgs`` and
            ``validation_policy_status_df``.
        info: Object whose ``context`` is either falsy or a mapping that may
            contain ``application_summary_full_name``.

    Returns:
        The same ``values`` object. ``full_name_err_msgs`` is set to the
        comma-joined failure messages, or ``None`` when every check passed.
    """
    status_df = values.validation_policy_status_df
    err_msgs = []

    def record(policy, value, passed, message):
        # Append at label len(df); relies on the frame keeping a 0..n-1 index.
        status_df.loc[len(status_df)] = [policy, value, passed, message]

    context = info.context
    expected = context.get("application_summary_full_name") if context else None
    full_name_val = values.full_name

    # 1. Presence
    if full_name_val:
        record("Full Name", full_name_val, True, "Applicant's full name is present")
    else:
        err_msgs.append("Applicant's full name not present")
        record("Full Name", full_name_val, False, "Applicant's full name not present")

    # 2. Length. A missing name has length 0 and therefore fails. The bounds
    # are tested on their own so that a present but too short/long name is
    # flagged as well.
    full_name_val_len = len(full_name_val) if full_name_val else 0
    if 2 <= full_name_val_len <= 61:
        record(
            "Full Name",
            full_name_val_len,
            True,
            "Full name has a length of at least 2 & at most 61",
        )
    else:
        err_msgs.append("Full name must have a length of at least 2 & at most 61")
        record(
            "Full Name",
            full_name_val_len,
            False,
            "Full name does not have a length of at least 2 & at most 61",
        )

    # 3. Case-insensitive match with the expected name; a missing expected
    # value or a missing extracted name counts as a mismatch.
    names_match = bool(
        expected and full_name_val and full_name_val.lower() == expected.lower()
    )
    if names_match:
        record(
            "Data Match",
            f"{full_name_val}, {expected}",
            True,
            "Name matches with provided value",
        )
    else:
        err_msgs.append("Name mismatch with provided value")
        record(
            "Data Match",
            f"{full_name_val}, {expected}",
            False,
            "Name does not match with provided value",
        )

    # 4. Word count. split() without arguments treats any run of whitespace
    # (spaces, tabs, ...) as a single separator.
    word_count = len(full_name_val.split()) if full_name_val else 0
    if word_count >= 2:
        record(
            "Full Name",
            full_name_val,
            True,
            "Full name consists of at least 2 words (first name + last name)",
        )
    else:
        err_msgs.append(
            "Full name must consist of at least 2 words (first name + last name)"
        )
        record(
            "Full Name",
            full_name_val,
            False,
            "Full name does not consist of at least 2 words (first name + last name)",
        )

    values.full_name_err_msgs = ", ".join(err_msgs) if err_msgs else None
    return values
def validate_bank_name(cls, values, info: "ValidationInfo"):
    """Check the extracted bank name and record the outcome of every policy.

    Three checks run in a fixed order: presence, length (4 to 50 characters)
    and case-insensitive equality with ``application_summary_bank_name`` taken
    from the validation context. Each check appends one row to
    ``values.validation_policy_status_df``.

    Args:
        cls: Unused.
        values: Object exposing ``bank_name``, ``bank_name_err_msgs`` and
            ``validation_policy_status_df``.
        info: Object whose ``context`` is either falsy or a mapping that may
            contain ``application_summary_bank_name``.

    Returns:
        The same ``values`` object. ``bank_name_err_msgs`` is set to the
        comma-joined failure messages, or ``None`` when every check passed.
    """
    status_df = values.validation_policy_status_df
    err_msgs = []

    def record(policy, value, passed, message):
        # Append at label len(df); relies on the frame keeping a 0..n-1 index.
        status_df.loc[len(status_df)] = [policy, value, passed, message]

    context = info.context
    expected = context.get("application_summary_bank_name") if context else None
    bank_name_val = values.bank_name

    # 1. Presence
    if bank_name_val:
        record("Bank name", bank_name_val, True, "Bank name is present")
    else:
        err_msgs.append("Bank name not present")
        record("Bank name", bank_name_val, False, "Bank name is not present")

    # 2. Length. A missing name has length 0 and therefore fails. The bounds
    # are tested on their own so that a present but too short/long name is
    # flagged as well.
    bank_name_val_len = len(bank_name_val) if bank_name_val else 0
    if 4 <= bank_name_val_len <= 50:
        record(
            "Bank name",
            bank_name_val_len,
            True,
            "Bank name has a length of at least 4 & at most 50",
        )
    else:
        err_msgs.append("Bank name must have a length of at least 4 & at most 50")
        record(
            "Bank name",
            bank_name_val_len,
            False,
            "Bank name does not have a length of at least 4 & at most 50",
        )

    # 3. Case-insensitive match with the expected bank name; a missing
    # expected value or a missing extracted name counts as a mismatch.
    names_match = bool(
        expected and bank_name_val and bank_name_val.lower() == expected.lower()
    )
    if names_match:
        record(
            "Data Match",
            f"{bank_name_val}, {expected}",
            True,
            "Bank name matches with provided value",
        )
    else:
        err_msgs.append("Bank name mismatch with provided value")
        record(
            "Data Match",
            f"{bank_name_val}, {expected}",
            False,
            "Bank name does not match with provided value",
        )

    values.bank_name_err_msgs = ", ".join(err_msgs) if err_msgs else None
    return values
def validate_account_number(cls, values):
    """Check that the extracted account number is present and is 8 digits.

    Two checks run in order, presence and format. Each appends one row to
    ``values.validation_policy_status_df``.

    Args:
        cls: Unused.
        values: Object exposing ``account_number``, ``account_number_err_msgs``
            and ``validation_policy_status_df``.

    Returns:
        The same ``values`` object. ``account_number_err_msgs`` is set to the
        comma-joined failure messages, or ``None`` when both checks passed.
    """
    status_df = values.validation_policy_status_df
    err_msgs = []

    def record(value, passed, message):
        # Append at label len(df); relies on the frame keeping a 0..n-1 index.
        status_df.loc[len(status_df)] = [
            "Bank account number",
            value,
            passed,
            message,
        ]

    account_number = values.account_number

    # 1. Presence
    if account_number:
        record(account_number, True, "Bank account number is present.")
    else:
        err_msgs.append(
            "Bank account number not present. Bank account number must be present."
        )
        record(account_number, False, "Bank account number is not present.")

    # 2. Format: exactly eight ASCII digits. [0-9] is used instead of \d
    # because \d also matches non-ASCII Unicode digits in str patterns.
    if account_number and re.fullmatch(r"[0-9]{8}", account_number):
        record(account_number, True, "Provided account number is valid")
    else:
        err_msgs.append(
            "Provided account number is invalid. It must be of 8 digits length only"
        )
        record(account_number, False, "Provided account number is invalid")

    values.account_number_err_msgs = ", ".join(err_msgs) if err_msgs else None
    return values
def validate_sort_code(cls, values):
    """Check that the extracted sort code is present and formatted as xx-xx-xx.

    Two checks run in order, presence and format. Each appends one row to
    ``values.validation_policy_status_df``.

    Args:
        cls: Unused.
        values: Object exposing ``sort_code``, ``sort_code_err_msgs`` and
            ``validation_policy_status_df``.

    Returns:
        The same ``values`` object. ``sort_code_err_msgs`` is set to the
        comma-joined failure messages, or ``None`` when both checks passed.
    """
    status_df = values.validation_policy_status_df
    err_msgs = []

    def record(value, passed, message):
        # Append at label len(df); relies on the frame keeping a 0..n-1 index.
        status_df.loc[len(status_df)] = ["Sort code", value, passed, message]

    sort_code = values.sort_code

    # 1. Presence
    if sort_code:
        record(sort_code, True, "Sort code is present.")
    else:
        err_msgs.append("Sort code not present. Sort number must be present.")
        record(sort_code, False, "Sort code is not present.")

    # 2. Format: three pairs of ASCII digits with mandatory hyphens. [0-9] is
    # used instead of \d because \d also matches non-ASCII Unicode digits in
    # str patterns.
    if sort_code and re.fullmatch(r"[0-9]{2}-[0-9]{2}-[0-9]{2}", sort_code):
        record(sort_code, True, "Sort code's format is valid.")
    else:
        err_msgs.append(
            "Provided sort code's format is invalid. It must be of the format xx-xx-xx wherein x are digits."
        )
        record(sort_code, False, "Sort code's format is invalid.")

    values.sort_code_err_msgs = ", ".join(err_msgs) if err_msgs else None
    return values
def validate_bank_account_statement_dates(cls, values):
    """Check the statement period's dates and record each policy outcome.

    A "Date checks" row is always appended to
    ``values.validation_policy_status_df`` stating whether both the start and
    the end date are present. A "Coverage" row is appended only when both dates
    are present; it passes when the end date lies at least 28 days after the
    start date.

    Args:
        cls: Unused.
        values: Object exposing ``statement_start_date``,
            ``statement_end_date``, ``account_statement_date_err_msgs`` and
            ``validation_policy_status_df``.

    Returns:
        The same ``values`` object. ``account_statement_date_err_msgs`` is set
        to the comma-joined failure messages, or ``None`` when nothing failed.
    """
    failures = []
    period_start = values.statement_start_date
    period_end = values.statement_end_date
    both_present = bool(period_start and period_end)

    def add_row(policy, passed, message):
        # Every row reports the same "<start>, <end>" pair as its value.
        frame = values.validation_policy_status_df
        frame.loc[len(frame)] = [
            policy,
            f"{period_start}, {period_end}",
            passed,
            message,
        ]

    if both_present:
        add_row(
            "Date checks",
            True,
            "Both statement start date & statement end date are present",
        )
    else:
        failures.append(
            "Both statement start date & statement end date must be present"
        )
        add_row(
            "Date checks",
            False,
            "Both statement start date & statement end date are not present",
        )

    if both_present:
        gap_days = (period_end - period_start).days
        if gap_days >= 28:
            add_row(
                "Coverage",
                True,
                "Account statement period's start date & end date have a gap of at least 28 days",
            )
        else:
            failures.append(
                "Account statement period's start date & end date must have a gap of at least 28 days"
            )
            add_row(
                "Coverage",
                False,
                "Account statement period's start date & end date donot have a gap of at least 28 days",
            )

    values.account_statement_date_err_msgs = (
        ", ".join(failures) if failures else None
    )
    return values
def validate_salary_credit_checks(cls, values):
    """Check salary-deposit presence and the ordering of the statement dates.

    Two checks run in order and each appends one row to
    ``values.validation_policy_status_df``:

    1. "Salary deposit": ``first_salary_deposit_date_present`` must be truthy.
    2. "Date checks": both statement dates must be present and the end date
       must be strictly after the start date.

    Args:
        cls: Unused.
        values: Object exposing ``statement_start_date``,
            ``statement_end_date``, ``first_salary_deposit_date_present``,
            ``salary_deposit_err_msgs`` and ``validation_policy_status_df``.

    Returns:
        The same ``values`` object. ``salary_deposit_err_msgs`` is set to the
        comma-joined failure messages, or ``None`` when both checks passed.
    """
    status_df = values.validation_policy_status_df
    err_msgs = []

    def record(policy, value, passed, message):
        # Append at label len(df); relies on the frame keeping a 0..n-1 index.
        status_df.loc[len(status_df)] = [policy, value, passed, message]

    statement_start_date_val = values.statement_start_date
    statement_end_date_val = values.statement_end_date
    first_salary_deposit_date_present_val = values.first_salary_deposit_date_present

    # NOTE: the cross-month salary consistency check is disabled; the field it
    # relied on (is_salary_credit_consistent_across_months) is commented out.

    # 1. At least one salary credit must have been found.
    if first_salary_deposit_date_present_val:
        record(
            "Salary deposit",
            first_salary_deposit_date_present_val,
            True,
            "At least one salary credit is present",
        )
    else:
        err_msgs.append("At least one salary credit must be present")
        record(
            "Salary deposit",
            first_salary_deposit_date_present_val,
            False,
            "At least one salary credit is not present",
        )

    # 2. The end date must be strictly after the start date. Equal dates are
    # rejected too, so that the recorded "is after the start date" message is
    # never attached to a zero-length period.
    dates_in_order = bool(
        statement_start_date_val
        and statement_end_date_val
        and statement_end_date_val > statement_start_date_val
    )
    date_pair = f"{statement_start_date_val}, {statement_end_date_val}"
    if dates_in_order:
        record(
            "Date checks",
            date_pair,
            True,
            "Statement period's end date is after the start date",
        )
    else:
        err_msgs.append("Statement period's end date must be after the start date")
        record(
            "Date checks",
            date_pair,
            False,
            "Statement period's end date is not after the start date",
        )

    values.salary_deposit_err_msgs = ", ".join(err_msgs) if err_msgs else None
    return values
def is_red_flagged(self) -> bool:
    """Return True when any of the six ``*_err_msgs`` attributes is truthy.

    The attributes are read lazily, in the listed order, and evaluation stops
    at the first truthy one.
    """
    error_fields = (
        "account_statement_date_err_msgs",
        "full_name_err_msgs",
        "bank_name_err_msgs",
        "account_number_err_msgs",
        "sort_code_err_msgs",
        "salary_deposit_err_msgs",
    )
    return any(getattr(self, field_name) for field_name in error_fields)