underwriting-workflow / schemas /account_statement.py
vamsidharmuthireddy's picture
Upload 90 files
52c1998 verified
import datetime
import re
from pydantic import (
BaseModel,
Field,
ValidationInfo,
computed_field,
model_validator,
ConfigDict
)
import pandas as pd
class UKBankAccountStatement(BaseModel):
    """Values extracted from a UK bank account statement plus policy-check results.

    Every ``mode="after"`` validator below inspects the extracted values, appends
    one row per policy check to ``validation_policy_status_df`` and stores a
    comma-joined summary of the failed checks in the matching ``*_err_msgs``
    attribute (``None`` when every check of that group passed). A failed policy
    check is only recorded; the validators do not raise for it.
    ``is_red_flagged`` reports whether any group recorded a failure.

    Validation context keys read through ``info.context``:
        application_summary_full_name: value ``full_name`` is compared against.
        application_summary_bank_name: value ``bank_name`` is compared against.

    NOTE(review): the validators keep their ``(cls, values, ...)`` signatures so
    the class interface is unchanged; ``values`` is used as the model instance
    (attributes are read from and assigned on it). Pydantic's documentation
    describes ``mode="after"`` validators as instance methods -- confirm this
    form against the pinned pydantic version.
    """

    # Required so that a pandas DataFrame can be used as a field type.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    statement_start_date: datetime.date | None = Field(
        default=None,
        description="Digital Bank account statement period's start date in YYYY-MM-DD format",
        examples=["2025-01-01"],
    )
    statement_end_date: datetime.date | None = Field(
        default=None,
        description="Digital Bank account statement period's end date in YYYY-MM-DD format",
        examples=["2025-01-31"],
    )
    # NOTE(review): the 1..31 range stated in the description is not enforced by
    # any validator in this class; only truthiness is checked (so 0 counts as
    # "not present").
    first_salary_deposit_date_present: int | datetime.date | None = Field(
        default=None,
        description=(
            "The day/date of the very first salary deposit line item present in"
            " the bank account statement. Value must be gte 1 & lte 31"
        ),
        examples=[
            "If first present salary deposit date is 2025-01-06, then 6 must be passed"
        ],
    )
    # Length limits (4..50) are checked in validate_bank_name instead of through
    # Field constraints, so a violation is recorded rather than raised.
    bank_name: str | None = Field(
        default=None,
        description="Extracted bank name value, stripped of whitespaces at beginning & end",
        examples=["HSBC"],
    )
    # Length limits (2..61) are checked in validate_full_name.
    full_name: str | None = Field(
        default=None,
        description="Applicant's full name. Must consist of at least 2 words, have length gte 2 & lte 61",
        examples=["Jodie Pippa"],
    )
    # The 8-digit format is checked in validate_account_number.
    account_number: str | None = Field(
        default=None,
        description="UK Bank Account Statement's account number. Must be of 8 characters length only",
        examples=["12345678"],
    )
    # The xx-xx-xx format is checked in validate_sort_code.
    sort_code: str | None = Field(
        default=None,
        description="UK Bank Account Sort Code. Must be of length 8 characters only. Format: xx-xx-xx",
        # ``examples`` takes a list, like every other field in this model.
        examples=["20-00-00"],
    )
    # NOTE: an ``is_salary_credit_consistent_across_months`` field (cross-month
    # salary consistency) existed here only as commented-out code; it is not
    # part of the model.

    # Per-group error summaries written by the validators below.
    account_statement_date_err_msgs: str | None = None
    full_name_err_msgs: str | None = None
    bank_name_err_msgs: str | None = None
    account_number_err_msgs: str | None = None
    sort_code_err_msgs: str | None = None
    salary_deposit_err_msgs: str | None = None

    # One row per executed policy check. A factory is used so that every model
    # instance explicitly builds its own empty frame.
    validation_policy_status_df: pd.DataFrame = Field(
        default_factory=lambda: pd.DataFrame(
            columns=["Policy", "Value", "Status", "Message"]
        ),
    )

    def _record_policy(
        self,
        err_msgs: list[str],
        *,
        policy: str,
        value: object,
        passed: bool,
        error: str,
        passed_message: str,
        failed_message: str,
    ) -> None:
        """Append the outcome of one policy check to the status frame.

        Args:
            err_msgs: Error list of the calling validator; ``error`` is appended
                to it when the check failed.
            policy: Text for the "Policy" column.
            value: Checked value, stored in the "Value" column.
            passed: Outcome of the check, stored in the "Status" column.
            error: Message added to ``err_msgs`` on failure.
            passed_message: "Message" column text when the check passed.
            failed_message: "Message" column text when the check failed.
        """
        passed = bool(passed)
        if not passed:
            err_msgs.append(error)
        frame = self.validation_policy_status_df
        # Assigning to the label len(frame) appends a new row in place.
        frame.loc[len(frame)] = [
            policy,
            value,
            passed,
            passed_message if passed else failed_message,
        ]

    @model_validator(mode="after")
    def validate_full_name(cls, values, info: ValidationInfo):
        """Check presence, length, expected-value match and word count of the name.

        Appends four rows to ``validation_policy_status_df`` and sets
        ``full_name_err_msgs``.

        Args:
            values: The model instance being validated.
            info: Validation info; ``application_summary_full_name`` is read from
                its context when a context was supplied.

        Returns:
            ``values``, with the check results recorded on it.
        """
        err_msgs: list[str] = []
        expected = (
            info.context.get("application_summary_full_name")
            if info.context
            else None
        )
        full_name_val = values.full_name
        full_name_val_len = len(full_name_val) if full_name_val else 0

        values._record_policy(
            err_msgs,
            policy="Full Name",
            value=full_name_val,
            passed=bool(full_name_val),
            error="Applicant's full name not present",
            passed_message="Applicant's full name is present",
            failed_message="Applicant's full name not present",
        )

        # Fails when the name is missing OR its length is out of range. The
        # previous ``and`` made this check pass for every non-empty name.
        values._record_policy(
            err_msgs,
            policy="Full Name",
            value=full_name_val_len,
            passed=bool(full_name_val) and 2 <= full_name_val_len <= 61,
            error="Full name must have a length of at least 2 & at most 61",
            passed_message="Full name has a length of at least 2 & at most 61",
            failed_message="Full name does not have a length of at least 2 & at most 61",
        )

        # A missing expected value or a missing extracted name counts as a mismatch.
        values._record_policy(
            err_msgs,
            policy="Data Match",
            value=f"{full_name_val}, {expected}",
            passed=(
                bool(expected)
                and bool(full_name_val)
                and full_name_val.lower() == expected.lower()
            ),
            error="Name mismatch with provided value",
            passed_message="Name matches with provided value",
            failed_message="Name does not match with provided value",
        )

        # split() without an argument splits on any whitespace run, so tabs,
        # non-breaking spaces and repeated spaces are all treated as word
        # separators.
        values._record_policy(
            err_msgs,
            policy="Full Name",
            value=full_name_val,
            passed=bool(full_name_val) and len(full_name_val.split()) >= 2,
            error="Full name must consist of at least 2 words (first name + last name)",
            passed_message="Full name consists of at least 2 words (first name + last name)",
            failed_message="Full name does not consist of at least 2 words (first name + last name)",
        )

        values.full_name_err_msgs = ", ".join(err_msgs) if err_msgs else None
        return values

    @model_validator(mode="after")
    def validate_bank_name(cls, values, info: ValidationInfo):
        """Check presence, length and expected-value match of the bank name.

        Appends three rows to ``validation_policy_status_df`` and sets
        ``bank_name_err_msgs``.

        Args:
            values: The model instance being validated.
            info: Validation info; ``application_summary_bank_name`` is read from
                its context when a context was supplied.

        Returns:
            ``values``, with the check results recorded on it.
        """
        err_msgs: list[str] = []
        expected = (
            info.context.get("application_summary_bank_name")
            if info.context
            else None
        )
        bank_name_val = values.bank_name
        bank_name_val_len = len(bank_name_val) if bank_name_val else 0

        values._record_policy(
            err_msgs,
            policy="Bank name",
            value=bank_name_val,
            passed=bool(bank_name_val),
            error="Bank name not present",
            passed_message="Bank name is present",
            failed_message="Bank name is not present",
        )

        # Fails when the name is missing OR its length is out of range. The
        # previous ``and`` made this check pass for every non-empty name.
        values._record_policy(
            err_msgs,
            policy="Bank name",
            value=bank_name_val_len,
            passed=bool(bank_name_val) and 4 <= bank_name_val_len <= 50,
            error="Bank name must have a length of at least 4 & at most 50",
            passed_message="Bank name has a length of at least 4 & at most 50",
            failed_message="Bank name does not have a length of at least 4 & at most 50",
        )

        # A missing expected value or a missing extracted name counts as a mismatch.
        values._record_policy(
            err_msgs,
            policy="Data Match",
            value=f"{bank_name_val}, {expected}",
            passed=(
                bool(expected)
                and bool(bank_name_val)
                and bank_name_val.lower() == expected.lower()
            ),
            error="Bank name mismatch with provided value",
            passed_message="Bank name matches with provided value",
            failed_message="Bank name does not match with provided value",
        )

        values.bank_name_err_msgs = ", ".join(err_msgs) if err_msgs else None
        return values

    @model_validator(mode="after")
    def validate_account_number(cls, values):
        """Check that the account number is present and consists of 8 digits.

        Appends two rows to ``validation_policy_status_df`` and sets
        ``account_number_err_msgs``.

        Args:
            values: The model instance being validated.

        Returns:
            ``values``, with the check results recorded on it.
        """
        err_msgs: list[str] = []
        account_number_val = values.account_number

        values._record_policy(
            err_msgs,
            policy="Bank account number",
            value=account_number_val,
            passed=bool(account_number_val),
            error="Bank account number not present. Bank account number must be present.",
            # Previously read "Bank name matches is present" (copy-paste slip).
            passed_message="Bank account number is present.",
            failed_message="Bank account number is not present.",
        )

        values._record_policy(
            err_msgs,
            policy="Bank account number",
            value=account_number_val,
            passed=(
                bool(account_number_val)
                and re.fullmatch(r"^\d{8}$", account_number_val) is not None
            ),
            error="Provided account number is invalid. It must be of 8 digits length only",
            passed_message="Provided account number is valid",
            failed_message="Provided account number is invalid",
        )

        values.account_number_err_msgs = ", ".join(err_msgs) if err_msgs else None
        return values

    @model_validator(mode="after")
    def validate_sort_code(cls, values):
        """Check that the sort code is present and formatted as ``xx-xx-xx``.

        Appends two rows to ``validation_policy_status_df`` and sets
        ``sort_code_err_msgs``.

        Args:
            values: The model instance being validated.

        Returns:
            ``values``, with the check results recorded on it.
        """
        err_msgs: list[str] = []
        sort_code_val = values.sort_code

        values._record_policy(
            err_msgs,
            policy="Sort code",
            value=sort_code_val,
            passed=bool(sort_code_val),
            error="Sort code not present. Sort number must be present.",
            passed_message="Sort code is present.",
            failed_message="Sort code is not present.",
        )

        # The hyphens are mandatory; a variant of the pattern with optional
        # hyphens existed only as commented-out code.
        values._record_policy(
            err_msgs,
            policy="Sort code",
            value=sort_code_val,
            passed=(
                bool(sort_code_val)
                and re.fullmatch(r"^\d{2}-\d{2}-\d{2}$", sort_code_val) is not None
            ),
            error="Provided sort code's format is invalid. It must be of the format xx-xx-xx wherein x are digits.",
            passed_message="Sort code's format is valid.",
            failed_message="Sort code's format is invalid.",
        )

        values.sort_code_err_msgs = ", ".join(err_msgs) if err_msgs else None
        return values

    @model_validator(mode="after")
    def validate_bank_account_statement_dates(cls, values):
        """Check that both statement dates exist and span at least 28 days.

        Appends a presence row to ``validation_policy_status_df`` and, only when
        both dates are present, a coverage row. Sets
        ``account_statement_date_err_msgs``.

        Args:
            values: The model instance being validated.

        Returns:
            ``values``, with the check results recorded on it.
        """
        err_msgs: list[str] = []
        statement_start_date_val = values.statement_start_date
        statement_end_date_val = values.statement_end_date
        both_present = bool(statement_start_date_val) and bool(statement_end_date_val)
        period = f"{statement_start_date_val}, {statement_end_date_val}"

        values._record_policy(
            err_msgs,
            policy="Date checks",
            value=period,
            passed=both_present,
            error="Both statement start date & statement end date must be present",
            passed_message="Both statement start date & statement end date are present",
            failed_message="Both statement start date & statement end date are not present",
        )

        # The coverage row is written only when a gap can actually be computed.
        if both_present:
            gap_days = (statement_end_date_val - statement_start_date_val).days
            values._record_policy(
                err_msgs,
                policy="Coverage",
                value=period,
                passed=gap_days >= 28,
                error="Account statement period's start date & end date must have a gap of at least 28 days",
                passed_message="Account statement period's start date & end date have a gap of at least 28 days",
                failed_message="Account statement period's start date & end date donot have a gap of at least 28 days",
            )

        values.account_statement_date_err_msgs = (
            ", ".join(err_msgs) if err_msgs else None
        )
        return values

    @model_validator(mode="after")
    def validate_salary_credit_checks(cls, values):
        """Check that a salary credit exists and that the period is ordered.

        Appends two rows to ``validation_policy_status_df`` and sets
        ``salary_deposit_err_msgs``. The date-order failure is reported in
        ``salary_deposit_err_msgs`` as well, not in the date error summary.

        Args:
            values: The model instance being validated.

        Returns:
            ``values``, with the check results recorded on it.
        """
        err_msgs: list[str] = []
        statement_start_date_val = values.statement_start_date
        statement_end_date_val = values.statement_end_date
        first_salary_deposit_date_present_val = (
            values.first_salary_deposit_date_present
        )

        values._record_policy(
            err_msgs,
            policy="Salary deposit",
            value=first_salary_deposit_date_present_val,
            passed=bool(first_salary_deposit_date_present_val),
            error="At least one salary credit must be present",
            passed_message="At least one salary credit is present",
            failed_message="At least one salary credit is not present",
        )

        # Missing dates fail this check; equal start and end dates pass it.
        values._record_policy(
            err_msgs,
            policy="Date checks",
            value=f"{statement_start_date_val}, {statement_end_date_val}",
            passed=(
                bool(statement_start_date_val)
                and bool(statement_end_date_val)
                and statement_end_date_val >= statement_start_date_val
            ),
            error="Statement period's end date must be after the start date",
            passed_message="Statement period's end date is after the start date",
            failed_message="Statement period's end date is not after the start date",
        )

        # NOTE: a cross-month salary-amount consistency check existed here only
        # as commented-out code and is not performed.

        values.salary_deposit_err_msgs = ", ".join(err_msgs) if err_msgs else None
        return values

    @computed_field
    @property
    def is_red_flagged(self) -> bool:
        """Return True when any validator group recorded at least one error."""
        return any(
            (
                self.account_statement_date_err_msgs,
                self.full_name_err_msgs,
                self.bank_name_err_msgs,
                self.account_number_err_msgs,
                self.sort_code_err_msgs,
                self.salary_deposit_err_msgs,
            )
        )