"""Pydantic model of a UK bank account statement and its policy checks."""

import datetime
import re

import pandas as pd
from pydantic import (
    BaseModel,
    ConfigDict,
    Field,
    ValidationInfo,
    computed_field,
    model_validator,
)

# Column layout of the audit table each validator appends rows to.
_POLICY_COLUMNS = ["Policy", "Value", "Status", "Message"]

# Compiled once at import time; used with ``fullmatch`` by the validators.
_ACCOUNT_NUMBER_RE = re.compile(r"^\d{8}$")
_SORT_CODE_RE = re.compile(r"^\d{2}-\d{2}-\d{2}$")


def _new_policy_frame() -> pd.DataFrame:
    """Return an empty audit table with the standard policy columns."""
    return pd.DataFrame(columns=_POLICY_COLUMNS)


def _apply_policy(
    model,
    err_msgs,
    *,
    passed,
    policy,
    value,
    error,
    fail_message,
    pass_message,
):
    """Record the outcome of one policy check on ``model``.

    One row ``[policy, value, status, message]`` is appended to
    ``model.validation_policy_status_df``. When the check failed, ``error``
    is additionally appended to ``err_msgs``.

    Args:
        model: Object exposing ``validation_policy_status_df``.
        err_msgs: List collecting the error texts of the calling validator.
        passed: Outcome of the check.
        policy: Value for the "Policy" column.
        value: Value for the "Value" column (the data that was inspected).
        error: Text appended to ``err_msgs`` on failure.
        fail_message: "Message" column text on failure.
        pass_message: "Message" column text on success.
    """
    frame = model.validation_policy_status_df
    if passed:
        row = [policy, value, True, pass_message]
    else:
        err_msgs.append(error)
        row = [policy, value, False, fail_message]
    # Label-based enlargement: the next integer label appends a new row.
    frame.loc[len(frame)] = row


def _join_errors(err_msgs):
    """Return the messages joined by ", ", or None when there are none."""
    return ", ".join(err_msgs) if err_msgs else None


# Extracted UK bank account statement fields plus the policy checks run on
# them. Each ``model_validator`` below writes a ``*_err_msgs`` summary string
# and appends audit rows to ``validation_policy_status_df``; ``is_red_flagged``
# is True when any summary is non-empty.
#
# This description is deliberately a comment rather than a class docstring:
# pydantic copies a model docstring into the generated JSON schema.
#
# NOTE(review): the validators keep the ``(cls, values[, info])`` signature
# they had before; recent pydantic releases favour instance-method style for
# mode="after" validators -- confirm before upgrading pydantic.
class UKBankAccountStatement(BaseModel):
    # Required because ``pd.DataFrame`` is not a type pydantic knows.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    statement_start_date: datetime.date | None = Field(
        default=None,
        description="Digital Bank account statement period's start date in YYYY-MM-DD format",
        examples=["2025-01-01"],
    )
    statement_end_date: datetime.date | None = Field(
        default=None,
        description="Digital Bank account statement period's end date in YYYY-MM-DD format",
        examples=["2025-01-31"],
    )
    first_salary_deposit_date_present: int | datetime.date | None = Field(
        default=None,
        description=(
            "The day/date of the very first salary deposit line item present in"
            " the bank account statement. Value must be gte 1 & lte 31"
        ),
        examples=[
            "If first present salary deposit date is 2025-01-06, then 6 must be passed"
        ],
    )
    # Length/format limits for the fields below are enforced by the
    # validators (so a violation is reported, not raised), which is why no
    # ``min_length``/``max_length`` is set on the fields themselves.
    bank_name: str | None = Field(
        default=None,
        description="Extracted bank name value, stripped of whitespaces at beginning & end",
        examples=["HSBC"],
    )
    full_name: str | None = Field(
        default=None,
        description="Applicant's full name. Must consist of at least 2 words, have length gte 2 & lte 61",
        examples=["Jodie Pippa"],
    )
    account_number: str | None = Field(
        default=None,
        description="UK Bank Account Statement's account number. Must be of 8 characters length only",
        examples=["12345678"],
    )
    sort_code: str | None = Field(
        default=None,
        description="UK Bank Account Sort Code. Must be of length 8 characters only. Format: xx-xx-xx",
        # Was a bare string; ``examples`` is a list like on every other field.
        examples=["20-00-00"],
    )
    # NOTE: a cross-month salary consistency flag
    # (``is_salary_credit_consistent_across_months``) used to be sketched here
    # in commented-out form; it is not part of the model.

    # Per-validator error summaries: ", "-joined messages, or None when every
    # check of that validator passed.
    account_statement_date_err_msgs: str | None = None
    full_name_err_msgs: str | None = None
    bank_name_err_msgs: str | None = None
    account_number_err_msgs: str | None = None
    sort_code_err_msgs: str | None = None
    salary_deposit_err_msgs: str | None = None

    # Audit table, one row per executed check. A factory gives every instance
    # its own frame explicitly, since the validators append rows in place.
    validation_policy_status_df: pd.DataFrame = Field(
        default_factory=_new_policy_frame
    )

    @model_validator(mode="after")
    def validate_full_name(cls, values, info: ValidationInfo):
        """Check presence, length, expected-value match and word count of the name.

        The expected name is read from the validation context key
        ``application_summary_full_name`` and compared case-insensitively.
        Results go to ``full_name_err_msgs`` and the audit table.
        """
        err_msgs = []
        expected = (
            info.context.get("application_summary_full_name")
            if info.context
            else None
        )
        name = values.full_name
        name_len = len(name) if name else 0

        _apply_policy(
            values,
            err_msgs,
            passed=bool(name),
            policy="Full Name",
            value=name,
            error="Applicant's full name not present",
            fail_message="Applicant's full name not present",
            pass_message="Applicant's full name is present",
        )
        # Plain range test: a missing name has length 0 and fails, and a
        # present name outside 2..61 now fails too (it used to slip through
        # because the two conditions were combined with ``and``).
        _apply_policy(
            values,
            err_msgs,
            passed=2 <= name_len <= 61,
            policy="Full Name",
            value=name_len,
            error="Full name must have a length of at least 2 & at most 61",
            fail_message="Full name does not have a length of at least 2 & at most 61",
            pass_message="Full name has a length of at least 2 & at most 61",
        )
        _apply_policy(
            values,
            err_msgs,
            passed=bool(
                expected and name and name.lower() == expected.lower()
            ),
            policy="Data Match",
            value=f"{name}, {expected}",
            error="Name mismatch with provided value",
            fail_message="Name does not match with provided value",
            pass_message="Name matches with provided value",
        )
        # ``split()`` treats any whitespace run as a separator, so a tab
        # between first and last name also counts as two words.
        _apply_policy(
            values,
            err_msgs,
            passed=bool(name) and len(name.split()) >= 2,
            policy="Full Name",
            value=name,
            error="Full name must consist of at least 2 words (first name + last name)",
            fail_message="Full name does not consist of at least 2 words (first name + last name)",
            pass_message="Full name consists of at least 2 words (first name + last name)",
        )

        values.full_name_err_msgs = _join_errors(err_msgs)
        return values

    @model_validator(mode="after")
    def validate_bank_name(cls, values, info: ValidationInfo):
        """Check presence, length and expected-value match of the bank name.

        The expected name is read from the validation context key
        ``application_summary_bank_name`` and compared case-insensitively.
        Results go to ``bank_name_err_msgs`` and the audit table.
        """
        err_msgs = []
        expected = (
            info.context.get("application_summary_bank_name")
            if info.context
            else None
        )
        bank = values.bank_name
        bank_len = len(bank) if bank else 0

        _apply_policy(
            values,
            err_msgs,
            passed=bool(bank),
            policy="Bank name",
            value=bank,
            error="Bank name not present",
            fail_message="Bank name is not present",
            pass_message="Bank name is present",
        )
        # Same range test as for the full name: out-of-range lengths of a
        # present value are now reported instead of silently passing.
        _apply_policy(
            values,
            err_msgs,
            passed=4 <= bank_len <= 50,
            policy="Bank name",
            value=bank_len,
            error="Bank name must have a length of at least 4 & at most 50",
            fail_message="Bank name does not have a length of at least 4 & at most 50",
            pass_message="Bank name has a length of at least 4 & at most 50",
        )
        _apply_policy(
            values,
            err_msgs,
            passed=bool(
                expected and bank and bank.lower() == expected.lower()
            ),
            policy="Data Match",
            value=f"{bank}, {expected}",
            error="Bank name mismatch with provided value",
            fail_message="Bank name does not match with provided value",
            pass_message="Bank name matches with provided value",
        )

        values.bank_name_err_msgs = _join_errors(err_msgs)
        return values

    @model_validator(mode="after")
    def validate_account_number(cls, values):
        """Check that the account number is present and is exactly 8 digits.

        Results go to ``account_number_err_msgs`` and the audit table.
        """
        err_msgs = []
        number = values.account_number

        _apply_policy(
            values,
            err_msgs,
            passed=bool(number),
            policy="Bank account number",
            value=number,
            error="Bank account number not present. Bank account number must be present.",
            fail_message="Bank account number is not present.",
            # Previously a copy-pasted bank-name message.
            pass_message="Bank account number is present.",
        )
        _apply_policy(
            values,
            err_msgs,
            passed=bool(number) and _ACCOUNT_NUMBER_RE.fullmatch(number) is not None,
            policy="Bank account number",
            value=number,
            error="Provided account number is invalid. It must be of 8 digits length only",
            fail_message="Provided account number is invalid",
            pass_message="Provided account number is valid",
        )

        values.account_number_err_msgs = _join_errors(err_msgs)
        return values

    @model_validator(mode="after")
    def validate_sort_code(cls, values):
        """Check that the sort code is present and formatted as ``dd-dd-dd``.

        Results go to ``sort_code_err_msgs`` and the audit table.
        """
        err_msgs = []
        code = values.sort_code

        _apply_policy(
            values,
            err_msgs,
            passed=bool(code),
            policy="Sort code",
            value=code,
            error="Sort code not present. Sort number must be present.",
            fail_message="Sort code is not present.",
            pass_message="Sort code is present.",
        )
        # The hyphens are mandatory; "200000" is rejected.
        _apply_policy(
            values,
            err_msgs,
            passed=bool(code) and _SORT_CODE_RE.fullmatch(code) is not None,
            policy="Sort code",
            value=code,
            error="Provided sort code's format is invalid. It must be of the format xx-xx-xx wherein x are digits.",
            fail_message="Sort code's format is invalid.",
            pass_message="Sort code's format is valid.",
        )

        values.sort_code_err_msgs = _join_errors(err_msgs)
        return values

    @model_validator(mode="after")
    def validate_bank_account_statement_dates(cls, values):
        """Check that both statement dates exist and span at least 28 days.

        The coverage check is only run (and only recorded) when both dates
        are present. Results go to ``account_statement_date_err_msgs`` and
        the audit table.
        """
        err_msgs = []
        start = values.statement_start_date
        end = values.statement_end_date
        both_present = bool(start and end)
        period = f"{start}, {end}"

        _apply_policy(
            values,
            err_msgs,
            passed=both_present,
            policy="Date checks",
            value=period,
            error="Both statement start date & statement end date must be present",
            fail_message="Both statement start date & statement end date are not present",
            pass_message="Both statement start date & statement end date are present",
        )
        if both_present:
            _apply_policy(
                values,
                err_msgs,
                passed=(end - start).days >= 28,
                policy="Coverage",
                value=period,
                error="Account statement period's start date & end date must have a gap of at least 28 days",
                fail_message="Account statement period's start date & end date donot have a gap of at least 28 days",
                pass_message="Account statement period's start date & end date have a gap of at least 28 days",
            )

        values.account_statement_date_err_msgs = _join_errors(err_msgs)
        return values

    @model_validator(mode="after")
    def validate_salary_credit_checks(cls, values):
        """Check that a salary credit exists and the statement dates are ordered.

        The date-order check fails when either date is missing or the end
        date is earlier than the start date. Results go to
        ``salary_deposit_err_msgs`` and the audit table.
        """
        err_msgs = []
        start = values.statement_start_date
        end = values.statement_end_date
        first_salary = values.first_salary_deposit_date_present

        _apply_policy(
            values,
            err_msgs,
            passed=bool(first_salary),
            policy="Salary deposit",
            value=first_salary,
            error="At least one salary credit must be present",
            fail_message="At least one salary credit is not present",
            pass_message="At least one salary credit is present",
        )
        _apply_policy(
            values,
            err_msgs,
            passed=bool(start and end) and not end < start,
            policy="Date checks",
            value=f"{start}, {end}",
            error="Statement period's end date must be after the start date",
            fail_message="Statement period's end date is not after the start date",
            pass_message="Statement period's end date is after the start date",
        )
        # NOTE: a cross-month salary-amount consistency check was previously
        # sketched here in commented-out form; it is not implemented.

        values.salary_deposit_err_msgs = _join_errors(err_msgs)
        return values

    # True when at least one validator stored a non-empty error summary.
    # Documented with a comment on purpose: a computed-field docstring is
    # used by pydantic as that field's schema description.
    @computed_field
    @property
    def is_red_flagged(self) -> bool:
        summaries = (
            self.account_statement_date_err_msgs,
            self.full_name_err_msgs,
            self.bank_name_err_msgs,
            self.account_number_err_msgs,
            self.sort_code_err_msgs,
            self.salary_deposit_err_msgs,
        )
        return any(summaries)