# Uploaded by vamsidharmuthireddy ("Upload 90 files", commit 48e7216, verified).
import datetime
import re
from pydantic import (
BaseModel,
computed_field,
Field,
ValidationInfo,
model_validator,
ConfigDict
)
import pandas as pd
class UKPayslipSchema(BaseModel):
    """Fields extracted from a UK payslip plus the outcome of policy checks.

    The ``model_validator`` hooks below run after field validation. Each one
    appends one row per policy check to ``validation_policy_status_df``
    (columns: Policy, Value, Status, Message) and stores a comma-separated
    summary of failures in the matching ``*_err_msgs`` attribute (``None``
    when every check in that group passed).

    The name, employer and address validators read the expected values from
    the validation context under the keys ``application_summary_full_name``,
    ``application_summary_employer_name`` and
    ``application_summary_complete_address``. A missing context or key is
    treated as a mismatch.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    pay_period_start_date: datetime.date | None = Field(
        default=None,
        description="Pay period's start date in YYYY-MM-DD format",
        examples=["2025-02-01"],
    )
    pay_period_end_date: datetime.date | None = Field(
        default=None,
        description="Pay period's end date in YYYY-MM-DD format",
        examples=["2025-02-28"],
    )
    pay_period_days: int | None = Field(
        default=None,
        description="pay_period_end_date - pay_period_start_date in days",
        examples=[28],
    )
    pay_date: datetime.date | None = Field(None)
    full_name: str | None = Field(
        default=None,
        description="Applicant's full name. Must consist of at least 2 words, have length gte 2 & lte 61",
        examples=["Jodie Pippa"],
    )
    employer_name: str | None = Field(
        default=None,
        description="Employer name extracted",
        examples=["ABC Ltd"],
    )
    is_basic_pay_net_pay_other_salary_components_present: bool = Field(
        default=False,
        description="Boolean indicating whether Basic Pay, Net Pay, other requisite salary components/line items are present in the payslip",
        examples=[True, False],
    )
    # NOTE: "deducation" is a typo, but the field name is part of the public
    # schema, so it is kept as-is.
    is_tax_deducation_present: bool = Field(
        default=False,
        description="Boolean flag indicating whether Tax Deduction line item is present in the payslip",
        examples=[True, False],
    )
    is_ni_deduction_present: bool = Field(
        default=False,
        description="Boolean flag indicating whether NI/National Insurance deduction line item is present in the payslip",
        examples=[True, False],
    )
    complete_employee_address: str | None = Field(
        default=None,
        description="Employee's complete address as a string",
        examples=["123 Maple Street, London, UK, SW1A 1AA"],
    )
    # employee_number: int | None = Field(
    #     default=None,
    #     description="Employee number",
    #     examples=[3558, 1234],
    # )

    # Per-group failure summaries, filled in by the validators below.
    pay_dates_err_msgs: str | None = None
    full_name_err_msgs: str | None = None
    employer_name_err_msgs: str | None = None
    payslip_line_item_presence_err_msgs: str | None = None
    complete_employee_address_err_msgs: str | None = None
    # One row per executed policy check. A factory is used so that every
    # instance explicitly starts from its own empty frame.
    validation_policy_status_df: pd.DataFrame = Field(
        default_factory=lambda: pd.DataFrame(
            columns=["Policy", "Value", "Status", "Message"]
        )
    )
    # employee_number_err_msgs: str | None = None
    # is_red_flagged: bool = False

    def _record_policy(
        self, policy: str, value: object, passed: bool, message: str
    ) -> None:
        """Append one check result row to ``validation_policy_status_df``."""
        frame = self.validation_policy_status_df
        frame.loc[len(frame)] = [policy, value, passed, message]

    @model_validator(mode="after")
    def validate_full_name(self, info: ValidationInfo) -> "UKPayslipSchema":
        """Check presence, length, word count and match of the full name.

        The match is a case-insensitive comparison against the context value
        ``application_summary_full_name``. Failures are summarised in
        ``full_name_err_msgs``.
        """
        policy = "Employer & Customer Names"
        err_msgs: list[str] = []
        expected = (
            info.context.get("application_summary_full_name")
            if info.context
            else None
        )
        full_name = self.full_name

        if not full_name:
            err_msgs.append("Applicant's full name not present")
            self._record_policy(
                policy, full_name, False, "Applicant's full name is not present"
            )
        else:
            self._record_policy(
                policy, full_name, True, "Applicant's full name is present"
            )

        name_length = len(full_name) if full_name else 0
        if not (2 <= name_length <= 61):
            err_msgs.append(
                "Full name must have a length of at least 2 & at most 61"
            )
            self._record_policy(
                policy,
                name_length,
                False,
                "Full name does not have a length of at least 2 & at most 61",
            )
        else:
            self._record_policy(
                policy,
                name_length,
                True,
                "Full name has a length of at least 2 & at most 61",
            )

        # split() without an argument collapses runs of whitespace, so
        # "John  Smith" counts as two words rather than three.
        word_count = len(full_name.split()) if full_name else 0
        if word_count < 2:
            err_msgs.append(
                "Full name must consist of at least 2 words (first name + last name)"
            )
            self._record_policy(
                policy,
                full_name,
                False,
                "Full name does not consist of at least 2 words (first name + last name)",
            )
        else:
            self._record_policy(
                policy,
                word_count,
                True,
                "Full name consists of at least 2 words (first name + last name)",
            )

        names_match = bool(
            expected and full_name and full_name.lower() == expected.lower()
        )
        if not names_match:
            err_msgs.append("Name mismatch with provided value")
            self._record_policy(
                policy,
                f"{full_name}, {expected}",
                False,
                "Name does not match with provided value",
            )
        else:
            self._record_policy(
                policy,
                f"{full_name}, {expected}",
                True,
                "Name matches with provided value",
            )

        self.full_name_err_msgs = ", ".join(err_msgs) if err_msgs else None
        return self

    @model_validator(mode="after")
    def validate_employer_name(self, info: ValidationInfo) -> "UKPayslipSchema":
        """Check presence, match and basic content of the employer name.

        The match is a case-insensitive comparison against the context value
        ``application_summary_employer_name``. Failures are summarised in
        ``employer_name_err_msgs``.
        """
        policy = "Employer & Customer Names"
        err_msgs: list[str] = []
        expected = (
            info.context.get("application_summary_employer_name")
            if info.context
            else None
        )
        employer_name_val = self.employer_name

        if not employer_name_val:
            err_msgs.append("Employer name not present")
            self._record_policy(
                policy, employer_name_val, False, "Employer name is not present"
            )
        else:
            self._record_policy(
                policy, employer_name_val, True, "Employer name is present"
            )

        is_employer_name_match = bool(
            expected
            and employer_name_val
            and employer_name_val.lower() == expected.lower()
        )
        if not is_employer_name_match:
            err_msgs.append("Employer name mismatch with provided value")
            self._record_policy(
                policy,
                f"{employer_name_val}, {expected}",
                False,
                "Employer name does not match with provided value",
            )
        else:
            self._record_policy(
                policy,
                f"{employer_name_val}, {expected}",
                True,
                "Employer name matches with provided value",
            )

        # # Allowed: letters, numbers, spaces, and common name punctuation
        # pattern = r"^[A-Za-z0-9&\-,.()'/@ ]{2,100}$"
        # if not re.match(pattern, employer_name_val):
        #     err_msgs.append("Employer name contains invalid characters")

        # Content checks need a string. A missing (None) name has already
        # been reported above; running re.search/.strip on None would raise
        # TypeError/AttributeError instead of producing a validation result.
        if employer_name_val is not None:
            if not re.search(r"[A-Za-z]", employer_name_val):
                err_msgs.append("Employer name must contain at least one letter")
            if employer_name_val.strip() == "":
                err_msgs.append("Employer name cannot be only whitespace")

        self.employer_name_err_msgs = ", ".join(err_msgs) if err_msgs else None
        return self

    @model_validator(mode="after")
    def validate_payslip_dates(self) -> "UKPayslipSchema":
        """Check that the payslip is dated, recent and covers a full period.

        Checks, in order: both period dates present; pay date (or, without a
        pay date, the period end date) within the last 35 days and not in the
        future; period start before end; period spans at least 28 days.
        Missing dates are reported as failed checks rather than raising.
        Failures are summarised in ``pay_dates_err_msgs``.
        """
        err_msgs: list[str] = []
        today = datetime.date.today()
        threshold_date = today - datetime.timedelta(days=35)
        start = self.pay_period_start_date
        end = self.pay_period_end_date
        has_period = start is not None and end is not None
        period_value = f"{start}, {end}"

        if not has_period:
            err_msgs.append("Undated Payslips")
            self._record_policy(
                "Undated Payslips", period_value, False, "Undated payslip"
            )
        else:
            self._record_policy(
                "Undated Payslips", period_value, True, "Dated payslip"
            )

        if self.pay_date is not None:
            if not (threshold_date <= self.pay_date <= today):
                err_msgs.append(
                    "Pay date must be within the last 35 days & not in the future"
                )
                self._record_policy(
                    "Pay Date Requirement",
                    self.pay_date,
                    False,
                    "Pay date is not within the last 35 days & not in the future",
                )
            else:
                self._record_policy(
                    "Pay Date Requirement",
                    self.pay_date,
                    True,
                    "Pay date is within the last 35 days & not in the future",
                )
        else:
            # No pay date: fall back to the pay period dates. A missing end
            # date cannot satisfy the recency requirement, so it fails the
            # check instead of being compared against a date.
            end_policy = "Pay Period End Date (DD/MM/YYYY, if no pay date)"
            if end is None or not (threshold_date <= end <= today):
                err_msgs.append(
                    "Pay period's end date must be within the last 35 days & not in the future"
                )
                self._record_policy(
                    end_policy,
                    end,
                    False,
                    "Pay period's end date is not within the last 35 days &/or is in the future",
                )
            else:
                self._record_policy(
                    end_policy,
                    end,
                    True,
                    "Pay period's end date is within the last 35 days & not in the future",
                )

            # NOTE(review): this check is placed in the no-pay-date branch
            # because its policy title says "if no pay date" -- confirm
            # against the intended policy.
            month_policy = (
                "Pay Period Month (MM/YYYY, if no pay date) basis pay period duration"
            )
            prev_month_end = today.replace(day=1) - datetime.timedelta(days=1)
            prev_month_start = prev_month_end.replace(day=1)
            is_recent_period = (
                has_period
                and prev_month_start <= start
                and start < end <= today
            )
            if not is_recent_period:
                err_msgs.append(
                    "Payslip date(s) must not be older than those of the last calendar month"
                )
                self._record_policy(
                    month_policy,
                    period_value,
                    False,
                    "Payslip date(s) is older than those of the last calendar month",
                )
            else:
                self._record_policy(
                    month_policy,
                    period_value,
                    True,
                    "Payslip date(s) is not older than those of the last calendar month",
                )

        order_policy = "Pay Period Start & End Dates"
        monthly_policy = "Submission Requirement (Monthly Pay)"
        if has_period:
            if start >= end:
                err_msgs.append(
                    "Pay period's start date must be before the end date"
                )
                self._record_policy(
                    order_policy,
                    period_value,
                    False,
                    "Pay period's start date is not before the end date",
                )
            else:
                self._record_policy(
                    order_policy,
                    period_value,
                    True,
                    "Pay period's start date is before the end date",
                )

            # Count both boundary days, matching the field examples
            # (2025-02-01 .. 2025-02-28 is a 28-day period). A plain
            # difference would reject every non-leap February payslip.
            period_days = (end - start).days + 1
            if period_days < 28:
                err_msgs.append(
                    "Pay period's start date & end date must have a gap of at least 28 days"
                )
                self._record_policy(
                    monthly_policy,
                    period_days,
                    False,
                    "Pay period's start date & end date donot have a gap of at least 28 days",
                )
            else:
                self._record_policy(
                    monthly_policy,
                    period_days,
                    True,
                    "Pay period's start date & end date have a gap of at least 28 days",
                )
        else:
            # The "Undated Payslips" error above already covers this case in
            # err_msgs; only the status rows are recorded here.
            self._record_policy(
                order_policy,
                period_value,
                False,
                "Pay period's start date is not before the end date",
            )
            self._record_policy(
                monthly_policy,
                period_value,
                False,
                "Pay period's start date & end date donot have a gap of at least 28 days",
            )

        self.pay_dates_err_msgs = ", ".join(err_msgs) if err_msgs else None
        return self

    @model_validator(mode="after")
    def validate_payslip_components_checks(self) -> "UKPayslipSchema":
        """Check that salary, tax and NI line items were found on the payslip.

        Failures are summarised in ``payslip_line_item_presence_err_msgs``.
        """
        # (policy, flag value, error summary, failure row text, success row text)
        checks = [
            (
                "Requisite salary line items",
                self.is_basic_pay_net_pay_other_salary_components_present,
                "Basic salary, Net Salary and/or other requisite salary components not present",
                "Basic salary, Net Salary and/or other requisite salary components not present",
                "Basic salary, Net Salary and/or other requisite salary components are present",
            ),
            (
                "Tax & NI Contributions",
                self.is_tax_deducation_present,
                "Tax Deduction line item must be present",
                "Tax Deduction line item is not present",
                "Tax Deduction line item is present",
            ),
            (
                "Tax & NI Contributions",
                self.is_ni_deduction_present,
                "NI/National Insurance line item must be present",
                "NI/National Insurance line item is not present",
                "NI/National Insurance line item is present",
            ),
        ]

        err_msgs: list[str] = []
        for policy, is_present, error, missing_msg, present_msg in checks:
            if not is_present:
                err_msgs.append(error)
                self._record_policy(policy, is_present, False, missing_msg)
            else:
                self._record_policy(policy, is_present, True, present_msg)

        self.payslip_line_item_presence_err_msgs = (
            ", ".join(err_msgs) if err_msgs else None
        )
        return self

    @model_validator(mode="after")
    def validate_complete_address(self, info: ValidationInfo) -> "UKPayslipSchema":
        """Check presence, length and match of the employee address.

        The match is a case-insensitive comparison against the context value
        ``application_summary_complete_address``. Failures are summarised in
        ``complete_employee_address_err_msgs``.
        """
        policy = "Applicant Address"
        err_msgs: list[str] = []
        expected = (
            info.context.get("application_summary_complete_address")
            if info.context
            else None
        )
        val = self.complete_employee_address

        if not val:
            err_msgs.append("Applicant's address not present")
            self._record_policy(
                policy, val, False, "Applicant's address not present"
            )
        else:
            self._record_policy(
                policy, val, True, "Applicant's address is present"
            )

        length = len(val) if val else 0
        if not (10 <= length <= 300):
            err_msgs.append(
                "Applicant's complete address must have a length of at least 10 & at most 300"
            )
            self._record_policy(
                policy,
                length,
                False,
                "Applicant's complete address does not have a length of at least 10 & at most 300",
            )
        else:
            self._record_policy(
                policy,
                length,
                True,
                "Applicant's complete address has a length of at least 10 & at most 300",
            )

        addresses_match = bool(
            expected and val and val.lower() == expected.lower()
        )
        if not addresses_match:
            err_msgs.append("Complete address mismatch with provided value")
            self._record_policy(
                policy,
                f"{val}, {expected}",
                False,
                "Complete address mismatch with provided value",
            )
        else:
            self._record_policy(
                policy,
                f"{val}, {expected}",
                True,
                "Complete address matches with provided value",
            )

        self.complete_employee_address_err_msgs = (
            ", ".join(err_msgs) if err_msgs else None
        )
        return self

    # @model_validator(mode="after")
    # def validate_employee_number(self):
    #     try:
    #         if self.employee_number and self.employee_number <= 25:
    #             self.complete_employee_address_err_msgs = "Employee number low"
    #         return self
    #     except Exception as e:
    #         raise

    @computed_field
    @property
    def is_red_flagged(self) -> bool:
        """Return True when any validator group recorded at least one failure."""
        return any(
            (
                self.pay_dates_err_msgs,
                self.full_name_err_msgs,
                self.employer_name_err_msgs,
                self.payslip_line_item_presence_err_msgs,
                self.complete_employee_address_err_msgs,
                # self.employee_number_err_msgs,
            )
        )