underwriting-workflow / utils /prep_validators_payload.py
vamsidharmuthireddy's picture
Upload 90 files
52c1998 verified
from typing import Any, Dict
import schemas
from utils.logger import setup_logger
logger = setup_logger(__name__)
def group_documents_by_type(obj, result=None):
    """Recursively collect document dicts from a nested structure, by type.

    Walks ``obj`` through any nesting of dicts and lists. Every dict whose
    ``"document_type"`` value is a key of the accumulator is appended to the
    matching bucket. A matched dict is still descended into, so documents
    nested inside other documents are collected as well.

    Args:
        obj: The structure to walk. Values that are neither dicts nor lists
            are ignored.
        result: Accumulator used by the recursive calls. Callers normally
            omit it; when supplied it is mutated in place and returned.

    Returns:
        The accumulator dict. When ``result`` is omitted it has the keys
        ``"payslip"``, ``"bank_statement"``, ``"passport"`` and
        ``"driving_license"``, each mapping to a list of matching dicts in
        traversal order.
    """
    if result is None:
        result = {
            "payslip": [],
            "bank_statement": [],
            "passport": [],
            "driving_license": [],
        }

    if isinstance(obj, dict):
        doc_type = obj.get("document_type")
        # An unhashable value (e.g. a list or dict) cannot be looked up in a
        # dict; treat it as "not a known type" instead of aborting the walk.
        try:
            is_known_type = doc_type in result
        except TypeError:
            is_known_type = False
        if is_known_type:
            result[doc_type].append(obj)
        for value in obj.values():
            group_documents_by_type(value, result)
    elif isinstance(obj, list):
        for item in obj:
            group_documents_by_type(item, result)

    return result
# Transformation Functions
def transform_validate_payslip(
    data: Dict[str, Any], application_form_dict: Dict[str, Any]
) -> Dict[str, Any]:
    """Map an extracted payslip onto ``UKPayslipSchema`` fields and validate it.

    Args:
        data: Extracted payslip fields. Keys that are absent are forwarded to
            the schema as ``None``.
        application_form_dict: Application form data, handed to the schema as
            the validation ``context``.

    Returns:
        The validated model dumped to a plain dict via ``model_dump()``.

    Raises:
        Whatever ``UKPayslipSchema.model_validate`` raises for invalid input
        (presumably a pydantic ``ValidationError`` -- confirm against the
        schema definition).
    """
    payslip_payload = {
        "pay_period_start_date": data.get("pay_period_start"),
        "pay_period_end_date": data.get("pay_period_end"),
        "pay_date": data.get("payslip_date"),
        "full_name": data.get("employee_name"),
        "employer_name": data.get("employer_name"),
        # The presence flags use truthiness, so values such as 0 or "" are
        # reported as "not present".
        "is_basic_pay_net_pay_other_salary_components_present": bool(
            data.get("basic_pay") and data.get("net_pay")
        ),
        "is_tax_deducation_present": bool(data.get("tax_deduction")),
        "is_ni_deduction_present": bool(data.get("ni_contribution")),
        "complete_employee_address": data.get("employee_address"),
        # NOTE: "employee_number" is not populated here; a mapping from
        # "employee_id" existed previously but is disabled.
    }
    return schemas.UKPayslipSchema.model_validate(
        payslip_payload,
        context=application_form_dict,
    ).model_dump()
def transform_validate_passport(
    data: Dict[str, Any], application_form_dict: Dict[str, Any]
) -> Dict[str, Any]:
    """Map an extracted passport onto ``UKPassportSchema`` fields and validate it.

    Args:
        data: Extracted passport fields. Keys that are absent are forwarded
            to the schema as ``None``.
        application_form_dict: Application form data, handed to the schema as
            the validation ``context``.

    Returns:
        The validated model dumped to a plain dict via ``model_dump()``.

    Raises:
        Whatever ``UKPassportSchema.model_validate`` raises for invalid input
        (presumably a pydantic ``ValidationError`` -- confirm against the
        schema definition).
    """
    passport_payload = {
        # NOTE(review): "full_name" is filled from "given_names" only; the
        # surname is not included. Confirm this is what the schema and the
        # cross-document name comparison expect.
        "full_name": data.get("given_names"),
        "expiry_date": data.get("date_of_expiry"),
    }
    return schemas.UKPassportSchema.model_validate(
        passport_payload,
        context=application_form_dict,
    ).model_dump()
def transform_validate_driving_license(
    data: Dict[str, Any], application_form_dict: Dict[str, Any]
) -> Dict[str, Any]:
    """Map an extracted driving licence onto ``UKDrivingLicense`` and validate it.

    The name is taken from ``"full_name"`` when that value is truthy;
    otherwise it is assembled from ``"first_name"`` and ``"surname"``.

    Args:
        data: Extracted driving licence fields.
        application_form_dict: Application form data, handed to the schema as
            the validation ``context``.

    Returns:
        The validated model dumped to a plain dict via ``model_dump()``.

    Raises:
        Whatever ``UKDrivingLicense.model_validate`` raises for invalid input
        (presumably a pydantic ``ValidationError`` -- confirm against the
        schema definition).
    """
    name = data.get("full_name")
    if not name:
        # ``or ""`` (rather than a ``.get`` default) also covers keys that
        # are present with a null value, which would otherwise be rendered
        # as the literal text "None" inside the name.
        first_name = data.get("first_name") or ""
        surname = data.get("surname") or ""
        name = f"{first_name} {surname}".strip()

    driving_license_payload = {"full_name": name}
    return schemas.UKDrivingLicense.model_validate(
        driving_license_payload,
        context=application_form_dict,
    ).model_dump()
def transform_validate_bank_statement(
    data: Dict[str, Any], application_form_dict: Dict[str, Any]
) -> Dict[str, Any]:
    """Map an extracted bank statement onto ``UKBankAccountStatement`` and validate it.

    Args:
        data: Extracted bank statement fields. Keys that are absent are
            forwarded to the schema as ``None``.
        application_form_dict: Application form data, handed to the schema as
            the validation ``context``.

    Returns:
        The validated model dumped to a plain dict via ``model_dump()``.

    Raises:
        Whatever ``UKBankAccountStatement.model_validate`` raises for invalid
        input (presumably a pydantic ``ValidationError`` -- confirm against
        the schema definition).
    """
    # Best effort: use the "date" of the first entry in "salary_credits".
    # Anything missing or malformed leaves the value as None so the schema
    # decides how to treat it.
    salary_credits = data.get("salary_credits") or []
    first_salary_date = None
    if salary_credits:
        try:
            first_salary_date = salary_credits[0]["date"]
        except (IndexError, KeyError, TypeError):
            # TypeError covers a first entry that is not subscriptable by
            # key (e.g. None or a number).
            first_salary_date = None

    bank_statement_payload = {
        "statement_start_date": data.get("statement_start_date"),
        "statement_end_date": data.get("statement_end_date"),
        # NOTE(review): despite the "_present" suffix this receives the raw
        # date value, not a boolean -- confirm against the schema.
        "first_salary_deposit_date_present": first_salary_date,
        "bank_name": data.get("bank_name"),
        "full_name": data.get("account_holder_name"),
        "account_number": data.get("account_number"),
        "sort_code": data.get("sort_code"),
    }
    return schemas.UKBankAccountStatement.model_validate(
        bank_statement_payload,
        context=application_form_dict,
    ).model_dump()
def _build_name_consistency_check(transformed_validated_data):
    """Build the "Document Consistency" check comparing names across documents."""
    names_all = []
    # Names are compared case-insensitively and with spaces removed; the set
    # keeps one entry per distinct normalised name.
    names_across_docs = set()
    for docs in transformed_validated_data.values():
        for doc in docs:
            full_name = doc.get("full_name")
            if full_name is None:
                continue
            names_across_docs.add(full_name.lower().replace(" ", ""))
            names_all.append(full_name)

    names_across_docs_match = len(names_across_docs) <= 1
    if names_across_docs_match:
        # No document may have produced a name at all; report None instead
        # of failing on an empty list.
        value = names_all[-1] if names_all else None
        message = "Applicant's name matches across the uploaded documents"
    else:
        value = names_all
        message = "Applicant's name does not match across the uploaded documents"

    return {
        "Policy": "Document Consistency",
        "Value": value,
        "Status": names_across_docs_match,
        "Message": message,
    }


def process_extracted_data(
    extracted_data: Dict[str, Any], application_form: Dict[str, Any], full_data_transformed
):
    """Validate every extracted document and check name consistency across them.

    Args:
        extracted_data: Nested extraction output; documents are located by
            their ``"document_type"`` value.
        application_form: Application form data, handed to each schema as the
            validation context.
        full_data_transformed: Not used by this function; kept so existing
            callers continue to work.

    Returns:
        A 2-tuple ``(transformed_validated_data, cross_docs_name_eq_check)``:

        * ``transformed_validated_data`` maps ``"payslips"``,
          ``"bank_statements"``, ``"passports"`` and ``"driving_licenses"``
          to lists of validated document dicts.
        * ``cross_docs_name_eq_check`` is a dict with the keys ``"Policy"``,
          ``"Value"``, ``"Status"`` and ``"Message"``. ``"Status"`` is True
          when at most one distinct name was found. ``"Value"`` is then the
          last name seen (``None`` if no document carried a name); otherwise
          it is the list of all names.

    Raises:
        Whatever the per-document ``transform_validate_*`` functions raise
        when a document fails schema validation.
    """
    grouped_docs = group_documents_by_type(extracted_data)

    transformed_validated_data = {
        "payslips": [
            transform_validate_payslip(doc, application_form)
            for doc in grouped_docs["payslip"]
        ],
        "bank_statements": [
            transform_validate_bank_statement(doc, application_form)
            for doc in grouped_docs["bank_statement"]
        ],
        "passports": [
            transform_validate_passport(doc, application_form)
            for doc in grouped_docs["passport"]
        ],
        "driving_licenses": [
            transform_validate_driving_license(doc, application_form)
            for doc in grouped_docs["driving_license"]
        ],
    }
    # NOTE(review): this logs the full validated payload, which includes
    # names and bank account details -- confirm that is acceptable at INFO.
    logger.info(f"transformed_validated_data: {transformed_validated_data}")

    cross_docs_name_eq_check = _build_name_consistency_check(
        transformed_validated_data
    )
    return transformed_validated_data, cross_docs_name_eq_check