Spaces:

vamsidharmuthireddy
/

underwriting-workflow

Build error

App Files Files Community

vamsidharmuthireddy committed 19 days ago

Commit

52c1998

verified ·

1 Parent(s): 48e7216

Upload 90 files

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitignore +2 -0
__init__.py +0 -0
__pycache__/__init__.cpython-313.pyc +0 -0
api/__init__.py +0 -0
app.py +440 -0
app_fastapi.py +0 -0
app_streamlit.py +48 -0
app_streamlit_bak.py +79 -0
checks copy.ipynb +235 -0
config.toml +3 -0
llm/__init__.py +0 -0
llm/__pycache__/__init__.cpython-313.pyc +0 -0
llm/__pycache__/document_analyzer.cpython-313.pyc +0 -0
llm/__pycache__/llm.cpython-313.pyc +0 -0
llm/document_analyzer.py +102 -0
llm/llm.py +47 -0
logs_directory/app_20250414.log +78 -0
logs_directory/app_20250415.log +0 -0
logs_directory/app_20250416.log +0 -0
logs_directory/app_20250417.log +0 -0
logs_directory/app_20250420.log +0 -0
prompts/__init__.py +7 -0
prompts/__pycache__/__init__.cpython-313.pyc +0 -0
prompts/__pycache__/document_type.cpython-313.pyc +0 -0
prompts/__pycache__/genric_ocr.cpython-313.pyc +0 -0
prompts/bank_statement/__pycache__/bank_statement.cpython-313.pyc +0 -0
prompts/bank_statement/bank_statement.py +67 -0
prompts/document_type.py +103 -0
prompts/genric_ocr.py +9 -0
prompts/identity_documents/__pycache__/driving_license.cpython-313.pyc +0 -0
prompts/identity_documents/__pycache__/passport.cpython-313.pyc +0 -0
prompts/identity_documents/driving_license.py +39 -0
prompts/identity_documents/passport.py +58 -0
prompts/income_document/__pycache__/p60.cpython-313.pyc +0 -0
prompts/income_document/__pycache__/payslip.cpython-313.pyc +0 -0
prompts/income_document/p60.py +61 -0
prompts/income_document/payslip.py +84 -0
schemas/__init__.py +5 -0
schemas/__pycache__/__init__.cpython-313.pyc +0 -0
schemas/__pycache__/account_statement.cpython-313.pyc +0 -0
schemas/__pycache__/custom_app_form.cpython-313.pyc +0 -0
schemas/__pycache__/id.cpython-313.pyc +0 -0
schemas/__pycache__/payslip.cpython-313.pyc +0 -0
schemas/__pycache__/uk_address.cpython-313.pyc +0 -0
schemas/account_statement.py +609 -0
schemas/custom_app_form.py +163 -0
schemas/id.py +291 -0
schemas/payslip.py +551 -0
schemas/uk_address.py +39 -0
utils/__init__.py +4 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ __pycache__
2	+ logs_directory/

__init__.py ADDED Viewed

File without changes

__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (165 Bytes). View file

api/__init__.py ADDED Viewed

File without changes

app.py ADDED Viewed

	@@ -0,0 +1,440 @@

+import streamlit as st
+import pandas as pd
+from schemas.custom_app_form import CustomAppFormUpload
+from utils.prep_validators_payload import process_extracted_data
+# Create the three top-level tabs; the names bound here are used by the
+# `with` blocks further down the script.
+upload_docs_tab, demo_validations_considered_tab, upload_docs_validation_results_tab = st.tabs(
+    ["Upload Documents", "Demo Validations", "Validation Results"]
+)
+# --- "Upload Documents" tab -------------------------------------------------
+# Collects a single CSV application form plus any number of supporting
+# documents, parses the CSV into a dict, validates it through
+# CustomAppFormUpload and stores the result in the session state.
+with upload_docs_tab:
+    st.header("Upload Documents")
+    # st.markdown("## Upload Custom Application Form")
+    # Single-file uploader restricted to CSV.
+    uploaded_custom_application_form_file = st.file_uploader(
+        label="Upload Custom Application Form",
+        accept_multiple_files=False,
+        type=["csv"],
+    )
+    # Multi-file uploader for the documents to validate.
+    # NOTE(review): `uploaded_files` is not read anywhere in the visible part
+    # of this script.
+    uploaded_files = st.file_uploader(
+        label="Upload files to be validated",
+        accept_multiple_files=True,
+        type=["png", "jpg", "jpeg", "pdf", "zip"]
+    )
+    if uploaded_custom_application_form_file:
+        # The CSV is read without a header row; column 0 supplies the keys and
+        # column 1 the values of the form dictionary.
+        uploaded_custom_form_df = pd.read_csv(
+            uploaded_custom_application_form_file, header=None)
+        uploaded_custom_form_dict = dict(
+            zip(uploaded_custom_form_df[0], uploaded_custom_form_df[1]))
+        st.write("Raw Dictionary:")
+        st.json(uploaded_custom_form_dict)
+        # Validate the raw dict and convert the model back to a plain dict.
+        # NOTE(review): presumably a pydantic model (model_validate/model_dump);
+        # a validation error raised here is not caught in this script - confirm
+        # against schemas.custom_app_form.
+        custom_app_form = CustomAppFormUpload.model_validate(
+            uploaded_custom_form_dict).model_dump()
+        st.write("Parsed Dictionary:")
+        st.json(custom_app_form)
+        # print(custom_app_form)
+        # Persist the parsed form only when it is not flagged "is_incomplete".
+        # NOTE(review): the isinstance check looks redundant if model_dump()
+        # always returns a dict - confirm.
+        if isinstance(custom_app_form, dict) and not custom_app_form.get("is_incomplete"):
+            st.session_state["custom_app_form"] = custom_app_form
+            # Dumps the whole session state into the tab.
+            st.write("Session State:")
+            st.write(st.session_state)
+with demo_validations_considered_tab:
+    st.header("Demo Validations")
+    # demo_validations = [
+    #     # {
+    #     #     "Document Type": "Passport",
+    #     #     "Validation": "Full name must be present",
+    #     #     "Raises Red Flag": True,
+    #     #     # "Error Message": "Applicant's full name not present",
+    #     # },
+    #     # {
+    #     #     "Document Type": "Passport",
+    #     #     "Validation": "Full name must have length between 2 & 61",
+    #     #     "Raises Red Flag": True,
+    #     #     # "Error Message": "Full name must have a length of at least 2 & at most 61",
+    #     # },
+    #     # {
+    #     #     "Document Type": "Passport",
+    #     #     "Validation": "Full name must have at least two words",
+    #     #     "Raises Red Flag": True,
+    #     #     # "Error Message": "Full name must consist of at least 2 words (first name + last name)",
+    #     # },
+    #     {
+    #         "Document Type": "Passport",
+    #         "Validation": (
+    #             "Full name must be present. "
+    #             "Full name must have length between 2 & 61. "
+    #             "Full name must have at least two words."
+    #         ),
+    #         "Raises Red Flag": True,
+    #         # "Error Message": "Applicant's full name not present",
+    #     },
+    #     {
+    #         "Document Type": "Passport",
+    #         "Validation": "Expiry date must be present & after a year from current date",
+    #         "Raises Red Flag": True,
+    #         # "Error Message": "Provided passport expires within 1 year",
+    #     },
+    #     {
+    #         "Document Type": "Payslip",
+    #         "Validation": (
+    #             "Full name must be present. "
+    #             "Full name must have length between 2 & 61. "
+    #             "Full name must have at least two words."
+    #         ),
+    #         "Raises Red Flag": True,
+    #         # "Error Message": "Applicant's full name not present",
+    #     },
+    #     {
+    #         "Document Type": "Payslip",
+    #         "Validation": "Employer name must be present",
+    #         "Raises Red Flag": True,
+    #         # "Error Message": "Employer name not present",
+    #     },
+    #     {
+    #         "Document Type": "Payslip",
+    #         "Validation": "Employer name must have at least alphabet",
+    #         "Raises Red Flag": True,
+    #         # "Error Message": "Employer name must contain at least one letter",
+    #     },
+    #     {
+    #         "Document Type": "Payslip",
+    #         "Validation": "Employer name cannot be only whitespace",
+    #         "Raises Red Flag": True,
+    #         # "Error Message": "Employer name cannot be only whitespace",
+    #     },
+    #     {
+    #         "Document Type": "Payslip",
+    #         "Validation": "Employer name must match the provided value",
+    #         "Raises Red Flag": True,
+    #         # "Error Message": "Employer name mismatch with provided value",
+    #     },
+    #     {
+    #         "Document Type": "Payslip",
+    #         "Validation": (
+    #             "Pay period start & dates must be present.\n"
+    #             "Pay period start date cannot be on or after the end date.\n"
+    #             "Pay period's end date must be within the last 35 days & not in the future.\n"
+    #             "Pay period's date(s) must not be older than those of the last calendar month.\n"
+    #             "Pay period's start date & end date must have a gap of at least 28 days."
+    #         ),
+    #         "Raises Red Flag": True,
+    #         # "Error Message": "Employer name mismatch with provided value",
+    #     },
+    #     {
+    #         "Document Type": "Payslip",
+    #         "Validation": (
+    #             "Basic salary, Net Salary and/or other requisite salary components must be present. "
+    #             "Tax Deduction line item must be present. "
+    #             "NI/National Insurance line item must be present."
+    #         ),
+    #         "Raises Red Flag": True,
+    #     },
+    #     {
+    #         "Document Type": "Payslip",
+    #         "Validation": (
+    #             "Applicant's address must be present. "
+    #             "Applicant's complete address must have a length of at least 10 & at most 300. "
+    #             "Complete address must match with provided value. "
+    #         ),
+    #         "Raises Red Flag": True,
+    #     },
+    #     {
+    #         "Document Type": "Payslip",
+    #         "Validation": "Employee number must be greater than 25",
+    #         "Raises Red Flag": True,
+    #     },
+    #     {
+    #         "Document Type": "Digital Bank Account Statement",
+    #         "Validation": (
+    #             "Full name must be present. "
+    #             "Full name must have length between 2 & 61. "
+    #             "Full name must have at least two words."
+    #         ),
+    #         "Raises Red Flag": True,
+    #         # "Error Message": "Applicant's full name not present",
+    #     },
+    #     {
+    #         "Document Type": "Digital Bank Account Statement",
+    #         "Validation": (
+    #             "Bank name must be present. "
+    #             "Bank name must have length between 4 & 50. "
+    #             "Bank Name must match provided value."
+    #         ),
+    #         "Raises Red Flag": True,
+    #         # "Error Message": "Applicant's full name not present",
+    #     },
+    #     {
+    #         "Document Type": "Digital Bank Account Statement",
+    #         "Validation": (
+    #             "Bank account number must be present. "
+    #             "Bank account number must be of 8 digits only. "
+    #         ),
+    #         "Raises Red Flag": True,
+    #         # "Error Message": "Applicant's full name not present",
+    #     },
+    #     {
+    #         "Document Type": "Digital Bank Account Statement",
+    #         "Validation": (
+    #             "Sort number must be present. "
+    #             "It must be of the format xx-xx-xx wherein x are digits. "
+    #         ),
+    #         "Raises Red Flag": True,
+    #         # "Error Message": "Applicant's full name not present",
+    #     },
+    #     {
+    #         "Document Type": "Digital Bank Account Statement",
+    #         "Validation": (
+    #             "Both statement start date & statement end date must be present. "
+    #             "Account statement period's start date & end date must have a gap of at least 28 days. "
+    #             "At least one salary credit must be present. "
+    #             "Statement period's end date must be after the start date. "
+    #         ),
+    #         "Raises Red Flag": True,
+    #         # "Error Message": "Applicant's full name not present",
+    #     },
+    # ]
+    demo_validations = [
+        # {
+        #     "Topic / Document Type": "General Guidance",
+        #     "Policy / Rule / Condition": "Income/Employment Docs Risk",
+        #     "Action / Guidance / Requirement": "Be aware of higher risk of manipulation (Payslips, bank statements, Customer name).",
+        #     "Red Flag / Caution": "Higher risk category.",
+        #     "Notes / Details": "",
+        # },
+        {
+            "Topic / Document Type": "General Guidance",
+            "Policy / Rule / Condition": "Document Consistency",
+            # "Action / Guidance / Requirement": "Compare information across all documents (e.g., payslips vs bank statements) to ensure consistency.",
+            "Action / Guidance / Requirement": "Compare applicant's full name across all documents to ensure consistency.",
+            # "Red Flag / Caution": "Inconsistencies require investigation.",
+            # "Notes / Details": "",
+        },
+        # {
+        #     "Topic / Document Type": "General Guidance",
+        #     "Policy / Rule / Condition": "Payslip YTD Check",
+        #     "Action / Guidance / Requirement": "Do Year-to-Date figures (gross income, tax) make sense?",
+        #     "Red Flag / Caution": "If figures don’t make sense, investigate.",
+        #     "Notes / Details": "",
+        # },
+        # {
+        #     "Topic / Document Type": "General Guidance",
+        #     "Policy / Rule / Condition": "Payslip Details Check",
+        #     "Action / Guidance / Requirement": "Check for low employee numbers, rounded figures, differences in payment methods (e.g., payslip says BACS, statement shows Faster Payment).",
+        #     "Red Flag / Caution": "These can be red flags requiring investigation.",
+        #     "Notes / Details": "",
+        # },
+        {
+            "Topic / Document Type": "General Guidance",
+            "Policy / Rule / Condition": "Overall Validation",
+            "Action / Guidance / Requirement": "Ensure document is genuine, not fraudulent, belongs to the customer, and is from the expected source.",
+            # "Red Flag / Caution": "Any doubt may indicate fraud.",
+            # "Notes / Details": "Applies to all documents.",
+        },
+        {
+            "Topic / Document Type": "Passport",
+            "Policy / Rule / Condition": "Full Name",
+            "Action / Guidance / Requirement": (
+                "Full name must be present. "
+                "Full name must have length between 2 & 61. "
+                "Full name must have at least two words."
+            ),
+            # "Raises Red Flag": True,
+            # "Error Message": "Applicant's full name not present",
+        },
+        {
+            "Topic / Document Type": "Passport",
+            "Policy / Rule / Condition": "Expiry Date",
+            "Action / Guidance / Requirement": "Expiry date must be present & after a year from current date",
+            # "Raises Red Flag": True,
+            # "Error Message": "Provided passport expires within 1 year",
+        },
+        {
+            "Topic / Document Type": "Payslips",
+            "Policy / Rule / Condition": "Employer & Customer Names",
+            "Action / Guidance / Requirement": "Must include correct Employer’s and Customer’s names.",
+            # "Red Flag / Caution": "Missing or incorrect names.",
+            # "Notes / Details": "Cross-reference with BMM/HOME.",
+        },
+        {
+            "Topic / Document Type": "Payslips",
+            "Policy / Rule / Condition": "Submission Requirement (Monthly Pay)",
+            "Action / Guidance / Requirement": "Minimum one month's most recent payslip required.",
+            # "Red Flag / Caution": "",
+            # "Notes / Details": "",
+        },
+        {
+            "Topic / Document Type": "Payslips",
+            "Policy / Rule / Condition": "Pay Date Requirement",
+            # "Action / Guidance / Requirement": "Pay date must be within 35 days of FCD (Final Completion Date).",
+            "Action / Guidance / Requirement": "Pay date must be within 35 days of document upload date.",
+            # "Red Flag / Caution": "Pay date older than 35 days from FCD.",
+            # "Notes / Details": "",
+        },
+        {
+            "Topic / Document Type": "Payslips",
+            "Policy / Rule / Condition": "Pay Period End Date (DD/MM/YYYY, if no pay date)",
+            "Action / Guidance / Requirement": "Period end date must be within 35 days of FCD.",
+            # "Red Flag / Caution": "Period end date older than 35 days from FCD.",
+            # "Notes / Details": "",
+        },
+        {
+            "Topic / Document Type": "Payslips",
+            # "Policy / Rule / Condition": "Pay Period Month (MM/YYYY, if no pay date)",
+            "Policy / Rule / Condition": "Pay Period Month (MM/YYYY, if no pay date) basis pay period duration",
+            "Action / Guidance / Requirement": "Payslips dated in the current or previous calendar month are acceptable (must be the most recent).",
+            # "Red Flag / Caution": "Older than previous calendar month.",
+            # "Notes / Details": "",
+        },
+        {
+            "Topic / Document Type": "Payslips",
+            "Policy / Rule / Condition": "Undated Payslips",
+            "Action / Guidance / Requirement": "Unacceptable.",
+            # "Red Flag / Caution": "Undated payslip received.",
+            # "Notes / Details": "Request a dated version.",
+        },
+        {
+            "Topic / Document Type": "Payslips",
+            "Policy / Rule / Condition": "Tax & NI Contributions",
+            "Action / Guidance / Requirement": "Must be visible. Perform a sense check.",
+            # "Red Flag / Caution": "Missing or nonsensical Tax/NI figures.",
+            # "Notes / Details": "",
+        },
+        # custom
+        {
+            "Topic / Document Type": "Payslips",
+            "Policy / Rule / Condition": "Applicant Address",
+            "Action / Guidance / Requirement": (
+                "Applicant's address must be present. "
+                "Applicant's complete address must have a length of at least 10 & at most 300. "
+                "Complete address must match with provided value. "
+            ),
+            # "Red Flag / Caution": "Missing or nonsensical Tax/NI figures.",
+            # "Notes / Details": "",
+        },
+        # {
+        #     "Topic / Document Type": "Payslips",
+        #     "Policy / Rule / Condition": "YTD Figures Match",
+        #     "Action / Guidance / Requirement": "Verify YTD figures match declared income.",
+        #     "Red Flag / Caution": "YTD figures do not match declared income.",
+        #     "Notes / Details": "Add to YMI/FDM memo if they do not match.",
+        # },
+        # {
+        #     "Topic / Document Type": "Payslips",
+        #     "Policy / Rule / Condition": "Pension Income (on Payslip)",
+        #     "Action / Guidance / Requirement": "Must show within the last 35 days / be the most recent.",
+        #     "Red Flag / Caution": "Pension income shown is dated >35 days ago.",
+        #     "Notes / Details": "Alternatively use pension annual statement/latest P60. Cross-reference with bank statement if possible.",
+        # },
+        # {
+        #     "Topic / Document Type": "Payslips",
+        #     "Policy / Rule / Condition": "Joint Applicants",
+        #     "Action / Guidance / Requirement": "Required if applicable.",
+        #     "Red Flag / Caution": "Missing payslip for a joint applicant.",
+        #     "Notes / Details": "",
+        # },
+        # {
+        #     "Topic / Document Type": "Payslips",
+        #     "Policy / Rule / Condition": "Payslip Red Flags",
+        #     "Action / Guidance / Requirement": "",
+        #     "Red Flag / Caution": "Rounded figures. Low employee/payroll number. Presence of these flags.",
+        #     "Notes / Details": "Investigate further.",
+        # },
+        # {
+        #     "Topic / Document Type": "Payslips",
+        #     "Policy / Rule / Condition": "Payslip Verification (HOME)",
+        #     "Action / Guidance / Requirement": "Check information in HOME against payslip details (employer name, customer name, etc.).",
+        #     "Red Flag / Caution": "Mismatches found (e.g., misspellings, missing words).",
+        #     "Notes / Details": "Correct HOME after consulting customer. If correction not possible (e.g., space), add YMI/FDM memo explaining.",
+        # },
+        # {
+        #     "Topic / Document Type": "Payslips",
+        #     "Policy / Rule / Condition": "Payslip Near 35-Day Limit",
+        #     "Action / Guidance / Requirement": "If payslip is close to the 35-day limit and no decision is obtained.",
+        #     "Red Flag / Caution": "Decision pending, payslip nearing expiry.",
+        #     "Notes / Details": "Another, more recent payslip may be required.",
+        # },
+        # {
+        #     "Topic / Document Type": "Digital Bank Stmts",
+        #     "Policy / Rule / Condition": "Purpose",
+        #     "Action / Guidance / Requirement": "Used to confirm income/expenditure.",
+        #     "Red Flag / Caution": "",
+        #     "Notes / Details": "Cannot be used for ID & VA confirmation.",
+        # },
+        {
+            "Topic / Document Type": "Digital Bank Stmts",
+            "Policy / Rule / Condition": "Coverage",
+            # "Action / Guidance / Requirement": "Must cover a full calendar month (vs 28 days for original).",
+            "Action / Guidance / Requirement": "Account statement period's start date & end date must have a gap of at least 28 days.",
+            # "Red Flag / Caution": "",
+            # "Notes / Details": "",
+        },
+        {
+            "Topic / Document Type": "Digital Bank Stmts",
+            "Policy / Rule / Condition": "Data Match",
+            "Action / Guidance / Requirement": "Customer data on statement must match profile.",
+            # "Red Flag / Caution": "Data mismatch.",
+            # "Notes / Details": "",
+        },
+        # {
+        #     "Topic / Document Type": "Digital Bank Stmts",
+        #     "Policy / Rule / Condition": "Pay Info Match",
+        #     "Action / Guidance / Requirement": "Verify pay information matches the payslip.",
+        #     "Red Flag / Caution": "Pay info mismatch vs payslip.",
+        #     "Notes / Details": "",
+        # },
+        {
+            "Topic / Document Type": "Digital Bank Stmts",
+            "Policy / Rule / Condition": "Authenticity Doubt",
+            "Action / Guidance / Requirement": "If any doubt regarding authenticity.",
+            # "Red Flag / Caution": "Suspected non-genuine digital statement.",
+            # "Notes / Details": "Cases may be referred to Fraud.",
+        },
+        {
+            "Topic / Document Type": "Digital Bank Stmts",
+            "Policy / Rule / Condition": "Bank name",
+            "Action / Guidance / Requirement": (
+                "Bank name must be present. "
+                "Bank name must have length between 4 & 50. "
+                "Bank Name must match provided value."
+            ),
+        },
+        {
+            "Topic / Document Type": "Digital Bank Stmts",
+            "Policy / Rule / Condition": "Bank account number",
+            "Action / Guidance / Requirement": (
+                "Bank account number must be present. "
+                "Bank account number must be of 8 digits only. "
+            ),
+        },
+        {
+            "Topic / Document Type": "Digital Bank Stmts",
+            "Policy / Rule / Condition": "Sort code",
+            "Action / Guidance / Requirement": (
+                "Sort number must be present. "
+                "It must be of the format xx-xx-xx wherein x are digits. "
+            ),
+        },
+        {
+            "Topic / Document Type": "Digital Bank Stmts",
+            "Policy / Rule / Condition": "Date checks",
+            "Action / Guidance / Requirement": (
+                "Both statement start date & statement end date must be present. "
+                "At least one salary credit must be present. "
+                "Statement period's end date must be after the start date. "
+            ),
+        },
+    ]
+    demo_validations_df = pd.DataFrame(demo_validations)
+    st.table(demo_validations_df)
+with upload_docs_validation_results_tab:
+    st.header("Validation Results")
+    if st.session_state:
+        st.session_state

app_fastapi.py ADDED Viewed

File without changes

app_streamlit.py ADDED Viewed

	@@ -0,0 +1,48 @@

+import streamlit as st
+from utils.process_files import process_uploaded_files
+from utils.document_display import display_based_on_card
+import os
+import pandas as pd
+import json
+from llm.document_analyzer import analyze_files
+from PIL import Image
+from utils import setup_logger
+from utils.session_state import reset_state
+from datetime import datetime
+import uuid
+from utils.tabs.document_upload_tab import upload_documents
+from utils.tabs.memo import display_memo
+from utils.tabs.demo_validations import display_demo_validations
+from utils.tabs.document_validation_tab import validate_documents
+logger = setup_logger(__name__)
+st.set_page_config(layout="wide")
+# Initialize session state structures
+if 'uploads' not in st.session_state:
+    st.session_state['uploads'] = {}
+if 'current_upload' not in st.session_state:
+    st.session_state['current_upload'] = None
+st.title("🪪 Underwriting Workflow")
+upload_docs_tab, memo_tab, upload_docs_validation_results_tab, demo_validations_considered_tab = st.tabs(
+    ["Upload Documents", "Memo", "Validation Results", "Policies"]
+)
+with upload_docs_tab:
+    upload_documents()
+with memo_tab:
+    display_memo()
+with demo_validations_considered_tab:
+    display_demo_validations()
+with upload_docs_validation_results_tab:
+    validate_documents(current=st.session_state['current_upload'])

app_streamlit_bak.py ADDED Viewed

	@@ -0,0 +1,79 @@

+import streamlit as st
+from utils.process_files import process_uploaded_files
+from utils.document_display import display_based_on_card
+import os
+import pandas as pd
+import json
+from llm.document_analyzer import analyze_files
+from PIL import Image
+from utils import setup_logger
+logger = setup_logger(__name__)
+st.set_page_config(layout="wide")
+if len(st.session_state) == 0:
+    if 'tab_ocr' not in st.session_state:
+        # if st.session_state['tab_ocr']['file_groups'] is None:
+        st.session_state = {
+            'tab_ocr': {
+                'file_groups': None,
+                'values_raw': None,
+                'values_display': None
+            }
+        }
+logger.info(f"st.session_state: {st.session_state}")
+st.title("ID Analyser")
+uploaded_files = st.file_uploader("Upload Images, PDFs", accept_multiple_files=True, type=[
+                                  "png", "jpg", "jpeg", "pdf", "zip"])
+if uploaded_files:
+    st.session_state = {
+        'tab_ocr': {
+            'file_groups': None,
+            'values_raw': None,
+            'values_display': None
+        }
+    }
+    file_paths, file_groups, temp_dir = process_uploaded_files(
+        uploaded_files)  # Remove file paths later
+    if st.session_state['tab_ocr']['file_groups'] is None:
+        st.session_state['tab_ocr']['file_groups'] = file_groups
+    analyze_clicked = st.button("Analyze")
+    if analyze_clicked:
+        st.session_state['tab_ocr']['values_raw'] = None
+        st.session_state['tab_ocr']['values_display'] = None
+    if analyze_clicked or st.session_state['tab_ocr']['values_display']:
+        # if st.button("Analyze") or st.session_state['tab_ocr']['values_display'] is not None:
+        if st.session_state['tab_ocr']['values_raw'] is None:
+            analysis_results_groups, json_output_path = analyze_files(
+                file_groups=st.session_state['tab_ocr']['file_groups'],
+                temp_dir=temp_dir)
+            st.session_state['tab_ocr']['values_raw'] = analysis_results_groups
+        if st.session_state['tab_ocr']['values_display'] is None:
+            st.session_state['tab_ocr']['values_display'] = {}
+        for original_file, extracted_files in st.session_state['tab_ocr']['file_groups'].items():
+            analysis_results_for_id = display_based_on_card(
+                original_file=original_file,
+                analysis_results_for_original_file=st.session_state[
+                    'tab_ocr']['values_raw'][original_file],
+                extracted_files=extracted_files)
+        st.download_button(
+            label="Download Analysis JSON",
+            data=json.dumps(
+                st.session_state['tab_ocr']['values_raw'], indent=4),
+            file_name="analysis_results.json",
+            mime="application/json"
+        )

checks copy.ipynb ADDED Viewed

	@@ -0,0 +1,235 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## ID: Passport"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Check against sample extracted JSON"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "\n",
+    "from utils.prep_validators_payload import process_extracted_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "custom_app_form = {\n",
+    "    \"application_summary_full_name\": \"Jodie Pippa\",\n",
+    "    \"application_summary_bank_name\": \"HSBC\",\n",
+    "    \"application_summary_employer_name\": \"ABC Ltd\",\n",
+    "    \"application_summary_complete_address\": \"123 Maple Street, London, UK, SW1A 1AA\",\n",
+    "    \"full_name_err_msgs\": None,\n",
+    "    \"bank_name_err_msgs\": None,\n",
+    "    \"employer_name_err_msgs\": None,\n",
+    "    \"complete_employee_address_err_msgs\": None,\n",
+    "    \"is_incomplete\": False,\n",
+    "}\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'/tmp/tmp6w8qn6h6/sample_documents/sample_documents/3.pdf': {'/tmp/tmp6w8qn6h6/sample_documents/sample_documents/3.pdf_page_0.png': {'document_category': 'bank_statement', 'document_type': 'bank_statement', 'account_holder_name': 'Jodie Pippa', 'account_holder_address': '', 'bank_name': 'HSBC', 'account_number': '12345678', 'sort_code': '20-00-00', 'statement_start_date': '2025-01-01', 'statement_end_date': '2025-02-28', 'salary_credits': [{'date': '2025-01-06', 'amount': '2213.83', 'from': 'ABC Ltd', 'description': 'Salary - ABC Ltd (Pro-rated Jan)'}, {'date': '2025-02-06', 'amount': '2566.66', 'from': 'ABC Ltd', 'description': 'Salary - ABC Ltd (Full Feb Salary)'}]}}, '/tmp/tmp6w8qn6h6/sample_documents/sample_documents/5.pdf': {'/tmp/tmp6w8qn6h6/sample_documents/sample_documents/5.pdf_page_0.png': {'document_category': 'income_document', 'document_type': 'payslip', 'employee_name': 'Jodie Pippa', 'employer_name': 'ABC Ltd', 'employee_id': 'JP12345', 'employee_address': '123 Maple Street, London, UK, SW1A 1AA', 'employer_address': '456 Business Street, London, UK, SW1A 2BB', 'tax_code': '1257L', 'payslip_date': '2025-01-31', 'pay_period_start': '2025-01-06', 'pay_period_end': '2025-01-31', 'payment_frequency': 'monthly', 'basic_pay': '3333.33', 'net_pay': '2566.66', 'gross_pay': '3333.33', 'salary_components': [], 'ni_contribution': '266.67', 'tax_deduction': '333.33', 'other_deductions': [{'name': 'Pension Contribution', 'amount': '166.67'}]}}, '/tmp/tmp6w8qn6h6/sample_documents/sample_documents/2.pdf': {'/tmp/tmp6w8qn6h6/sample_documents/sample_documents/2.pdf_page_0.png': {'document_category': 'income_document', 'document_type': 'p60', 'employee_details': {'surname': 'Pippa', 'forenames_or_initials': 'Jodie', 'national_insurance_number': 'AB123456C', 'works_payroll_number': '5342'}, 'pay_and_income_tax_details': {'previous_employments': {'pay': 0.0, 'tax_deducted': 0.0}, 'current_employment': {'pay': 9545.45, 'tax_deducted': 0.0}, 'total_for_year': 
{'pay': 9545.45, 'tax_deducted': 0.0}, 'final_tax_code': '1257'}, 'national_insurance_contributions': [{'nic_letter': 'A', 'earnings': {'at_or_above_lel': 6396.0, 'above_lel_up_to_pt': 0.0, 'above_pt_up_to_uel': 3149.45}, 'employee_contributions_above_pt': 377.93}], 'statutory_payments': {'maternity_pay': 0.0, 'paternity_pay': 0.0, 'adoption_pay': 0.0, 'shared_parental_pay': 0.0}, 'other_details': {'student_loan_deductions': 0.0}, 'employer_details': {'employer_name_and_address': None, 'paye_reference': '123/AB456'}}}, '/tmp/tmp6w8qn6h6/sample_documents/sample_documents/1.pdf': {'/tmp/tmp6w8qn6h6/sample_documents/sample_documents/1.pdf_page_0.png': {'document_category': 'identity_verification_document', 'document_type': 'passport', 'full_name': 'UNITED-KINGDOM-FIVE JODIE PIPPA', 'surname': 'UNITED-KINGDOM-FIVE', 'given_names': 'JODIE PIPPA', 'passport_number': '107185703', 'nationality': 'BRITISH CITIZEN', 'date_of_birth': '1985-01-17', 'place_of_birth': 'LONDON', 'sex': 'F', 'date_of_issue': '2006-01-31', 'date_of_expiry': '2016-01-31', 'issuing_authority': 'UKPA', 'passport_type': 'P', 'country_code': 'GBR', 'mrz_line_1': 'P<GBRUNITED<KINGDOM<FIVE<<JODIE<PIPPA<<<<<<<', 'mrz_line_2': '1071857032GBR8501178F1601312<<<<<<<<<<<<<<02'}}}\n"
+     ]
+    }
+   ],
+   "source": [
+    "with open(\"../analysis_results.json\", \"r\") as f:\n",
+    "    full_data = json.load(f)\n",
+    "\n",
+    "print(full_data)\n",
+    "# print(process_extracted_data(full_data, custom_app_form))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'payslips': [{'pay_period_start_date': datetime.date(2025, 1, 6), 'pay_period_end_date': datetime.date(2025, 1, 31), 'pay_period_days': None, 'pay_date': datetime.date(2025, 1, 31), 'full_name': 'Jodie Pippa', 'employer_name': 'ABC Ltd', 'is_basic_pay_net_pay_other_salary_components_present': True, 'is_tax_deducation_present': True, 'is_ni_deduction_present': True, 'complete_employee_address': '123 Maple Street, London, UK, SW1A 1AA', 'pay_dates_err_msgs': \"Pay date must be within the last 35 days & not in the future, Pay period's start date & end date must have a gap of at least 28 days\", 'full_name_err_msgs': None, 'employer_name_err_msgs': None, 'payslip_line_item_presence_err_msgs': None, 'complete_employee_address_err_msgs': None, 'validation_policy_status_df':                                               Policy        Value  Status  \\\n",
+      "0            Applicant's full name should be present  Jodie Pippa    True   \n",
+      "1  Full name must have a length of at least 2 & a...           11    True   \n",
+      "2  Full name must consist of at least 2 words (fi...            2    True   \n",
+      "3              Name should match with provided value  Jodie Pippa    True   \n",
+      "4                      Employer name must be present      ABC Ltd    True   \n",
+      "5       Employer name must match with provided value      ABC Ltd    True   \n",
+      "\n",
+      "                                             Message  \n",
+      "0                   Applicant's full name is present  \n",
+      "1  Full name has a length of at least 2 & at most 61  \n",
+      "2  Full name consists of at least 2 words (first ...  \n",
+      "3                   Name matches with provided value  \n",
+      "4                           Employer name is present  \n",
+      "5          Employer name matches with provided value  , 'is_red_flagged': True}], 'bank_statements': [{'statement_start_date': datetime.date(2025, 1, 1), 'statement_end_date': datetime.date(2025, 2, 28), 'first_salary_deposit_date_present': 6, 'bank_name': 'HSBC', 'full_name': 'Jodie Pippa', 'account_number': '12345678', 'sort_code': '20-00-00', 'account_statement_date_err_msgs': None, 'full_name_err_msgs': None, 'bank_name_err_msgs': None, 'account_number_err_msgs': None, 'sort_code_err_msgs': None, 'salary_deposit_err_msgs': None, 'is_red_flagged': False}], 'passports': [{'full_name': 'JODIE PIPPA', 'expiry_date': datetime.date(2016, 1, 31), 'full_name_err_msgs': None, 'expiry_date_err_msgs': 'Provided passport expires within 1 year', 'is_red_flagged': True}]}\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(process_extracted_data(full_data, custom_app_form))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Policy</th>\n",
+       "      <th>Value</th>\n",
+       "      <th>Status</th>\n",
+       "      <th>Message</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Applicant's full name should be present</td>\n",
+       "      <td>Jodie Pippa</td>\n",
+       "      <td>True</td>\n",
+       "      <td>Applicant's full name is present</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Full name must have a length of at least 2 &amp; a...</td>\n",
+       "      <td>11</td>\n",
+       "      <td>True</td>\n",
+       "      <td>Full name has a length of at least 2 &amp; at most 61</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>Full name must consist of at least 2 words (fi...</td>\n",
+       "      <td>2</td>\n",
+       "      <td>True</td>\n",
+       "      <td>Full name consists of at least 2 words (first ...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Name should match with provided value</td>\n",
+       "      <td>Jodie Pippa</td>\n",
+       "      <td>True</td>\n",
+       "      <td>Name matches with provided value</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>Employer name must be present</td>\n",
+       "      <td>ABC Ltd</td>\n",
+       "      <td>True</td>\n",
+       "      <td>Employer name is present</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>Employer name must match with provided value</td>\n",
+       "      <td>ABC Ltd</td>\n",
+       "      <td>True</td>\n",
+       "      <td>Employer name matches with provided value</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                              Policy        Value  Status  \\\n",
+       "0            Applicant's full name should be present  Jodie Pippa    True   \n",
+       "1  Full name must have a length of at least 2 & a...           11    True   \n",
+       "2  Full name must consist of at least 2 words (fi...            2    True   \n",
+       "3              Name should match with provided value  Jodie Pippa    True   \n",
+       "4                      Employer name must be present      ABC Ltd    True   \n",
+       "5       Employer name must match with provided value      ABC Ltd    True   \n",
+       "\n",
+       "                                             Message  \n",
+       "0                   Applicant's full name is present  \n",
+       "1  Full name has a length of at least 2 & at most 61  \n",
+       "2  Full name consists of at least 2 words (first ...  \n",
+       "3                   Name matches with provided value  \n",
+       "4                           Employer name is present  \n",
+       "5          Employer name matches with provided value  "
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "a = process_extracted_data(full_data, custom_app_form)\n",
+    "a['payslips'][0]['validation_policy_status_df']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "hsbc_uk_demo_venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

config.toml ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ [server]
2	+
3	+ maxUploadSize = 10

llm/__init__.py ADDED Viewed

File without changes

llm/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (169 Bytes). View file

llm/__pycache__/document_analyzer.cpython-313.pyc ADDED Viewed

Binary file (3.88 kB). View file

llm/__pycache__/llm.cpython-313.pyc ADDED Viewed

Binary file (2.21 kB). View file

llm/document_analyzer.py ADDED Viewed

	@@ -0,0 +1,102 @@

+import streamlit as st
+from PIL import Image
+import json
+import os
+from utils import im_2_b64, load_pdf_as_image, generate_metadata
+from .llm import DocumentLLM
+from prompts import (document_type_prompt, passport_prompt,
+                     payslip_prompt, bank_statement_prompt,
+                     p60_prompt, driving_license_prompt,
+                     genric_ocr_prompt)
+from utils.json_utils import restructure_documents
+from utils import setup_logger
+# Module-level logger named after this module; setup_logger is a project
+# helper imported from utils (its handlers/format are configured there).
+logger = setup_logger(__name__)
+def analyze_files(file_groups: dict, temp_dir, current_upload):
+    document_llm = DocumentLLM()
+    results_group = {}
+    for original_file, extracted_files in file_groups.items():
+        results = {}
+        for file_name in extracted_files:
+            results[file_name] = {"status": "processed",
+                                  "type": "image", "dummy_data": 12345}
+            logger.info(f"file_name : {file_name}")
+            extension = file_name.lower().split('.')[-1]
+            results[file_name] = generate_metadata(file_name)
+            try:
+                logger.info(f"Starting analysis for {file_name}")
+                if extension in ['jpg', 'jpeg', 'png', 'gif']:
+                    image = Image.open(file_name)
+                    image_buffer = im_2_b64(image)
+                elif extension == 'pdf':
+                    img = load_pdf_as_image(file_name)
+                    image_buffer = im_2_b64(img)
+                    st.image(img, use_container_width=True)
+                else:
+                    st.write(
+                        f"Unsupported file format: {extension}")
+                if image_buffer is not None:
+                    results[file_name] = document_llm.call_llm_api(
+                        prompt=document_type_prompt,
+                        image_path=file_name)
+                    logger.info(
+                        f"File name: {file_name}, Results: {results[file_name]}")
+                    document_type = results[file_name].get(
+                        'document_type', None)
+                    if document_type is not None:
+                        prompt = None
+                        if document_type == 'passport':
+                            prompt = passport_prompt
+                        elif document_type == 'driving_license':
+                            prompt = driving_license_prompt
+                        elif document_type == 'bank_statement':
+                            prompt = bank_statement_prompt
+                        elif document_type == 'payslip':
+                            prompt = payslip_prompt
+                        elif document_type == 'p60':
+                            prompt = p60_prompt
+                        else:
+                            prompt = genric_ocr_prompt
+                        if prompt is not None:
+                            data = document_llm.call_llm_api(
+                                prompt=prompt,
+                                image_path=file_name)
+                            results[file_name].update(data)
+                            logger.info(f"{file_name}: {data}")
+            except Exception as e:
+                st.error(f"Error processing {file_name}: {str(e)}")
+            image_buffer = None
+        results_group[original_file] = results
+    results_transformed = restructure_documents(results_group)
+    st.session_state['uploads'][current_upload]['results_transformed'] = results_transformed
+    # Save analysis results to a JSON file
+    json_output_path = os.path.join(
+        temp_dir, "analysis_results.json")
+    with open(json_output_path, "w") as json_file:
+        json.dump(results_group, json_file, indent=4)
+    return results_group, json_output_path

llm/llm.py ADDED Viewed

	@@ -0,0 +1,47 @@

+from pydantic import BaseModel
+from google import genai
+from google.genai.types import HttpOptions
+from dotenv import load_dotenv
+import os
+import json
+import re
+from vertexai.generative_models import Part, Image, GenerativeModel
+from utils import setup_logger
+import vertexai
+# Module-level logger named after this module (setup_logger comes from utils).
+logger = setup_logger(__name__)
+# Load variables from a .env file so the os.getenv lookups below can see them.
+load_dotenv()
+# Either value is None when the corresponding environment variable is unset.
+project = os.getenv("GOOGLE_CLOUD_PROJECT")
+location = os.getenv("GOOGLE_CLOUD_LOCATION")
+# NOTE: runs at import time, so importing this module initialises Vertex AI.
+vertexai.init(project=project, location=location)
+class DocumentLLM(BaseModel):
+    def call_llm_api(self, prompt, image_path):
+        model: GenerativeModel = GenerativeModel(
+            model_name="gemini-2.0-flash-001")
+        text_part = Part.from_text(prompt)
+        image_part = Part.from_image(Image.load_from_file(image_path))
+        response = model.generate_content([
+            image_part,
+            text_part])
+        content = response.text
+        try:
+            content = json.loads(content)
+        except Exception as e:
+            logger.info(f"Json is being formatted")
+            content = re.sub(r"^```json\s*|\s*```$", "",
+                             content, flags=re.MULTILINE)
+            # Parse JSON
+            content = json.loads(content)
+        return content

logs_directory/app_20250414.log ADDED Viewed

	@@ -0,0 +1,78 @@

+2025-04-14 22:13:56 - llm.document_analyzer - INFO - [document_analyzer.py:26] - file_name : /tmp/tmpq7o86ysz/1.pdf_page_0.png
+2025-04-14 22:14:01 - utils.document_display - INFO - [document_display.py:148] - analysis_results_for_id : {'File Name': '1.pdf_page_0.png', 'Directory': 'tmpq7o86ysz', 'File Size': '943.88 KB', 'Last Modified': '2025-04-14 22:13:56', 'Created': '2025-04-14 22:13:56', 'File Extension': '.png', 'Full Path': '/tmp/tmpq7o86ysz/1.pdf_page_0.png', 'Image Size': '1700x2200', 'Image Mode': 'RGB', 'Image Format': 'PNG'}
+2025-04-14 22:14:01 - utils.document_display - INFO - [document_display.py:167] - document_type for /tmp/tmpq7o86ysz/1.pdf: None
+2025-04-14 22:14:01 - utils.document_display - INFO - [document_display.py:245] - Exception for processing analysis results of {'File Name': '1.pdf_page_0.png', 'Directory': 'tmpq7o86ysz', 'File Size': '943.88 KB', 'Last Modified': '2025-04-14 22:13:56', 'Created': '2025-04-14 22:13:56', 'File Extension': '.png', 'Full Path': '/tmp/tmpq7o86ysz/1.pdf_page_0.png', 'Image Size': '1700x2200', 'Image Mode': 'RGB', 'Image Format': 'PNG'}: 'NoneType' object has no attribute 'lower'
+2025-04-14 22:14:01 - utils.document_display - INFO - [document_display.py:249] - analysis_results_for_id_updated : {'File Name': '1.pdf_page_0.png', 'Directory': 'tmpq7o86ysz', 'File Size': '943.88 KB', 'Last Modified': '2025-04-14 22:13:56', 'Created': '2025-04-14 22:13:56', 'File Extension': '.png', 'Full Path': '/tmp/tmpq7o86ysz/1.pdf_page_0.png', 'Image Size': '1700x2200', 'Image Mode': 'RGB', 'Image Format': 'PNG'}
+2025-04-14 22:14:01 - __main__ - INFO - [app_streamlit.py:40] - file_path while displaying: ['/tmp/tmpq7o86ysz/1.pdf_page_0.png']
+2025-04-14 22:18:51 - llm.document_analyzer - INFO - [document_analyzer.py:26] - file_name : /tmp/tmpqmaed05g/1.pdf_page_0.png
+2025-04-14 22:18:51 - llm.document_analyzer - INFO - [document_analyzer.py:32] - Starting analysis for /tmp/tmpqmaed05g/1.pdf_page_0.png
+2025-04-14 22:18:55 - utils.document_display - INFO - [document_display.py:148] - analysis_results_for_id : {'File Name': '1.pdf_page_0.png', 'Directory': 'tmpqmaed05g', 'File Size': '943.88 KB', 'Last Modified': '2025-04-14 22:18:51', 'Created': '2025-04-14 22:18:51', 'File Extension': '.png', 'Full Path': '/tmp/tmpqmaed05g/1.pdf_page_0.png', 'Image Size': '1700x2200', 'Image Mode': 'RGB', 'Image Format': 'PNG'}
+2025-04-14 22:18:55 - utils.document_display - INFO - [document_display.py:167] - document_type for /tmp/tmpqmaed05g/1.pdf: None
+2025-04-14 22:18:55 - utils.document_display - INFO - [document_display.py:249] - analysis_results_for_id_updated : {'document_type': None}
+2025-04-14 22:18:55 - __main__ - INFO - [app_streamlit.py:40] - file_path while displaying: ['/tmp/tmpqmaed05g/1.pdf_page_0.png']
+2025-04-14 22:37:29 - llm.document_analyzer - INFO - [document_analyzer.py:26] - file_name : /tmp/tmp5dfq1etu/1.pdf_page_0.png
+2025-04-14 22:37:29 - llm.document_analyzer - INFO - [document_analyzer.py:32] - Starting analysis for /tmp/tmp5dfq1etu/1.pdf_page_0.png
+2025-04-14 22:37:35 - llm.document_analyzer - INFO - [document_analyzer.py:51] - File name: /tmp/tmp5dfq1etu/1.pdf_page_0.png, Results: {'document_category': 'identity_verification_document', 'document_type': 'passport'}
+2025-04-14 22:37:40 - llm.llm - INFO - [llm.py:40] - Json is being formatted
+2025-04-14 22:37:40 - llm.document_analyzer - INFO - [document_analyzer.py:76] - /tmp/tmp5dfq1etu/1.pdf_page_0.png: {'full_name': 'UNITED-KINGDOM-FIVE JODIE PIPPA', 'surname': 'UNITED-KINGDOM-FIVE', 'given_names': 'JODIE PIPPA', 'passport_number': '107185703', 'nationality': 'BRITISH CITIZEN', 'date_of_birth': '1985-01-17', 'place_of_birth': 'LONDON', 'sex': 'F', 'date_of_issue': '2006-01-31', 'date_of_expiry': '2016-01-31', 'issuing_authority': 'UKPA', 'passport_type': 'P', 'country_code': 'GBR', 'mrz_line_1': 'P<GBRUNITED<KINGDOM<FIVE<<JODIE<PIPPA<<<<<<<', 'mrz_line_2': '1071857032GBR8501178F1601312<<<<<<<<<<<<<<02'}
+2025-04-14 22:37:40 - utils.document_display - INFO - [document_display.py:148] - analysis_results_for_id : {'document_category': 'identity_verification_document', 'document_type': 'passport', 'full_name': 'UNITED-KINGDOM-FIVE JODIE PIPPA', 'surname': 'UNITED-KINGDOM-FIVE', 'given_names': 'JODIE PIPPA', 'passport_number': '107185703', 'nationality': 'BRITISH CITIZEN', 'date_of_birth': '1985-01-17', 'place_of_birth': 'LONDON', 'sex': 'F', 'date_of_issue': '2006-01-31', 'date_of_expiry': '2016-01-31', 'issuing_authority': 'UKPA', 'passport_type': 'P', 'country_code': 'GBR', 'mrz_line_1': 'P<GBRUNITED<KINGDOM<FIVE<<JODIE<PIPPA<<<<<<<', 'mrz_line_2': '1071857032GBR8501178F1601312<<<<<<<<<<<<<<02'}
+2025-04-14 22:37:40 - utils.document_display - INFO - [document_display.py:167] - document_type for /tmp/tmp5dfq1etu/1.pdf: passport
+2025-04-14 22:37:40 - utils.document_display - INFO - [document_display.py:249] - analysis_results_for_id_updated : {'document_category': 'identity_verification_document', 'document_type': 'passport', 'passport_number': '107185703', 'full_name': 'UNITED-KINGDOM-FIVE JODIE PIPPA', 'date_of_birth': '1985-01-17', 'nationality': 'BRITISH CITIZEN', 'date_of_issue': '2006-01-31', 'gender': None, 'address': None}
+2025-04-14 22:37:40 - __main__ - INFO - [app_streamlit.py:40] - file_path while displaying: ['/tmp/tmp5dfq1etu/1.pdf_page_0.png']
+2025-04-14 22:37:40 - __main__ - INFO - [app_streamlit.py:40] - file_path while displaying: ['/tmp/tmp5dfq1etu/1.pdf_page_0.png']
+2025-04-14 22:48:54 - llm.document_analyzer - INFO - [document_analyzer.py:26] - file_name : /tmp/tmpfmxw59lw/sample_documents/sample_documents/3.pdf_page_0.png
+2025-04-14 22:48:54 - llm.document_analyzer - INFO - [document_analyzer.py:32] - Starting analysis for /tmp/tmpfmxw59lw/sample_documents/sample_documents/3.pdf_page_0.png
+2025-04-14 22:48:59 - llm.llm - INFO - [llm.py:40] - Json is being formatted
+2025-04-14 22:48:59 - llm.document_analyzer - INFO - [document_analyzer.py:51] - File name: /tmp/tmpfmxw59lw/sample_documents/sample_documents/3.pdf_page_0.png, Results: {'document_category': 'bank_statement', 'document_type': 'bank_statement'}
+2025-04-14 22:49:03 - llm.llm - INFO - [llm.py:40] - Json is being formatted
+2025-04-14 22:49:03 - llm.document_analyzer - INFO - [document_analyzer.py:76] - /tmp/tmpfmxw59lw/sample_documents/sample_documents/3.pdf_page_0.png: {'account_holder_name': 'Jodie Pippa', 'account_holder_address': '', 'statement_start_date': '2025-01-01', 'statement_end_date': '2025-02-28', 'salary_credits': [{'date': '2025-01-06', 'amount': '2213.83', 'from': 'ABC Ltd', 'description': 'Salary - ABC Ltd (Pro-rated Jan)'}, {'date': '2025-02-06', 'amount': '2566.66', 'from': 'ABC Ltd', 'description': 'Salary - ABC Ltd (Full Feb Salary)'}]}
+2025-04-14 22:49:03 - llm.document_analyzer - INFO - [document_analyzer.py:26] - file_name : /tmp/tmpfmxw59lw/sample_documents/sample_documents/5.pdf_page_0.png
+2025-04-14 22:49:03 - llm.document_analyzer - INFO - [document_analyzer.py:32] - Starting analysis for /tmp/tmpfmxw59lw/sample_documents/sample_documents/5.pdf_page_0.png
+2025-04-14 22:49:08 - llm.llm - INFO - [llm.py:40] - Json is being formatted
+2025-04-14 22:49:08 - llm.document_analyzer - INFO - [document_analyzer.py:51] - File name: /tmp/tmpfmxw59lw/sample_documents/sample_documents/5.pdf_page_0.png, Results: {'document_category': 'income_document', 'document_type': 'payslip'}
+2025-04-14 22:49:12 - llm.llm - INFO - [llm.py:40] - Json is being formatted
+2025-04-14 22:49:12 - llm.document_analyzer - INFO - [document_analyzer.py:76] - /tmp/tmpfmxw59lw/sample_documents/sample_documents/5.pdf_page_0.png: {'employee_name': 'Jodie Pippa', 'employer_name': 'ABC Ltd', 'payslip_date': '2025-01-31', 'pay_period_start': '2025-01-06', 'pay_period_end': '2025-01-31', 'payment_frequency': 'monthly', 'basic_pay': '3333.33', 'net_pay': '2566.66', 'gross_pay': '3333.33', 'salary_components': [], 'ni_contribution': '266.67', 'tax_deduction': '333.33', 'other_deductions': [{'name': 'Pension Contribution', 'amount': '166.67'}]}
+2025-04-14 22:49:12 - llm.document_analyzer - INFO - [document_analyzer.py:26] - file_name : /tmp/tmpfmxw59lw/sample_documents/sample_documents/2.pdf_page_0.png
+2025-04-14 22:49:12 - llm.document_analyzer - INFO - [document_analyzer.py:32] - Starting analysis for /tmp/tmpfmxw59lw/sample_documents/sample_documents/2.pdf_page_0.png
+2025-04-14 22:49:17 - llm.llm - INFO - [llm.py:40] - Json is being formatted
+2025-04-14 22:49:17 - llm.document_analyzer - INFO - [document_analyzer.py:51] - File name: /tmp/tmpfmxw59lw/sample_documents/sample_documents/2.pdf_page_0.png, Results: {'document_category': 'income_document', 'document_type': 'payslip'}
+2025-04-14 22:49:22 - llm.llm - INFO - [llm.py:40] - Json is being formatted
+2025-04-14 22:49:22 - llm.document_analyzer - INFO - [document_analyzer.py:76] - /tmp/tmpfmxw59lw/sample_documents/sample_documents/2.pdf_page_0.png: {'employee_name': 'Jodie Pippa', 'employer_name': '', 'payslip_date': '', 'pay_period_start': '', 'pay_period_end': '', 'payment_frequency': '', 'basic_pay': '', 'net_pay': '', 'gross_pay': '9545.45', 'salary_components': [], 'ni_contribution': '377.93', 'tax_deduction': '0', 'other_deductions': [{'name': 'Student Loan deductions', 'amount': '0'}]}
+2025-04-14 22:49:22 - llm.document_analyzer - INFO - [document_analyzer.py:26] - file_name : /tmp/tmpfmxw59lw/sample_documents/sample_documents/4.pdf_page_0.png
+2025-04-14 22:49:22 - llm.document_analyzer - INFO - [document_analyzer.py:32] - Starting analysis for /tmp/tmpfmxw59lw/sample_documents/sample_documents/4.pdf_page_0.png
+2025-04-14 22:49:26 - llm.llm - INFO - [llm.py:40] - Json is being formatted
+2025-04-14 22:49:26 - llm.document_analyzer - INFO - [document_analyzer.py:51] - File name: /tmp/tmpfmxw59lw/sample_documents/sample_documents/4.pdf_page_0.png, Results: {'document_category': 'unknown', 'document_type': 'unknown'}
+2025-04-14 22:49:32 - llm.llm - INFO - [llm.py:40] - Json is being formatted
+2025-04-14 22:49:32 - llm.document_analyzer - INFO - [document_analyzer.py:76] - /tmp/tmpfmxw59lw/sample_documents/sample_documents/4.pdf_page_0.png: {'Contract of Employment': {'Employee Name': 'Jodie Pippa', 'Job Title': 'Sales Manager', 'Start Date': '6th January 2025', 'Employer': 'ABC Ltd', 'Address': '456 Business Street, London, UK, SW1A 2BB', 'Employee Address': '123 Maple Street, London, UK, SW1A 1AA', 'Job Title and Duties': {'Job Title': 'Sales Manager', 'Reporting To': 'Managing Director', 'Main Duties and Responsibilities': ['Manage the sales team to achieve monthly and annual sales targets.', 'Develop and implement sales strategies to grow the business.', 'Build and maintain relationships with key clients.', 'Prepare sales reports and forecasts for senior management.']}, 'Place of Work': "The normal place of work is the company's office at 456 Business Street, London, UK, SW1A 2BB. However, the employee may be required to work at other locations as necessary.", 'Hours of Work': 'The normal working hours are 40 hours per week, Monday to Friday, 9:00 AM to 5:30 PM, with a one-hour unpaid lunch break.', 'Salary and Benefits': {'Basic Salary': '£40,000 per annum, payable monthly in arrears on the last working day of each month.', 'Bonus Scheme': 'Eligible for a performance-based bonus of up to 10% of annual salary.', 'Pension': 'Auto-enrolment into the company pension scheme in line with UK legislation.', 'Holiday Entitlement': '25 days per annum plus UK public holidays.'}, 'Probationary Period': "The first 3 months of employment will be a probationary period. 
During this time, the employee's suitability for the role will be assessed.", 'Termination of Employment': {'Notice Periods': ["During probationary period: 1 week's notice by either party.", "After probationary period: 1 month's notice by the employee, 2 months' notice by the employer.Summary Dismissal: The employer reserves the right to terminate employment without notice in cases of gross misconduct."]}}}
+2025-04-14 22:49:32 - llm.document_analyzer - INFO - [document_analyzer.py:26] - file_name : /tmp/tmpfmxw59lw/sample_documents/sample_documents/4.pdf_page_1.png
+2025-04-14 22:49:32 - llm.document_analyzer - INFO - [document_analyzer.py:32] - Starting analysis for /tmp/tmpfmxw59lw/sample_documents/sample_documents/4.pdf_page_1.png
+2025-04-14 22:49:36 - llm.llm - INFO - [llm.py:40] - Json is being formatted
+2025-04-14 22:49:36 - llm.document_analyzer - INFO - [document_analyzer.py:51] - File name: /tmp/tmpfmxw59lw/sample_documents/sample_documents/4.pdf_page_1.png, Results: {'document_category': 'unknown', 'document_type': 'unknown'}
+2025-04-14 22:49:42 - llm.llm - INFO - [llm.py:40] - Json is being formatted
+2025-04-14 22:49:42 - llm.document_analyzer - INFO - [document_analyzer.py:76] - /tmp/tmpfmxw59lw/sample_documents/sample_documents/4.pdf_page_1.png: {'clauses': [{'clause_number': '7', 'title': 'Confidentiality and Data Protection', 'content': 'The employee agrees to maintain the confidentiality of all company information and comply with the UK Data Protection Act 2018 and GDPR.'}, {'clause_number': '8', 'title': 'Intellectual Property', 'content': 'Any intellectual property created by the employee during the course of employment shall belong to the company.'}, {'clause_number': '9', 'title': 'Grievance and Disciplinary Procedures', 'content': "The company's grievance and disciplinary procedures will apply, as outlined in the employee handbook."}, {'clause_number': '10', 'title': 'Health and Safety', 'content': "The employee agrees to comply with the company's health and safety policies and procedures."}, {'clause_number': '11', 'title': 'Mobility Clause', 'content': 'The employee may be required to work at other locations within the UK or travel as necessary for business purposes.'}, {'clause_number': '12', 'title': 'Entire Agreement', 'content': 'This contract constitutes the entire agreement between the parties and supersedes any previous agreements or understandings.'}], 'signatures': {'employer': {'name': None, 'position': None, 'date': None}, 'employee': {'name': 'Jodie Pippa', 'date': None}}}
+2025-04-14 22:49:42 - llm.document_analyzer - INFO - [document_analyzer.py:26] - file_name : /tmp/tmpfmxw59lw/sample_documents/sample_documents/1.pdf_page_0.png
+2025-04-14 22:49:42 - llm.document_analyzer - INFO - [document_analyzer.py:32] - Starting analysis for /tmp/tmpfmxw59lw/sample_documents/sample_documents/1.pdf_page_0.png
+2025-04-14 22:49:46 - llm.llm - INFO - [llm.py:40] - Json is being formatted
+2025-04-14 22:49:46 - llm.document_analyzer - INFO - [document_analyzer.py:51] - File name: /tmp/tmpfmxw59lw/sample_documents/sample_documents/1.pdf_page_0.png, Results: {'document_category': 'identity_verification_document', 'document_type': 'passport'}
+2025-04-14 22:49:52 - llm.llm - INFO - [llm.py:40] - Json is being formatted
+2025-04-14 22:49:52 - llm.document_analyzer - INFO - [document_analyzer.py:76] - /tmp/tmpfmxw59lw/sample_documents/sample_documents/1.pdf_page_0.png: {'full_name': 'UNITED-KINGDOM-FIVE JODIE PIPPA', 'surname': 'UNITED-KINGDOM-FIVE', 'given_names': 'JODIE PIPPA', 'passport_number': '107185703', 'nationality': 'BRITISH CITIZEN', 'date_of_birth': '1985-01-17', 'place_of_birth': 'LONDON', 'sex': 'F', 'date_of_issue': '2006-01-31', 'date_of_expiry': '2016-01-31', 'issuing_authority': 'UKPA', 'passport_type': 'P', 'country_code': 'GBR', 'mrz_line_1': 'P<GBRUNITED<KINGDOM<FIVE<<JODIE<PIPPA<<<<<<<', 'mrz_line_2': '1071857032GBR8501178F1601312<<<<<<<<<<<<<<02'}
+2025-04-14 22:49:52 - utils.document_display - INFO - [document_display.py:148] - analysis_results_for_id : {'document_category': 'bank_statement', 'document_type': 'bank_statement', 'account_holder_name': 'Jodie Pippa', 'account_holder_address': '', 'statement_start_date': '2025-01-01', 'statement_end_date': '2025-02-28', 'salary_credits': [{'date': '2025-01-06', 'amount': '2213.83', 'from': 'ABC Ltd', 'description': 'Salary - ABC Ltd (Pro-rated Jan)'}, {'date': '2025-02-06', 'amount': '2566.66', 'from': 'ABC Ltd', 'description': 'Salary - ABC Ltd (Full Feb Salary)'}]}
+2025-04-14 22:49:52 - utils.document_display - INFO - [document_display.py:167] - document_type for /tmp/tmpfmxw59lw/sample_documents/sample_documents/3.pdf: bank_statement
+2025-04-14 22:49:52 - utils.document_display - INFO - [document_display.py:251] - analysis_results_for_id_updated : {'document_type': 'bank_statement'}
+2025-04-14 22:49:52 - __main__ - INFO - [app_streamlit.py:40] - file_path while displaying: ['/tmp/tmpfmxw59lw/sample_documents/sample_documents/3.pdf_page_0.png']
+2025-04-14 22:49:52 - utils.document_display - INFO - [document_display.py:148] - analysis_results_for_id : {'document_category': 'income_document', 'document_type': 'payslip', 'employee_name': 'Jodie Pippa', 'employer_name': 'ABC Ltd', 'payslip_date': '2025-01-31', 'pay_period_start': '2025-01-06', 'pay_period_end': '2025-01-31', 'payment_frequency': 'monthly', 'basic_pay': '3333.33', 'net_pay': '2566.66', 'gross_pay': '3333.33', 'salary_components': [], 'ni_contribution': '266.67', 'tax_deduction': '333.33', 'other_deductions': [{'name': 'Pension Contribution', 'amount': '166.67'}]}
+2025-04-14 22:49:52 - utils.document_display - INFO - [document_display.py:167] - document_type for /tmp/tmpfmxw59lw/sample_documents/sample_documents/5.pdf: payslip
+2025-04-14 22:49:52 - utils.document_display - INFO - [document_display.py:251] - analysis_results_for_id_updated : {'document_category': 'income_document', 'document_type': 'payslip', 'employee_name': 'Jodie Pippa', 'employer_name': 'ABC Ltd', 'payslip_date': '2025-01-31', 'pay_period_start': '2025-01-06', 'pay_period_end': '2025-01-31', 'payment_frequency': 'monthly', 'basic_pay': '3333.33', 'net_pay': '2566.66', 'gross_pay': '3333.33', 'salary_components': [], 'ni_contribution': '266.67', 'tax_deduction': '333.33', 'other_deductions': [{'name': 'Pension Contribution', 'amount': '166.67'}]}
+2025-04-14 22:49:52 - __main__ - INFO - [app_streamlit.py:40] - file_path while displaying: ['/tmp/tmpfmxw59lw/sample_documents/sample_documents/5.pdf_page_0.png']
+2025-04-14 22:49:52 - utils.document_display - INFO - [document_display.py:148] - analysis_results_for_id : {'document_category': 'income_document', 'document_type': 'payslip', 'employee_name': 'Jodie Pippa', 'employer_name': '', 'payslip_date': '', 'pay_period_start': '', 'pay_period_end': '', 'payment_frequency': '', 'basic_pay': '', 'net_pay': '', 'gross_pay': '9545.45', 'salary_components': [], 'ni_contribution': '377.93', 'tax_deduction': '0', 'other_deductions': [{'name': 'Student Loan deductions', 'amount': '0'}]}
+2025-04-14 22:49:52 - utils.document_display - INFO - [document_display.py:167] - document_type for /tmp/tmpfmxw59lw/sample_documents/sample_documents/2.pdf: payslip
+2025-04-14 22:49:52 - utils.document_display - INFO - [document_display.py:251] - analysis_results_for_id_updated : {'document_category': 'income_document', 'document_type': 'payslip', 'employee_name': 'Jodie Pippa', 'employer_name': '', 'payslip_date': '', 'pay_period_start': '', 'pay_period_end': '', 'payment_frequency': '', 'basic_pay': '', 'net_pay': '', 'gross_pay': '9545.45', 'salary_components': [], 'ni_contribution': '377.93', 'tax_deduction': '0', 'other_deductions': [{'name': 'Student Loan deductions', 'amount': '0'}]}
+2025-04-14 22:49:52 - __main__ - INFO - [app_streamlit.py:40] - file_path while displaying: ['/tmp/tmpfmxw59lw/sample_documents/sample_documents/2.pdf_page_0.png']
+2025-04-14 22:49:53 - utils.document_display - INFO - [document_display.py:148] - analysis_results_for_id : {'document_category': 'unknown', 'Contract of Employment': {'Employee Name': 'Jodie Pippa', 'Job Title': 'Sales Manager', 'Start Date': '6th January 2025', 'Employer': 'ABC Ltd', 'Address': '456 Business Street, London, UK, SW1A 2BB', 'Employee Address': '123 Maple Street, London, UK, SW1A 1AA', 'Job Title and Duties': {'Job Title': 'Sales Manager', 'Reporting To': 'Managing Director', 'Main Duties and Responsibilities': ['Manage the sales team to achieve monthly and annual sales targets.', 'Develop and implement sales strategies to grow the business.', 'Build and maintain relationships with key clients.', 'Prepare sales reports and forecasts for senior management.']}, 'Place of Work': "The normal place of work is the company's office at 456 Business Street, London, UK, SW1A 2BB. However, the employee may be required to work at other locations as necessary.", 'Hours of Work': 'The normal working hours are 40 hours per week, Monday to Friday, 9:00 AM to 5:30 PM, with a one-hour unpaid lunch break.', 'Salary and Benefits': {'Basic Salary': '£40,000 per annum, payable monthly in arrears on the last working day of each month.', 'Bonus Scheme': 'Eligible for a performance-based bonus of up to 10% of annual salary.', 'Pension': 'Auto-enrolment into the company pension scheme in line with UK legislation.', 'Holiday Entitlement': '25 days per annum plus UK public holidays.'}, 'Probationary Period': "The first 3 months of employment will be a probationary period. 
During this time, the employee's suitability for the role will be assessed.", 'Termination of Employment': {'Notice Periods': ["During probationary period: 1 week's notice by either party.", "After probationary period: 1 month's notice by the employee, 2 months' notice by the employer.Summary Dismissal: The employer reserves the right to terminate employment without notice in cases of gross misconduct."]}}, 'document_type': 'unknown', 'signatures': {'employer': {'name': None, 'position': None, 'date': None}, 'employee': {'name': 'Jodie Pippa', 'date': None}}, 'clauses': [{'clause_number': '7', 'title': 'Confidentiality and Data Protection', 'content': 'The employee agrees to maintain the confidentiality of all company information and comply with the UK Data Protection Act 2018 and GDPR.'}, {'clause_number': '8', 'title': 'Intellectual Property', 'content': 'Any intellectual property created by the employee during the course of employment shall belong to the company.'}, {'clause_number': '9', 'title': 'Grievance and Disciplinary Procedures', 'content': "The company's grievance and disciplinary procedures will apply, as outlined in the employee handbook."}, {'clause_number': '10', 'title': 'Health and Safety', 'content': "The employee agrees to comply with the company's health and safety policies and procedures."}, {'clause_number': '11', 'title': 'Mobility Clause', 'content': 'The employee may be required to work at other locations within the UK or travel as necessary for business purposes.'}, {'clause_number': '12', 'title': 'Entire Agreement', 'content': 'This contract constitutes the entire agreement between the parties and supersedes any previous agreements or understandings.'}]}
+2025-04-14 22:49:53 - utils.document_display - INFO - [document_display.py:167] - document_type for /tmp/tmpfmxw59lw/sample_documents/sample_documents/4.pdf: unknown
+2025-04-14 22:49:53 - utils.document_display - INFO - [document_display.py:251] - analysis_results_for_id_updated : {'document_type': 'unknown'}
+2025-04-14 22:49:53 - __main__ - INFO - [app_streamlit.py:40] - file_path while displaying: ['/tmp/tmpfmxw59lw/sample_documents/sample_documents/4.pdf_page_0.png', '/tmp/tmpfmxw59lw/sample_documents/sample_documents/4.pdf_page_1.png']
+2025-04-14 22:49:53 - utils.document_display - INFO - [document_display.py:148] - analysis_results_for_id : {'document_category': 'identity_verification_document', 'document_type': 'passport', 'full_name': 'UNITED-KINGDOM-FIVE JODIE PIPPA', 'surname': 'UNITED-KINGDOM-FIVE', 'given_names': 'JODIE PIPPA', 'passport_number': '107185703', 'nationality': 'BRITISH CITIZEN', 'date_of_birth': '1985-01-17', 'place_of_birth': 'LONDON', 'sex': 'F', 'date_of_issue': '2006-01-31', 'date_of_expiry': '2016-01-31', 'issuing_authority': 'UKPA', 'passport_type': 'P', 'country_code': 'GBR', 'mrz_line_1': 'P<GBRUNITED<KINGDOM<FIVE<<JODIE<PIPPA<<<<<<<', 'mrz_line_2': '1071857032GBR8501178F1601312<<<<<<<<<<<<<<02'}
+2025-04-14 22:49:53 - utils.document_display - INFO - [document_display.py:167] - document_type for /tmp/tmpfmxw59lw/sample_documents/sample_documents/1.pdf: passport
+2025-04-14 22:49:53 - utils.document_display - INFO - [document_display.py:251] - analysis_results_for_id_updated : {'document_category': 'identity_verification_document', 'document_type': 'passport', 'passport_number': '107185703', 'full_name': 'UNITED-KINGDOM-FIVE JODIE PIPPA', 'date_of_birth': '1985-01-17', 'nationality': 'BRITISH CITIZEN', 'date_of_issue': '2006-01-31', 'date_of_expiry': '2016-01-31', 'sex': 'F', 'address': None}
+2025-04-14 22:49:53 - __main__ - INFO - [app_streamlit.py:40] - file_path while displaying: ['/tmp/tmpfmxw59lw/sample_documents/sample_documents/1.pdf_page_0.png']

logs_directory/app_20250415.log ADDED Viewed

The diff for this file is too large to render. See raw diff

logs_directory/app_20250416.log ADDED Viewed

The diff for this file is too large to render. See raw diff

logs_directory/app_20250417.log ADDED Viewed

The diff for this file is too large to render. See raw diff

logs_directory/app_20250420.log ADDED Viewed

The diff for this file is too large to render. See raw diff

prompts/__init__.py ADDED Viewed

	@@ -0,0 +1,7 @@

+from .document_type import document_type_prompt
+from .bank_statement.bank_statement import bank_statement_prompt
+from .identity_documents.passport import passport_prompt
+from .identity_documents.driving_license import driving_license_prompt
+from .income_document.p60 import p60_prompt
+from .income_document.payslip import payslip_prompt
+from .genric_ocr import genric_ocr_prompt

prompts/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (631 Bytes). View file

prompts/__pycache__/document_type.cpython-313.pyc ADDED Viewed

Binary file (2.37 kB). View file

prompts/__pycache__/genric_ocr.cpython-313.pyc ADDED Viewed

Binary file (522 Bytes). View file

prompts/bank_statement/__pycache__/bank_statement.cpython-313.pyc ADDED Viewed

Binary file (1.92 kB). View file

prompts/bank_statement/bank_statement.py ADDED Viewed

	@@ -0,0 +1,67 @@

+bank_statement_prompt = """
+🏦 Bank Statement Information Extraction Prompt
+You are a document information extraction assistant.
+You will be given an image of a bank statement. Your task is to extract structured data that can be used to verify a customer's identity and income information.
+📝 Extract the following fields:
+1. Identity Information
+- account_holder_name
+- account_holder_address
+- bank_name
+- account_number
+- sort_code
+2. Statement Period
+- statement_start_date (format: YYYY-MM-DD)
+- statement_end_date (format: YYYY-MM-DD)
+3. Income Information
+- salary_credits — an array of objects, where each object contains:
+- date (of credit) (format: YYYY-MM-DD)
+- amount
+- from — From account details
+- description
+📦 Output Format
+{
+  "account_holder_name": "",
+  "account_holder_address": "",
+  "bank_name": "",
+  "account_number": "",
+  "sort_code": "",
+  "statement_start_date": "",
+  "statement_end_date": "",
+  "salary_credits": [
+    {
+      "date": "", Dates must be in YYYY-MM-DD format.
+      "amount": "",
+      "from" : "",
+      "description": ""
+    }
+  ]
+}
+📌 Instructions
+Identify salary credits based on transaction descriptions (e.g. containing "Salary", "SAL", "Payroll", "Company Name", etc.).
+Dates must be in YYYY-MM-DD format.
+If no data is available for a field, give null.
+Only return the structured JSON — no explanation or extra content.
+When extracting the user's full name, make sure the first name is followed by the last name.
+✅ With this output, we will validate:
+Name
+Address
+Presence of salary credit
+Salary credits across different months
+Salary consistency (regularity & similar amount)
+That the statement period covers at least 28 days
+"""

prompts/document_type.py ADDED Viewed

	@@ -0,0 +1,103 @@

+document_type_prompt = """
+Document Type Identification Agent Prompt
+You are a document classification assistant.
+You will be given an image of a document. Your task is to analyze its content and identify the most appropriate document_category and document_type.
+Here are the valid document categories and their corresponding types:
+{
+  "identity_verification_document": [
+    "passport",
+    "driving_license",
+    "national_identity_card",
+    "other"
+  ],
+  "bank_statement": [
+    "bank_statement",
+    "other"
+  ],
+  "income_document": [
+    "payslip",
+    "p60",
+    "contract_of_employment",
+    "other"
+  ]
+}
+🧪 Few-shot examples
+Example 1:
+Image shows a government-issued ID with a photo, name, nationality, and passport number.
+{
+  "document_category": "identity_verification_document",
+  "document_type": "passport"
+}
+Example 2:
+Image shows a payslip with details like gross salary, deductions, employer name, and pay period.
+{
+  "document_category": "income_document",
+  "document_type": "payslip"
+}
+Example 3:
+Image shows a monthly bank statement with account number, transaction list, balances, and bank branding.
+{
+  "document_category": "bank_statement",
+  "document_type": "bank_statement"
+}
+Example 4:
+Image shows a plastic card with license number, issue/expiry dates, categories of vehicles, and a photo.
+{
+  "document_category": "identity_verification_document",
+  "document_type": "driving_license"
+}
+Example 5:
+Image contains unclear or unrelated content (e.g., a receipt or handwritten note).
+{
+  "document_category": "unknown",
+  "document_type": "unknown"
+}
+Instructions:
+Extract the textual and visual information from the document image.
+Match it to the most likely document_category and document_type from the list above.
+Return your answer in strict JSON format, as shown below:
+{
+  "document_category": "string",
+  "document_type": "string"
+}
+If you are unable to confidently classify the document, return:
+{
+  "document_category": "unknown",
+  "document_type": "unknown"
+}
+Constraints:
+Your response must only include the JSON object.
+Do not include any explanation, notes, or additional content outside the JSON.
+"""

prompts/genric_ocr.py ADDED Viewed

	@@ -0,0 +1,9 @@

+genric_ocr_prompt = """
+You are an expert document analyzer specializing in converting images of documents into structured data.
+You will be given images of the documents. Extract the data from them as structured JSON.
+The output should only be in JSON format. Don't output anything other than valid JSON that can be loaded in Python.
+"""

prompts/identity_documents/__pycache__/driving_license.cpython-313.pyc ADDED Viewed

Binary file (1.21 kB). View file

prompts/identity_documents/__pycache__/passport.cpython-313.pyc ADDED Viewed

Binary file (1.6 kB). View file

prompts/identity_documents/driving_license.py ADDED Viewed

	@@ -0,0 +1,39 @@

+driving_license_prompt = """
+You are an intelligent document parser. Extract all relevant information from the provided image of a UK driving licence and return it in structured JSON format.
+The fields you must extract are:
+- surname
+- first_name (Might span across two lines)
+- date_of_birth (in YYYY-MM-DD format)
+- place_of_birth
+- date_of_issue (in YYYY-MM-DD format)
+- date_of_expiry (in YYYY-MM-DD format)
+- issuing_authority
+- driver_number
+- signature
+- address (with line_1, city, and postcode)
+- entitlements (as a list of licence categories)
+Return the output strictly in the following JSON format:
+{
+  "surname": "",
+  "first_name": "",
+  "date_of_birth": "",
+  "place_of_birth": "",
+  "date_of_issue": "",
+  "date_of_expiry": "",
+  "issuing_authority": "",
+  "driver_number": "",
+  "signature": "",
+  "address": {
+    "line_1": "",
+    "city": "",
+    "postcode": ""
+  },
+  "entitlements": []
+}
+Do not include any additional explanation or text—only return the filled JSON object.
+"""

prompts/identity_documents/passport.py ADDED Viewed

	@@ -0,0 +1,58 @@

+passport_prompt = """
+🛂 UK Passport Information Extraction Prompt
+You are an intelligent document parser.
+You will be given an image of a United Kingdom (UK) passport. Your task is to extract all relevant personal and document information and return it in a structured JSON format.
+📝 Extract the following fields (if available):
+- full_name (concatenated from surname and given_names)
+- surname
+- given_names
+- passport_number
+- nationality
+- date_of_birth (in YYYY-MM-DD format)
+- place_of_birth
+- sex (M or F)
+- date_of_issue (in YYYY-MM-DD format)
+- date_of_expiry (in YYYY-MM-DD format)
+- issuing_authority
+- passport_type (usually P)
+- country_code
+- mrz_line_1
+- mrz_line_2
+📦 Output Format
+Return your result using the following JSON structure:
+{
+  "full_name": "",
+  "surname": "",
+  "given_names": "",
+  "passport_number": "",
+  "nationality": "",
+  "date_of_birth": "",
+  "place_of_birth": "",
+  "sex": "",
+  "date_of_issue": "",
+  "date_of_expiry": "",
+  "issuing_authority": "",
+  "passport_type": "",
+  "country_code": "",
+  "mrz_line_1": "",
+  "mrz_line_2": ""
+}
+📌 Instructions
+If a field is not present or not readable, return it as an empty string "".
+Dates must be in YYYY-MM-DD format.
+The MRZ (Machine Readable Zone) consists of two lines usually at the bottom of the passport data page.
+Respond only with the JSON object — no extra text or explanation.
+"""

prompts/income_document/__pycache__/p60.cpython-313.pyc ADDED Viewed

Binary file (3.09 kB). View file

prompts/income_document/__pycache__/payslip.cpython-313.pyc ADDED Viewed

Binary file (2.4 kB). View file

prompts/income_document/p60.py ADDED Viewed

	@@ -0,0 +1,61 @@

+p60_prompt = """
+You are an expert document parser. Extract the structured information from a UK P60 End of Year Certificate and return it as a JSON object.
+Use the following format and structure. Ensure numerical values are parsed correctly, and fields with missing data should use null where appropriate. The "national_insurance_contributions" field should be an array to accommodate multiple NIC letters.
+{
+  "employee_details": {
+    "surname": "",                       // Employee's last name
+    "forenames_or_initials": "",        // Employee's first name or initials
+    "national_insurance_number": "",    // NI number (e.g. AB123456C)
+    "works_payroll_number": ""          // Internal payroll identifier
+  },
+  "pay_and_income_tax_details": {
+    "previous_employments": {
+      "pay": 0.00,                       // Pay from previous jobs in the tax year
+      "tax_deducted": 0.00              // Tax deducted from previous jobs
+    },
+    "current_employment": {
+      "pay": 0.00,                       // Pay from this employment
+      "tax_deducted": 0.00              // Tax deducted from this employment
+    },
+    "total_for_year": {
+      "pay": 0.00,                       // Total pay for the year
+      "tax_deducted": 0.00              // Total tax deducted for the year
+    },
+    "final_tax_code": ""                // Final PAYE tax code (e.g. 1257L)
+  },
+  "national_insurance_contributions": [
+    {
+      "nic_letter": "",                 // NIC table letter (e.g. A, B, C, J)
+      "earnings": {
+        "at_or_above_lel": 0.00,        // Earnings above Lower Earnings Limit
+        "above_lel_up_to_pt": 0.00,     // Earnings above LEL up to PT
+        "above_pt_up_to_uel": 0.00      // Earnings above PT up to UEL
+      },
+      "employee_contributions_above_pt": 0.00 // Contributions on earnings above PT
+    }
+    // ... Add more entries if needed
+  ],
+  "statutory_payments": {
+    "maternity_pay": 0.00,              // Statutory Maternity Pay included
+    "paternity_pay": 0.00,              // Statutory Paternity Pay included
+    "adoption_pay": 0.00,               // Statutory Adoption Pay included
+    "shared_parental_pay": 0.00         // Statutory Shared Parental Pay included
+  },
+  "other_details": {
+    "student_loan_deductions": 0.00     // Student loan deductions (whole £ only)
+  },
+  "employer_details": {
+    "employer_name_and_address": "",    // Employer's name and full address
+    "paye_reference": ""                // Employer's PAYE reference (e.g. 123/AB456)
+  }
+}
+✅ Additional Instructions (Optional):
+- Extract values as they appear on the document (e.g., keep leading zeroes, currency format).
+- If an amount field is empty or not present, use null.
+- Preserve all textual details like addresses or names exactly as shown.
+- When extracting the user's full name, make sure the first name is followed by the last name.
+"""

prompts/income_document/payslip.py ADDED Viewed

	@@ -0,0 +1,84 @@

+payslip_prompt = """
+💼 Payslip Information Extraction Prompt
+You are a document information extraction assistant.
+You will be given an image of a UK payslip. Your task is to extract all relevant details required for salary verification and compliance checks.
+📝 Extract the following fields:
+1. Identity & Date
+- employee_name
+- employer_name
+- employee_id
+- employee_address
+- employer_address
+- tax_code
+- payslip_date (format: YYYY-MM-DD)
+- pay_period_start (format: YYYY-MM-DD)
+- pay_period_end (format: YYYY-MM-DD)
+- payment_frequency (monthly or weekly)
+2. Salary Details
+- basic_pay
+- net_pay
+- gross_pay
+- salary_components: list of { name, amount } (e.g., bonus, overtime, allowance)
+3. Deductions
+- ni_contribution (National Insurance)
+- tax_deduction
+- other_deductions: list of { name, amount }
+📦 Output Format
+{
+  "employee_name": "",
+  "employer_name": "",
+  "employee_id": "",
+  "employee_address": "",
+  "employer_address": "",
+  "tax_code": "",
+  "payslip_date": "",
+  "pay_period_start": "",
+  "pay_period_end": "",
+  "payment_frequency": "",
+  "basic_pay": "",
+  "net_pay": "",
+  "gross_pay": "",
+  "salary_components": [
+    {
+      "name": "",
+      "amount": ""
+    }
+  ],
+  "ni_contribution": "",
+  "tax_deduction": "",
+  "other_deductions": [
+    {
+      "name": "",
+      "amount": ""
+    }
+  ]
+}
+📌 Instructions
+- All monetary values should be extracted as numeric strings (e.g., "1550.00").
+- Dates must be returned in YYYY-MM-DD format.
+- If a field is missing or unreadable, use null.
+- Only return the structured JSON — no explanation or extra content.
+- Always extract only relevant information
+- When extracting the user's full name, make sure the first name is followed by the last name
+- other_deductions should only be list of dictionaries
+✅ This output supports the following checks:
+- Payslip includes Basic Pay, Net Pay, and detailed Salary Components
+- National Insurance (NI) is present
+- Tax deduction is clearly shown
+- Determine if payslip is most recent (monthly) or 4 consecutive weeks (weekly)
+- If only a date range is shown (e.g. pay_period_end), ensure it's within 35 days from today
+- If the payslip has no date, it's invalid
+"""

schemas/__init__.py ADDED Viewed

	@@ -0,0 +1,5 @@

+from .account_statement import UKBankAccountStatement
+from .custom_app_form import CustomAppFormUpload
+from .id import UKPassportSchema, UKDrivingLicense
+from .payslip import UKPayslipSchema
+from .uk_address import UKAddress

schemas/__pycache__/__init__.cpython-313.pyc ADDED Viewed

Binary file (464 Bytes). View file

schemas/__pycache__/account_statement.cpython-313.pyc ADDED Viewed

Binary file (16.9 kB). View file

schemas/__pycache__/custom_app_form.cpython-313.pyc ADDED Viewed

Binary file (6.07 kB). View file

schemas/__pycache__/id.cpython-313.pyc ADDED Viewed

Binary file (10.8 kB). View file

schemas/__pycache__/payslip.cpython-313.pyc ADDED Viewed

Binary file (20.3 kB). View file

schemas/__pycache__/uk_address.cpython-313.pyc ADDED Viewed

Binary file (2.91 kB). View file

schemas/account_statement.py ADDED Viewed

	@@ -0,0 +1,609 @@

+import datetime
+import re
+from pydantic import (
+    BaseModel,
+    Field,
+    ValidationInfo,
+    computed_field,
+    model_validator,
+    ConfigDict
+)
+import pandas as pd
+class UKBankAccountStatement(BaseModel):
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+    statement_start_date: datetime.date | None = Field(
+        default=None,
+        description="Digital Bank account statement period's start date in YYYY-MM-DD format",
+        examples=["2025-01-01"],
+    )
+    statement_end_date: datetime.date | None = Field(
+        default=None,
+        description="Digital Bank account statement period's end date in YYYY-MM-DD format",
+        examples=["2025-01-31"],
+    )
+    first_salary_deposit_date_present: int | datetime.date | None = Field(
+        default=None,
+        description=(
+            "The day/date of the very first salary deposit line item present in"
+            " the bank account statement. Value must be gte 1 & lte 31"
+        ),
+        examples=[
+            "If first present salary deposit date is 2025-01-06, then 6 must be passed"
+        ],
+    )
+    bank_name: str | None = Field(
+        default=None,
+        description="Extracted bank name value, stripped of whitespaces at beginning & end",
+        examples=["HSBC"],
+    )  # , min_length=4, max_length=50)
+    full_name: str | None = Field(
+        default=None,
+        description="Applicant's full name. Must consist of at least 2 words, have length gte 2 & lte 61",
+        examples=["Jodie Pippa"],
+    )  # , min_length=2, max_length=61)
+    account_number: str | None = Field(
+        default=None,
+        description="UK Bank Account Statement's account number. Must be of 8 characters length only",
+        examples=["12345678"],
+    )  # , min_length=8, max_length=8)  # 12345678
+    sort_code: str | None = Field(
+        default=None,
+        description="UK Bank Account Sort Code. Must be of length 8 characters only. Format: xx-xx-xx",
+        examples="20-00-00",
+    )  # , min_length=8, max_length=8)  # 20-00-00
+    # is_salary_credit_consistent_across_months: bool = Field(
+    #     default=False,
+    #     description=(
+    #         "If the bank account statement spans several months, sense check "
+    #         "whether salary deposit amounts across months are consistent"
+    #     ),
+    #     examples=[True, False, None],
+    # )
+    account_statement_date_err_msgs: str | None = None
+    full_name_err_msgs: str | None = None
+    bank_name_err_msgs: str | None = None
+    account_number_err_msgs: str | None = None
+    sort_code_err_msgs: str | None = None
+    salary_deposit_err_msgs: str | None = None
+    validation_policy_status_df: pd.DataFrame = pd.DataFrame(
+        columns=["Policy", "Value", "Status", "Message"])
+    @model_validator(mode="after")
+    def validate_full_name(cls, values, info: ValidationInfo):
+        """Match applicant's full name against provided name (case-insensitive)"""
+        try:
+            err_msgs = []
+            expected = (
+                info.context.get("application_summary_full_name")
+                if info.context
+                else None
+            )
+            full_name_val = values.full_name
+            if not full_name_val:
+                err_msgs.append("Applicant's full name not present")
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Full Name",
+                    full_name_val,
+                    False,
+                    "Applicant's full name not present",
+                ]
+            else:
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Full Name",
+                    full_name_val,
+                    True,
+                    "Applicant's full name is present",
+                ]
+            full_name_val_len = 0
+            if full_name_val:
+                full_name_val_len = len(full_name_val)
+            if not full_name_val and not (
+                full_name_val_len >= 2 and full_name_val_len <= 61
+            ):
+                err_msgs.append(
+                    "Full name must have a length of at least 2 & at most 61"
+                )
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Full Name",
+                    full_name_val_len,
+                    False,
+                    "Full name does not have a length of at least 2 & at most 61",
+                ]
+            else:
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Full Name",
+                    full_name_val_len,
+                    True,
+                    "Full name has a length of at least 2 & at most 61",
+                ]
+            if (
+                not expected
+                or not full_name_val
+                or full_name_val.lower() != expected.lower()
+            ):
+                err_msgs.append("Name mismatch with provided value")
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Data Match",
+                    f"{full_name_val}, {expected}",
+                    False,
+                    "Name does not match with provided value",
+                ]
+            else:
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Data Match",
+                    f"{full_name_val}, {expected}",
+                    True,
+                    "Name matches with provided value",
+                ]
+            if not full_name_val or len(full_name_val.strip().split(" ")) < 2:
+                err_msgs.append(
+                    "Full name must consist of at least 2 words (first name + last name)"
+                )
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Full Name",
+                    full_name_val,
+                    False,
+                    "Full name does not consist of at least 2 words (first name + last name)",
+                ]
+            else:
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Full Name",
+                    full_name_val,
+                    True,
+                    "Full name consists of at least 2 words (first name + last name)",
+                ]
+            if err_msgs:
+                values.full_name_err_msgs = ", ".join(err_msgs)
+            else:
+                values.full_name_err_msgs = None
+            return values
+        except Exception as e:
+            # logger.exception(e, exc_info=True)
+            # return None
+            raise
+    @model_validator(mode="after")
+    def validate_bank_name(cls, values, info: ValidationInfo):
+        """Match bank name against provided name (case-insensitive)"""
+        try:
+            err_msgs = []
+            expected = (
+                info.context.get("application_summary_bank_name")
+                if info.context
+                else None
+            )
+            bank_name_val = values.bank_name
+            if not bank_name_val:
+                err_msgs.append("Bank name not present")
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Bank name",
+                    bank_name_val,
+                    False,
+                    "Bank name is not present",
+                ]
+            else:
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Bank name",
+                    bank_name_val,
+                    True,
+                    "Bank name is present",
+                ]
+            bank_name_val_len = 0
+            if bank_name_val:
+                bank_name_val_len = len(bank_name_val)
+            if not bank_name_val and not (
+                bank_name_val_len >= 4 and bank_name_val_len <= 50
+            ):
+                err_msgs.append(
+                    "Bank name must have a length of at least 4 & at most 50"
+                )
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Bank name",
+                    bank_name_val_len,
+                    False,
+                    "Bank name does not have a length of at least 4 & at most 50",
+                ]
+            else:
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Bank name",
+                    bank_name_val_len,
+                    True,
+                    "Bank name has a length of at least 4 & at most 50",
+                ]
+            if (
+                not expected
+                or not bank_name_val
+                or bank_name_val.lower() != expected.lower()
+            ):
+                err_msgs.append("Bank name mismatch with provided value")
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Data Match",
+                    f"{bank_name_val}, {expected}",
+                    False,
+                    "Bank name does not match with provided value",
+                ]
+            else:
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Data Match",
+                    f"{bank_name_val}, {expected}",
+                    True,
+                    "Bank name matches with provided value",
+                ]
+            if err_msgs:
+                values.bank_name_err_msgs = ", ".join(err_msgs)
+            else:
+                values.bank_name_err_msgs = None
+            return values
+        except Exception as e:
+            # logger.exception(e, exc_info=True)
+            # return None
+            raise
+    @model_validator(mode="after")
+    def validate_account_number(cls, values):
+        """Validate detected bank account number"""
+        try:
+            err_msgs = list()
+            if not values.account_number:
+                err_msgs.append(
+                    "Bank account number not present. Bank account number must be present."
+                )
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Bank account number",
+                    values.account_number,
+                    False,
+                    "Bank account number is not present.",
+                ]
+            else:
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Bank account number",
+                    values.account_number,
+                    True,
+                    "Bank name matches is present",
+                ]
+            if not values.account_number or not re.fullmatch(
+                r"^\d{8}$", values.account_number
+            ):
+                err_msgs.append(
+                    "Provided account number is invalid. It must be of 8 digits length only"
+                )
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Bank account number",
+                    values.account_number,
+                    False,
+                    "Provided account number is invalid",
+                ]
+            else:
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Bank account number",
+                    values.account_number,
+                    True,
+                    "Provided account number is valid",
+                ]
+            if err_msgs:
+                values.account_number_err_msgs = ", ".join(err_msgs)
+            else:
+                values.account_number_err_msgs = None
+            return values
+        except Exception as e:
+            # logger.exception(e, exc_info=True)
+            # return None
+            raise
+    @model_validator(mode="after")
+    def validate_sort_code(cls, values):
+        """Validate extracted Bank Account Sort Code"""
+        try:
+            err_msgs = list()
+            if not values.sort_code:
+                err_msgs.append(
+                    "Sort code not present. Sort number must be present.")
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Sort code",
+                    values.sort_code,
+                    False,
+                    "Sort code is not present.",
+                ]
+            else:
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Sort code",
+                    values.sort_code,
+                    True,
+                    "Sort code is present.",
+                ]
+            # if not values.sort_code or not re.fullmatch(r"^\d{2}-?\d{2}-?\d{2}$", values.sort_code):
+            if not values.sort_code or not re.fullmatch(
+                r"^\d{2}-\d{2}-\d{2}$", values.sort_code
+            ):
+                err_msgs.append(
+                    "Provided sort code's format is invalid. It must be of the format xx-xx-xx wherein x are digits."
+                )
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Sort code",
+                    values.sort_code,
+                    False,
+                    "Sort code's format is invalid.",
+                ]
+            else:
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Sort code",
+                    values.sort_code,
+                    True,
+                    "Sort code's format is valid.",
+                ]
+            if err_msgs:
+                values.sort_code_err_msgs = ", ".join(err_msgs)
+            else:
+                values.sort_code_err_msgs = None
+            return values
+        except Exception as e:
+            # logger.exception(e, exc_info=True)
+            # return None
+            raise
+    @model_validator(mode="after")
+    def validate_bank_account_statement_dates(cls, values):
+        try:
+            err_msgs = list()
+            statement_start_date_val = values.statement_start_date
+            statement_end_date_val = values.statement_end_date
+            if not statement_start_date_val or not statement_end_date_val:
+                err_msgs.append(
+                    "Both statement start date & statement end date must be present"
+                )
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Date checks",
+                    f"{statement_start_date_val}, {statement_end_date_val}",
+                    False,
+                    "Both statement start date & statement end date are not present",
+                ]
+            else:
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Date checks",
+                    f"{statement_start_date_val}, {statement_end_date_val}",
+                    True,
+                    "Both statement start date & statement end date are present",
+                ]
+            if statement_start_date_val and statement_end_date_val:
+                if (statement_end_date_val - statement_start_date_val).days < 28:
+                    err_msgs.append(
+                        "Account statement period's start date & end date must have a gap of at least 28 days"
+                    )
+                    values.validation_policy_status_df.loc[
+                        len(values.validation_policy_status_df)
+                    ] = [
+                        "Coverage",
+                        f"{statement_start_date_val}, {statement_end_date_val}",
+                        False,
+                        "Account statement period's start date & end date donot have a gap of at least 28 days",
+                    ]
+                else:
+                    values.validation_policy_status_df.loc[
+                        len(values.validation_policy_status_df)
+                    ] = [
+                        "Coverage",
+                        f"{statement_start_date_val}, {statement_end_date_val}",
+                        True,
+                        "Account statement period's start date & end date have a gap of at least 28 days",
+                    ]
+            if err_msgs:
+                values.account_statement_date_err_msgs = ", ".join(err_msgs)
+            else:
+                values.account_statement_date_err_msgs = None
+            return values
+        except Exception as e:
+            # logger.exception(e, exc_info=True)
+            # return None
+            raise
+    @model_validator(mode="after")
+    def validate_salary_credit_checks(cls, values):
+        try:
+            err_msgs = list()
+            statement_start_date_val = values.statement_start_date
+            statement_end_date_val = values.statement_end_date
+            first_salary_deposit_date_present_val = (
+                values.first_salary_deposit_date_present
+            )
+            # # is_salary_credit_present_val = values.is_salary_credit_present
+            # is_salary_credit_consistent_across_months_val = (
+            #     values.is_salary_credit_consistent_across_months
+            # )
+            # if not statement_start_date_val or not statement_end_date_val:
+            #     err_msgs.append(
+            #         "Both statement start date & statement end date must be present"
+            #     )
+            #     values.validation_policy_status_df.loc[len(
+            #         values.validation_policy_status_df)] = ["Both statement start date & statement end date must be present", f"{statement_start_date_val}, {statement_end_date_val}", False,  "Both statement start date & statement end date are not present"]
+            # else:
+            #     values.validation_policy_status_df.loc[len(
+            #         values.validation_policy_status_df)] = ["Both statement start date & statement end date must be present", f"{statement_start_date_val}, {statement_end_date_val}", True,  "Both statement start date & statement end date are present"]
+            if not first_salary_deposit_date_present_val:
+                err_msgs.append("At least one salary credit must be present")
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Salary deposit",
+                    first_salary_deposit_date_present_val,
+                    False,
+                    "At least one salary credit is not present",
+                ]
+            else:
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Salary deposit",
+                    first_salary_deposit_date_present_val,
+                    True,
+                    "At least one salary credit is present",
+                ]
+            if (
+                not statement_start_date_val
+                or not statement_end_date_val
+                or (statement_end_date_val < statement_start_date_val)
+            ):
+                err_msgs.append(
+                    "Statement period's end date must be after the start date"
+                )
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Date checks",
+                    f"{statement_start_date_val}, {statement_end_date_val}",
+                    False,
+                    "Statement period's end date is not after the start date",
+                ]
+            else:
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Date checks",
+                    f"{statement_start_date_val}, {statement_end_date_val}",
+                    True,
+                    "Statement period's end date is after the start date",
+                ]
+            # # if start and end and (start.month != end.month or start.year != end.year):
+            # if (
+            #     statement_start_date_val
+            #     and statement_end_date_val
+            #     and first_salary_deposit_date_present_val
+            #     and (
+            #         statement_start_date_val.month < statement_end_date_val.month
+            #         or statement_start_date_val.year < statement_end_date_val.year
+            #     )
+            #     and (
+            #         statement_end_date_val.day >= first_salary_deposit_date_present_val
+            #     )
+            # ):
+            #     if not is_salary_credit_consistent_across_months_val:
+            #         err_msgs.append(
+            #             "Salary credit amount across months must be consistent"
+            #         )
+            if err_msgs:
+                values.salary_deposit_err_msgs = ", ".join(err_msgs)
+            else:
+                values.salary_deposit_err_msgs = None
+            return values
+        except Exception as e:
+            # logger.exception(e, exc_info=True)
+            # return None
+            raise
+    @computed_field
+    @property
+    def is_red_flagged(self) -> bool:
+        if (
+            self.account_statement_date_err_msgs
+            or self.full_name_err_msgs
+            or self.bank_name_err_msgs
+            or self.account_number_err_msgs
+            or self.sort_code_err_msgs
+            or self.salary_deposit_err_msgs
+        ):
+            return True
+        return False

schemas/custom_app_form.py ADDED Viewed

	@@ -0,0 +1,163 @@

+import datetime
+import re
+from dateutil.relativedelta import relativedelta
+from pydantic import (
+    BaseModel,
+    computed_field,
+    Field,
+    ValidationInfo,
+    model_validator,
+    ConfigDict
+)
+import pandas as pd
+class CustomAppFormUpload(BaseModel):
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+    # application_summary_full_name: str | None = Field(None, alias="full_name")
+    # application_summary_bank_name: str | None = Field(None, alias="bank_name")
+    # application_summary_employer_name: str | None = Field(None, alias="employer_name")
+    # application_summary_complete_address: str | None = Field(None, alias="complete_address")
+    application_summary_full_name: str = Field(alias="full_name")
+    application_summary_bank_name: str = Field(alias="bank_name")
+    application_summary_employer_name: str = Field(alias="employer_name")
+    application_summary_complete_address: str = Field(alias="complete_address")
+    full_name_err_msgs: str | None = None
+    bank_name_err_msgs: str | None = None
+    employer_name_err_msgs: str | None = None
+    complete_employee_address_err_msgs: str | None = None
+    validation_policy_status_df: pd.DataFrame = pd.DataFrame(
+        columns=["Policy", "Value", "Status", "Message"])
+    # is_incomplete: bool = False
+    @model_validator(mode="after")
+    def validate_full_name(self, info: ValidationInfo):
+        """Validate provided applicant's full name"""
+        try:
+            err_msgs = []
+            full_name_val = self.application_summary_full_name
+            if not full_name_val:
+                err_msgs.append("Applicant's full name not present")
+            full_name_val_len = 0
+            if full_name_val:
+                full_name_val_len = len(full_name_val)
+            if not full_name_val and not (
+                full_name_val_len >= 2 and full_name_val_len <= 61
+            ):
+                err_msgs.append(
+                    "Full name must have a length of at least 2 & at most 61"
+                )
+            if not full_name_val or len(full_name_val.strip().split(" ")) < 2:
+                err_msgs.append(
+                    "Full name must consist of at least 2 words (first name + last name)"
+                )
+            if err_msgs:
+                self.full_name_err_msgs = ", ".join(err_msgs)
+            else:
+                self.full_name_err_msgs = None
+            return self
+        except Exception as e:
+            # logger.exception(e, exc_info=True)
+            # return None
+            raise
+    @model_validator(mode="after")
+    def validate_bank_name(self, info: ValidationInfo):
+        """Validate provided bank name"""
+        try:
+            err_msgs = []
+            bank_name_val = self.application_summary_bank_name
+            if not bank_name_val:
+                err_msgs.append("Bank name not present")
+            bank_name_val_len = 0
+            if bank_name_val:
+                bank_name_val_len = len(bank_name_val)
+            if not bank_name_val and not (
+                bank_name_val_len >= 4 and bank_name_val_len <= 50
+            ):
+                err_msgs.append(
+                    "Bank name must have a length of at least 4 & at most 50"
+                )
+            if err_msgs:
+                self.bank_name_err_msgs = ", ".join(err_msgs)
+            else:
+                self.bank_name_err_msgs = None
+            return self
+        except Exception as e:
+            # logger.exception(e, exc_info=True)
+            # return None
+            raise
+    @model_validator(mode="after")
+    def validate_employer_name(self, info: ValidationInfo):
+        """Validate provided employer name"""
+        try:
+            err_msgs = []
+            employer_name_val = self.application_summary_employer_name
+            if not employer_name_val:
+                err_msgs.append("Employer name not present")
+            # # Allowed: letters, numbers, spaces, and common name punctuation
+            # pattern = r"^[A-Za-z0-9&\-,.()'/@ ]{2,100}$"
+            # if not re.match(pattern, employer_name_val):
+            #     err_msgs.append("Employer name contains invalid characters")
+            if not re.search(r"[A-Za-z]", employer_name_val):
+                err_msgs.append(
+                    "Employer name must contain at least one letter")
+            if employer_name_val.strip() == "":
+                err_msgs.append("Employer name cannot be only whitespace")
+            self.employer_name_err_msgs = ", ".join(
+                err_msgs) if err_msgs else None
+            return self
+        except Exception as e:
+            # logger.exception(e, exc_info=True)
+            # return None
+            raise
+    @model_validator(mode="after")
+    def validate_complete_address(self, info: ValidationInfo):
+        try:
+            err_msgs = []
+            val = self.application_summary_complete_address
+            if not val:
+                err_msgs.append("Applicant's address not present")
+            length = len(val) if val else 0
+            if not (10 <= length <= 300):
+                err_msgs.append(
+                    "Applicant's complete address must have a length of at least 10 & at most 300"
+                )
+            self.complete_employee_address_err_msgs = (
+                ", ".join(err_msgs) if err_msgs else None
+            )
+            return self
+        except Exception as e:
+            # logger.exception(e, exc_info=True)
+            # return None
+            raise
+    @computed_field
+    @property
+    def is_incomplete(self) -> bool:
+        if any([
+            self.full_name_err_msgs,
+            self.bank_name_err_msgs,
+            self.employer_name_err_msgs,
+            self.complete_employee_address_err_msgs,
+        ]):
+            return True
+        return False

schemas/id.py ADDED Viewed

	@@ -0,0 +1,291 @@

+import datetime
+from dateutil.relativedelta import relativedelta
+from pydantic import (
+    BaseModel,
+    computed_field,
+    Field,
+    ValidationInfo,
+    model_validator,
+    ConfigDict
+)
+import pandas as pd
+class UKPassportSchema(BaseModel):
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+    full_name: str | None = Field(
+        default=None,
+        description="Applicant's full name. Must consist of at least 2 words, have length gte 2 & lte 61",
+        examples=["Jodie Pippa"],
+    )  # , min_length=2, max_length=61)
+    expiry_date: datetime.date | None = Field(
+        default=None,
+        description="The passport's expiry date in YYYY-MM-DD format",
+        examples=["2028-06-01"],
+    )
+    full_name_err_msgs: str | None = None
+    expiry_date_err_msgs: str | None = None
+    validation_policy_status_df: pd.DataFrame = pd.DataFrame(
+        columns=["Policy", "Value", "Status", "Message"])
+    @model_validator(mode="after")
+    def validate_expiry_date(cls, values):
+        try:
+            err_msgs = list()
+            expiry_date_val = values.expiry_date
+            if not expiry_date_val:
+                err_msgs.append("Expiry date must be present")
+                values.validation_policy_status_df.loc[len(
+                    values.validation_policy_status_df)] = ["Expiry date must be present", values.expiry_date, False, "Expiry date is not present"]
+            else:
+                values.validation_policy_status_df.loc[len(
+                    values.validation_policy_status_df)] = ["Expiry date must be present", values.expiry_date, True, "Expiry date is present"]
+            if expiry_date_val < datetime.date.today() + relativedelta(years=1):
+                # raise ValueError("Provided passport expires within 1 year")
+                err_msgs.append("Provided passport expires within 1 year")
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Provided passport expiry should be more than 1 year",
+                    values.expiry_date,
+                    False,
+                    "Provided passport expires within 1 year &/or is expired",
+                ]
+            else:
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Provided passport expiry should be more than 1 year",
+                    values.expiry_date,
+                    True,
+                    "Provided passport does not expire within 1 year",
+                ]
+            values.expiry_date_err_msgs = ", ".join(
+                err_msgs) if err_msgs else None
+            return values
+        except Exception as e:
+            raise
+            # if not values.expiry_date_err_msgs:
+            #     values.expiry_date_err_msgs = "Provided passport expires within 1 year"
+            # else:
+            #     values.expiry_date_err_msgs = f"{values.expiry_date_err_msgs}, Provided passport expires within 1 year"
+        # if not values.expiry_date_err_msgs:
+        #     values.expiry_date_err_msgs = None
+        # return values
+    @model_validator(mode="after")
+    def validate_full_name(cls, values, info: ValidationInfo):
+        """Match applicant's full name against provided name (case-insensitive)"""
+        try:
+            err_msgs = []
+            expected = (
+                info.context.get("application_summary_full_name")
+                if info.context
+                else None
+            )
+            full_name_val = values.full_name
+            if not full_name_val:
+                err_msgs.append("Applicant's full name not present")
+                values.validation_policy_status_df.loc[len(
+                    values.validation_policy_status_df)] = ["Applicant's full name should be present", full_name_val, False, "Applicant's full name not present"]
+            else:
+                values.validation_policy_status_df.loc[len(
+                    values.validation_policy_status_df)] = ["Applicant's full name should be present", full_name_val, True, "Applicant's full name is present"]
+            full_name_val_len = 0
+            if full_name_val:
+                full_name_val_len = len(full_name_val)
+            if not full_name_val and not (
+                full_name_val_len >= 2 and full_name_val_len <= 61
+            ):
+                err_msgs.append(
+                    "Full name must have a length of at least 2 & at most 61"
+                )
+                values.validation_policy_status_df.loc[len(
+                    values.validation_policy_status_df)] = [ "Full name must have a length of at least 2 & at most 61", full_name_val_len, False,  "Full name does not have a length of at least 2 & at most 61"]
+            else:
+                values.validation_policy_status_df.loc[len(
+                    values.validation_policy_status_df)] = [ "Full name must have a length of at least 2 & at most 61", full_name_val_len, True,  "Full name has a length of at least 2 & at most 61"]
+            if (
+                not expected
+                or not full_name_val
+                or full_name_val.lower() != expected.lower()
+            ):
+                err_msgs.append("Name mismatch with provided value")
+                values.validation_policy_status_df.loc[len(
+                    values.validation_policy_status_df)] = ["Name should match with provided value", full_name_val, False,  "Name does not match with provided value"]
+            else:
+                values.validation_policy_status_df.loc[len(
+                    values.validation_policy_status_df)] = ["Name should match with provided value", full_name_val, True,  "Name matches with provided value"]
+            if not full_name_val or len(full_name_val.strip().split(" ")) < 2:
+                err_msgs.append(
+                    "Full name must consist of at least 2 words (first name + last name)"
+                )
+                values.validation_policy_status_df.loc[len(
+                    values.validation_policy_status_df)] = ["Full name must consist of at least 2 words (first name + last name)", len(full_name_val.strip().split(" ")), False,  "Full name does not consist of at least 2 words (first name + last name)"]
+            else:
+                values.validation_policy_status_df.loc[len(
+                    values.validation_policy_status_df)] = ["Full name must consist of at least 2 words (first name + last name)", len(full_name_val.strip().split(" ")), True,  "Full name does consist of at least 2 words (first name + last name)"]
+            if err_msgs:
+                values.full_name_err_msgs = ", ".join(err_msgs)
+            else:
+                values.full_name_err_msgs = None
+            return values
+        except Exception as e:
+            # logger.exception(e, exc_info=True)
+            # return None
+            raise
+    @computed_field
+    @property
+    def is_red_flagged(self) -> bool:
+        if self.full_name_err_msgs or self.expiry_date_err_msgs:
+            return True
+        return False
+class UKDrivingLicense(BaseModel):
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+    full_name: str | None = Field(
+        default=None,
+        description="Applicant's full name. Must consist of at least 2 words, have length gte 2 & lte 61",
+        examples=["Jodie Pippa"],
+    )  # , min_length=2, max_length=61)
+    full_name_err_msgs: str | None = None
+    expiry_date_err_msgs: str | None = None
+    validation_policy_status_df: pd.DataFrame = pd.DataFrame(
+        columns=["Policy", "Value", "Status", "Message"])
+    @model_validator(mode="after")
+    def validate_full_name(cls, values, info: ValidationInfo):
+        """Match applicant's full name against provided name (case-insensitive)"""
+        try:
+            err_msgs = []
+            expected = (
+                info.context.get("application_summary_full_name")
+                if info.context
+                else None
+            )
+            full_name_val = values.full_name
+            if not full_name_val:
+                err_msgs.append("Applicant's full name not present")
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Applicant's full name should be present",
+                    full_name_val,
+                    False,
+                    "Applicant's full name not present",
+                ]
+            else:
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Applicant's full name should be present",
+                    full_name_val,
+                    True,
+                    "Applicant's full name is present",
+                ]
+            full_name_val_len = 0
+            if full_name_val:
+                full_name_val_len = len(full_name_val)
+            if not full_name_val and not (
+                full_name_val_len >= 2 and full_name_val_len <= 61
+            ):
+                err_msgs.append(
+                    "Full name must have a length of at least 2 & at most 61"
+                )
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Full name must have a length of at least 2 & at most 61",
+                    full_name_val_len,
+                    False,
+                    "Full name does not have a length of at least 2 & at most 61",
+                ]
+            else:
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Full name must have a length of at least 2 & at most 61",
+                    full_name_val_len,
+                    True,
+                    "Full name has a length of at least 2 & at most 61",
+                ]
+            if (
+                not expected
+                or not full_name_val
+                or full_name_val.lower() != expected.lower()
+            ):
+                err_msgs.append("Name mismatch with provided value")
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Name should match with provided value",
+                    full_name_val,
+                    False,
+                    "Name does not match with provided value",
+                ]
+            else:
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Name should match with provided value",
+                    full_name_val,
+                    True,
+                    "Name matches with provided value",
+                ]
+            if not full_name_val or len(full_name_val.strip().split(" ")) < 2:
+                err_msgs.append(
+                    "Full name must consist of at least 2 words (first name + last name)"
+                )
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Full name must consist of at least 2 words (first name + last name)",
+                    len(full_name_val.strip().split(" ")),
+                    False,
+                    "Full name does not consist of at least 2 words (first name + last name)",
+                ]
+            else:
+                values.validation_policy_status_df.loc[
+                    len(values.validation_policy_status_df)
+                ] = [
+                    "Full name must consist of at least 2 words (first name + last name)",
+                    len(full_name_val.strip().split(" ")),
+                    True,
+                    "Full name does consist of at least 2 words (first name + last name)",
+                ]
+            if err_msgs:
+                values.full_name_err_msgs = ", ".join(err_msgs)
+            else:
+                values.full_name_err_msgs = None
+            return values
+        except Exception as e:
+            # logger.exception(e, exc_info=True)
+            # return None
+            raise
+    @computed_field
+    @property
+    def is_red_flagged(self) -> bool:
+        if self.full_name_err_msgs or self.expiry_date_err_msgs:
+            return True
+        return False

schemas/payslip.py ADDED Viewed

	@@ -0,0 +1,551 @@

+import datetime
+import re
+from pydantic import (
+    BaseModel,
+    computed_field,
+    Field,
+    ValidationInfo,
+    model_validator,
+    ConfigDict
+)
+import pandas as pd
+class UKPayslipSchema(BaseModel):
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+    pay_period_start_date: datetime.date | None = Field(
+        default=None,
+        description="Pay period's start date in YYYY-MM-DD format",
+        examples=["2025-02-01"],
+    )
+    pay_period_end_date: datetime.date | None = Field(
+        default=None,
+        description="Pay period's end date in YYYY-MM-DD format",
+        examples=["2025-02-28"],
+    )
+    pay_period_days: int | None = Field(
+        default=None,
+        description="pay_period_end_date - pay_period_start_date in days",
+        examples=[28],
+    )
+    pay_date: datetime.date | None = Field(None)
+    full_name: str | None = Field(
+        default=None,
+        description="Applicant's full name. Must consist of at least 2 words, have length gte 2 & lte 61",
+        examples=["Jodie Pippa"],
+    )
+    employer_name: str | None = Field(
+        default=None,
+        description="Employer name extracted",
+        examples=["ABC Ltd"],
+    )
+    is_basic_pay_net_pay_other_salary_components_present: bool = Field(
+        default=False,
+        description="Boolean indicating whether Basic Pay, Net Pay, other requisite salary components/line items are present in the payslip",
+        examples=[True, False],
+    )
+    is_tax_deducation_present: bool = Field(
+        default=False,
+        description="Boolean flag indicating whether Tax Deduction line item is present in the payslip",
+        examples=[True, False],
+    )
+    is_ni_deduction_present: bool = Field(
+        default=False,
+        description="Boolean flag indicating whether NI/National Insurance deduction line item is present in the payslip",
+        examples=[True, False],
+    )
+    complete_employee_address: str | None = Field(
+        default=None,
+        description="Employee's complete address as a string",
+        examples=["123 Maple Street, London, UK, SW1A 1AA"],
+    )
+    # employee_number: int | None = Field(
+    #     default=None,
+    #     description="Employee number",
+    #     examples=[3558, 1234],
+    # )
+    pay_dates_err_msgs: str | None = None
+    full_name_err_msgs: str | None = None
+    employer_name_err_msgs: str | None = None
+    payslip_line_item_presence_err_msgs: str | None = None
+    complete_employee_address_err_msgs: str | None = None
+    validation_policy_status_df: pd.DataFrame = pd.DataFrame(
+        columns=["Policy", "Value", "Status", "Message"])
+    # employee_number_err_msgs: str | None = None
+    # is_red_flagged: bool = False
+    @model_validator(mode="after")
+    def validate_full_name(self, info: ValidationInfo):
+        """Match applicant's full name against provided name (case-insensitive)"""
+        try:
+            err_msgs = []
+            expected = (
+                info.context.get("application_summary_full_name")
+                if info.context
+                else None
+            )
+            if not self.full_name:
+                err_msgs.append("Applicant's full name not present")
+                self.validation_policy_status_df.loc[len(
+                    self.validation_policy_status_df)] = ["Employer & Customer Names", self.full_name, False, "Applicant's full name is not present"]
+            else:
+                self.validation_policy_status_df.loc[len(
+                    self.validation_policy_status_df)] = ["Employer & Customer Names", self.full_name, True, "Applicant's full name is present"]
+            full_name_val_len = len(self.full_name) if self.full_name else 0
+            if not (2 <= full_name_val_len <= 61):
+                err_msgs.append(
+                    "Full name must have a length of at least 2 & at most 61"
+                )
+                self.validation_policy_status_df.loc[len(
+                    self.validation_policy_status_df)] = ["Employer & Customer Names", full_name_val_len, False, "Full name has a length of at least 2 & at most 61"]
+            else:
+                self.validation_policy_status_df.loc[len(
+                    self.validation_policy_status_df)] = ["Employer & Customer Names", full_name_val_len, True, "Full name has a length of at least 2 & at most 61"]
+            if not self.full_name or len(self.full_name.strip().split(" ")) < 2:
+                err_msgs.append(
+                    "Full name must consist of at least 2 words (first name + last name)"
+                )
+                self.validation_policy_status_df.loc[len(
+                    self.validation_policy_status_df)] = ["Employer & Customer Names",  self.full_name , False, "Full name does not consist of at least 2 words (first name + last name)"]
+            else:
+                self.validation_policy_status_df.loc[len(
+                    self.validation_policy_status_df)] = ["Employer & Customer Names", len(self.full_name.strip().split(" ")), True, "Full name consists of at least 2 words (first name + last name)"]
+            if (
+                not expected
+                or not self.full_name
+                or self.full_name.lower() != expected.lower()
+            ):
+                err_msgs.append("Name mismatch with provided value")
+                self.validation_policy_status_df.loc[len(
+                    self.validation_policy_status_df)] = ["Employer & Customer Names", f"{self.full_name}, {expected}", False, "Name does not match with provided value"]
+            else:
+                self.validation_policy_status_df.loc[len(
+                    self.validation_policy_status_df)] = ["Employer & Customer Names", f"{self.full_name}, {expected}", True, "Name matches with provided value"]
+            self.full_name_err_msgs = ", ".join(err_msgs) if err_msgs else None
+            return self
+        except Exception as e:
+            # logger.exception(e, exc_info=True)
+            # return None
+            raise
+    @model_validator(mode="after")
+    def validate_employer_name(self, info: ValidationInfo):
+        """Match employer against provided employer name (case-insensitive)"""
+        try:
+            err_msgs = []
+            expected = (
+                info.context.get("application_summary_employer_name")
+                if info.context
+                else None
+            )
+            employer_name_val = self.employer_name
+            if not employer_name_val:
+                err_msgs.append("Employer name not present")
+                self.validation_policy_status_df.loc[len(
+                    self.validation_policy_status_df)] = ["Employer & Customer Names", employer_name_val, False, "Employer name is not present"]
+            else:
+                self.validation_policy_status_df.loc[len(
+                    self.validation_policy_status_df)] = ["Employer & Customer Names", employer_name_val, True, "Employer name is present"]
+            is_employer_name_match = (
+                expected
+                and employer_name_val
+                and employer_name_val.lower() == expected.lower()
+            )
+            if not is_employer_name_match:
+                err_msgs.append("Employer name mismatch with provided value")
+                self.validation_policy_status_df.loc[len(
+                    self.validation_policy_status_df)] = ["Employer & Customer Names", f"{employer_name_val}, {expected}", False, "Employer name does not match with provided value"]
+            else:
+                self.validation_policy_status_df.loc[len(
+                    self.validation_policy_status_df)] = ["Employer & Customer Names", f"{employer_name_val}, {expected}", True, "Employer name matches with provided value"]
+                # # Allowed: letters, numbers, spaces, and common name punctuation
+                # pattern = r"^[A-Za-z0-9&\-,.()'/@ ]{2,100}$"
+                # if not re.match(pattern, employer_name_val):
+                #     err_msgs.append("Employer name contains invalid characters")
+                if not re.search(r"[A-Za-z]", employer_name_val):
+                    err_msgs.append(
+                        "Employer name must contain at least one letter")
+                if employer_name_val.strip() == "":
+                    err_msgs.append("Employer name cannot be only whitespace")
+            self.employer_name_err_msgs = ", ".join(
+                err_msgs) if err_msgs else None
+            return self
+        except Exception as e:
+            # logger.exception(e, exc_info=True)
+            # return None
+            raise
+    @model_validator(mode="after")
+    def validate_payslip_dates(self):
+        try:
+            err_msgs = []
+            today = datetime.date.today()
+            threshold_date = today - datetime.timedelta(days=35)
+            if not self.pay_period_start_date or not self.pay_period_end_date:
+                err_msgs.append(
+                    "Undated Payslips"
+                )
+                self.validation_policy_status_df.loc[
+                    len(self.validation_policy_status_df)
+                ] = [
+                    "Undated Payslips",
+                    f"{self.pay_period_start_date}, {self.pay_period_end_date}",
+                    False,
+                    "Undated payslip",
+                ]
+            else:
+                self.validation_policy_status_df.loc[
+                    len(self.validation_policy_status_df)
+                ] = [
+                    "Undated Payslips",
+                    f"{self.pay_period_start_date}, {self.pay_period_end_date}",
+                    True,
+                    "Dated payslip",
+                ]
+                # self.is_red_flagged = True
+            if self.pay_date:
+                if not (threshold_date <= self.pay_date <= today):
+                    err_msgs.append(
+                        "Pay date must be within the last 35 days & not in the future"
+                    )
+                    self.validation_policy_status_df.loc[
+                        len(self.validation_policy_status_df)
+                    ] = [
+                        "Pay Date Requirement",
+                        self.pay_date,
+                        False,
+                        "Pay date is not within the last 35 days & not in the future",
+                    ]
+                else:
+                    self.validation_policy_status_df.loc[
+                        len(self.validation_policy_status_df)
+                    ] = [
+                        "Pay Date Requirement",
+                        self.pay_date,
+                        True,
+                        "Pay date is within the last 35 days & not in the future",
+                    ]
+            # elif self.pay_period_end_date:
+            else:
+                if not (threshold_date <= self.pay_period_end_date <= today):
+                    err_msgs.append(
+                        "Pay period's end date must be within the last 35 days & not in the future"
+                    )
+                    self.validation_policy_status_df.loc[
+                        len(self.validation_policy_status_df)
+                    ] = [
+                        "Pay Period End Date (DD/MM/YYYY, if no pay date)",
+                        self.pay_date,
+                        False,
+                        "Pay date is not within the last 35 days &/or in the future",
+                    ]
+                else:
+                    self.validation_policy_status_df.loc[
+                        len(self.validation_policy_status_df)
+                    ] = [
+                        "Pay Period End Date (DD/MM/YYYY, if no pay date)",
+                        self.pay_date,
+                        True,
+                        "Pay date is within the last 35 days & not in the future",
+                    ]
+                prev_month_end = datetime.date.today().replace(day=1) - \
+                    datetime.timedelta(days=1)
+                prev_month_start = prev_month_end.replace(day=1)
+                if not (
+                    prev_month_start <= self.pay_period_start_date
+                    and self.pay_period_start_date < self.pay_period_end_date <= today
+                ):
+                    err_msgs.append(
+                        "Payslip date(s) must not be older than those of the last calendar month"
+                    )
+                    self.validation_policy_status_df.loc[
+                        len(self.validation_policy_status_df)
+                    ] = [
+                        "Pay Period Month (MM/YYYY, if no pay date) basis pay period duration",
+                        self.pay_date,
+                        False,
+                        "Payslip date(s) is older than those of the last calendar month",
+                    ]
+                else:
+                    self.validation_policy_status_df.loc[
+                        len(self.validation_policy_status_df)
+                    ] = [
+                        "Pay Period Month (MM/YYYY, if no pay date) basis pay period duration",
+                        self.pay_date,
+                        True,
+                        "Payslip date(s) is not older than those of the last calendar month",
+                    ]
+            if self.pay_period_start_date and self.pay_period_end_date:
+                if self.pay_period_start_date >= self.pay_period_end_date:
+                    err_msgs.append(
+                        "Pay period's start date must be before the end date")
+                    self.validation_policy_status_df.loc[
+                        len(self.validation_policy_status_df)
+                    ] = [
+                        "Pay Period Start & End Dates",
+                        f"{self.pay_period_start_date}, {self.pay_period_end_date}",
+                        False,
+                        "Pay period's start date is not before the end date",
+                    ]
+                else:
+                    self.validation_policy_status_df.loc[
+                        len(self.validation_policy_status_df)
+                    ] = [
+                        "Pay Period Start & End Dates",
+                        f"{self.pay_period_start_date}, {self.pay_period_end_date}",
+                        True,
+                        "Pay period's start date is before the end date",
+                    ]
+                if (self.pay_period_end_date - self.pay_period_start_date).days < 28:
+                    err_msgs.append(
+                        "Pay period's start date & end date must have a gap of at least 28 days"
+                    )
+                    self.validation_policy_status_df.loc[
+                        len(self.validation_policy_status_df)
+                    ] = [
+                        "Submission Requirement (Monthly Pay)",
+                        (self.pay_period_end_date - self.pay_period_start_date).days,
+                        False,
+                        "Pay period's start date & end date donot have a gap of at least 28 days",
+                    ]
+                else:
+                    self.validation_policy_status_df.loc[
+                        len(self.validation_policy_status_df)
+                    ] = [
+                        "Submission Requirement (Monthly Pay)",
+                        (self.pay_period_end_date - self.pay_period_start_date).days,
+                        True,
+                        "Pay period's start date & end date have a gap of at least 28 days",
+                    ]
+            else:
+                self.validation_policy_status_df.loc[
+                    len(self.validation_policy_status_df)
+                ] = [
+                    "Pay Period Start & End Dates",
+                    f"{self.pay_period_start_date}, {self.pay_period_end_date}",
+                    False,
+                    "Pay period's start date is not before the end date",
+                ]
+                self.validation_policy_status_df.loc[
+                    len(self.validation_policy_status_df)
+                ] = [
+                    "Submission Requirement (Monthly Pay)",
+                    f"{self.pay_period_start_date}, {self.pay_period_end_date}",
+                    False,
+                    "Pay period's start date & end date donot have a gap of at least 28 days",
+                ]
+            self.pay_dates_err_msgs = ", ".join(err_msgs) if err_msgs else None
+            return self
+        except Exception as e:
+            # logger.exception(e, exc_info=True)
+            # return None
+            raise
+    @model_validator(mode="after")
+    def validate_payslip_components_checks(self):
+        try:
+            err_msgs = []
+            if not self.is_basic_pay_net_pay_other_salary_components_present:
+                err_msgs.append(
+                    "Basic salary, Net Salary and/or other requisite salary components not present"
+                )
+                self.validation_policy_status_df.loc[
+                    len(self.validation_policy_status_df)
+                ] = [
+                    "Requisite salary line items",
+                    self.is_basic_pay_net_pay_other_salary_components_present,
+                    False,
+                    "Basic salary, Net Salary and/or other requisite salary components not present",
+                ]
+            else:
+                self.validation_policy_status_df.loc[
+                    len(self.validation_policy_status_df)
+                ] = [
+                    "Requisite salary line items",
+                    self.is_basic_pay_net_pay_other_salary_components_present,
+                    True,
+                    "Basic salary, Net Salary and/or other requisite salary components are present",
+                ]
+            if not self.is_tax_deducation_present:
+                err_msgs.append("Tax Deduction line item must be present")
+                self.validation_policy_status_df.loc[
+                    len(self.validation_policy_status_df)
+                ] = [
+                    "Tax & NI Contributions",
+                    self.is_tax_deducation_present,
+                    False,
+                    "Tax Deduction line item is not present",
+                ]
+            else:
+                self.validation_policy_status_df.loc[
+                    len(self.validation_policy_status_df)
+                ] = [
+                    "Tax & NI Contributions",
+                    self.is_tax_deducation_present,
+                    True,
+                    "Tax Deduction line item is present",
+                ]
+            if not self.is_ni_deduction_present:
+                err_msgs.append("NI/National Insurance line item must be present")
+                self.validation_policy_status_df.loc[
+                    len(self.validation_policy_status_df)
+                ] = [
+                    "Tax & NI Contributions",
+                    self.is_ni_deduction_present,
+                    False,
+                    "NI/National Insurance line item is not present",
+                ]
+            else:
+                self.validation_policy_status_df.loc[
+                    len(self.validation_policy_status_df)
+                ] = [
+                    "Tax & NI Contributions",
+                    self.is_ni_deduction_present,
+                    True,
+                    "NI/National Insurance line item is present",
+                ]
+            self.payslip_line_item_presence_err_msgs = (
+                ", ".join(err_msgs) if err_msgs else None
+            )
+            return self
+        except Exception as e:
+            # logger.exception(e, exc_info=True)
+            # return None
+            raise
+    @model_validator(mode="after")
+    def validate_complete_address(self, info: ValidationInfo):
+        try:
+            err_msgs = []
+            expected = (
+                info.context.get("application_summary_complete_address")
+                if info.context
+                else None
+            )
+            val = self.complete_employee_address
+            if not val:
+                err_msgs.append("Applicant's address not present")
+                self.validation_policy_status_df.loc[
+                    len(self.validation_policy_status_df)
+                ] = [
+                    "Applicant Address",
+                    val,
+                    False,
+                    "Applicant's address not present",
+                ]
+            else:
+                self.validation_policy_status_df.loc[
+                    len(self.validation_policy_status_df)
+                ] = [
+                    "Applicant Address",
+                    val,
+                    True,
+                    "Applicant's address is present",
+                ]
+            length = len(val) if val else 0
+            if not (10 <= length <= 300):
+                err_msgs.append(
+                    "Applicant's complete address must have a length of at least 10 & at most 300"
+                )
+                self.validation_policy_status_df.loc[
+                    len(self.validation_policy_status_df)
+                ] = [
+                    "Applicant Address",
+                    length,
+                    False,
+                    "Applicant's complete address does not have a length of at least 10 & at most 300",
+                ]
+            else:
+                self.validation_policy_status_df.loc[
+                    len(self.validation_policy_status_df)
+                ] = [
+                    "Applicant Address",
+                    length,
+                    True,
+                    "Applicant's complete address has a length of at least 10 & at most 300",
+                ]
+            if not expected or not val or val.lower() != expected.lower():
+                err_msgs.append("Complete address mismatch with provided value")
+                self.validation_policy_status_df.loc[
+                    len(self.validation_policy_status_df)
+                ] = [
+                    "Applicant Address",
+                    f"{val}, {expected}",
+                    False,
+                    "Complete address mismatch with provided value",
+                ]
+            else:
+                self.validation_policy_status_df.loc[
+                    len(self.validation_policy_status_df)
+                ] = [
+                    "Applicant Address",
+                    f"{val}, {expected}",
+                    True,
+                    "Complete address matches with provided value",
+                ]
+            self.complete_employee_address_err_msgs = (
+                ", ".join(err_msgs) if err_msgs else None
+            )
+            return self
+        except Exception as e:
+            # logger.exception(e, exc_info=True)
+            # return None
+            raise
+    # @model_validator(mode="after")
+    # def validate_employee_number(self):
+    #     try:
+    #         if self.employee_number and self.employee_number <= 25:
+    #             self.complete_employee_address_err_msgs = "Employee number low"
+    #         return self
+    #     except Exception as e:
+    #         raise
+    @computed_field
+    @property
+    def is_red_flagged(self) -> bool:
+        if any([
+            self.pay_dates_err_msgs,
+            self.full_name_err_msgs,
+            self.employer_name_err_msgs,
+            self.payslip_line_item_presence_err_msgs,
+            self.complete_employee_address_err_msgs,
+            # self.employee_number_err_msgs,
+        ]):
+            return True
+        return False

schemas/uk_address.py ADDED Viewed

	@@ -0,0 +1,39 @@

+from pydantic import BaseModel, Field, field_validator
+import re
+UK_POSTCODE_REGEX = re.compile(r"^(GIR ?0AA|[A-Z]{1,2}\d{1,2}[A-Z]?\s?\d[A-Z]{2})$", re.IGNORECASE)
+class UKAddress(BaseModel):
+    street_address: str = Field(..., min_length=5, max_length=100)
+    city: str = Field(..., min_length=2, max_length=50)
+    postcode: str
+    country: str = "United Kingdom"
+    @field_validator("street_address")
+    @classmethod
+    def validate_street_address(cls, v: str) -> str:
+        if not re.match(r"^[a-zA-Z0-9\s,.'\-/#()]{5,100}$", v):
+            raise ValueError("Invalid characters in street address")
+        return v.strip()
+    @field_validator("city")
+    @classmethod
+    def validate_city(cls, v: str) -> str:
+        if not re.match(r"^[a-zA-Z\s\-']+$", v):
+            raise ValueError("City must only contain alphabetic characters, spaces, hyphens, or apostrophes")
+        return v.strip()
+    @field_validator("postcode")
+    @classmethod
+    def validate_postcode(cls, v: str) -> str:
+        cleaned = v.replace(" ", "").upper()
+        if not UK_POSTCODE_REGEX.match(cleaned):
+            raise ValueError("Invalid UK postcode format")
+        return v.upper()
+    @field_validator("country")
+    @classmethod
+    def validate_country(cls, v: str) -> str:
+        if v.strip().lower() not in ["united kingdom", "uk"]:
+            raise ValueError("Country must be United Kingdom or UK")
+        return "United Kingdom"

utils/__init__.py ADDED Viewed

	@@ -0,0 +1,4 @@

+from .image_utils import im_2_b64, encode_image, load_pdf_as_image, generate_metadata
+from .process_files import process_uploaded_files
+from .logger import setup_logger