File size: 4,517 Bytes
52c1998
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import streamlit as st
from utils.logger import setup_logger
import pandas as pd
from PIL import Image
import os

logger = setup_logger(__name__)


def prune_payslip_for_display(analysis_results_for_id):
    """Build the subset of payslip analysis results used for display.

    The returned dict always starts with the fixed ``document_category`` and
    ``document_type`` labels, followed by one entry per payslip field listed
    below, looked up in ``analysis_results_for_id``. Fields absent from the
    input map to ``None``; input keys that are not listed are dropped.

    Args:
        analysis_results_for_id: Mapping exposing ``.get(key, default)`` that
            holds the analysis results for one document.

    Returns:
        A new dict with the two fixed labels and the listed payslip fields,
        in the order shown here.
    """
    # Order matters: it determines the key order of the returned dict.
    payslip_fields = (
        "employee_name",
        "employer_name",
        "employee_id",
        "employee_address",
        "employer_address",
        "tax_code",
        "payslip_date",
        "pay_period_start",
        "pay_period_end",
        "payment_frequency",
        "basic_pay",
        "net_pay",
        "gross_pay",
        "salary_components",
        "ni_contribution",
        "tax_deduction",
        "other_deductions",
    )

    # The classification labels are constants, never read from the input.
    pruned = {
        "document_category": "income_document",
        "document_type": "payslip",
    }
    for field_name in payslip_fields:
        pruned[field_name] = analysis_results_for_id.get(field_name, None)

    return pruned


def _flatten_deductions(other_deductions):
    """Return every dict found in a flat or arbitrarily nested list."""
    if isinstance(other_deductions, dict):
        return [other_deductions]
    if not isinstance(other_deductions, (list, tuple)):
        # None, strings and scalars carry no deduction records.
        return []
    records = []
    for entry in other_deductions:
        records.extend(_flatten_deductions(entry))
    return records


def _deductions_table(other_deductions):
    """Build a Key/Value table from name/amount records, or return None."""
    records = _flatten_deductions(other_deductions)
    if not records:
        return None

    table = pd.DataFrame(records)
    if 'name' not in table.columns or 'amount' not in table.columns:
        return None

    # Rename by label rather than by position so the headers stay correct
    # whatever the key order of the records is, and keep any extra columns
    # instead of failing on them.
    extra_columns = [
        col for col in table.columns if col not in ('name', 'amount')]
    table = table[['name', 'amount'] + extra_columns].rename(
        columns={'name': 'Key', 'amount': 'Value'})
    table = table.fillna(value="Missing")
    table.index += 1  # 1-based row numbers for display
    return table


def display_payslip(extracted_files, analysis_results_pruned):
    """Render a payslip's image(s) next to its extracted fields.

    The left column shows the image(s); the right column shows a Key/Value
    table of the scalar fields followed by an "Other Deductions" table.

    Args:
        extracted_files: Sequence of image file paths. Several paths are
            passed to ``st.image`` as a gallery; a single path is opened
            with PIL first. Empty or ``None`` shows a warning instead.
        analysis_results_pruned: Dict of fields to display (presumably the
            output of ``prune_payslip_for_display`` -- confirm with caller).
            ``other_deductions`` may be a flat or nested list of
            ``{"name": ..., "amount": ...}`` dicts; any other shape is
            handed to ``st.dataframe`` unchanged.

    Returns:
        None. The function only writes to the Streamlit page and the log.
    """
    col1, col2 = st.columns([2, 3])

    logger.info(f"file_path while displaying: {extracted_files}")
    st.markdown("---")

    with col1:
        if not extracted_files:
            # Previously an empty list raised IndexError on [0].
            st.warning("No payslip image available to display.")
        elif len(extracted_files) > 1:
            st.image(extracted_files, caption=[os.path.basename(
                img) for img in extracted_files], use_container_width=True)
        else:
            # The context manager closes the file handle once Streamlit has
            # consumed the image. The single image is shown without
            # use_container_width, as before.
            with Image.open(extracted_files[0]) as image:
                st.image(image, caption=os.path.basename(
                    extracted_files[0]))

        logger.info(
            f"analysis_results_pruned : {analysis_results_pruned}")

    with col2:
        # NOTE(review): salary_components is excluded from this table and is
        # not rendered anywhere else in this function -- confirm intended.
        scalar_fields = {
            key: value
            for key, value in analysis_results_pruned.items()
            if key not in ('other_deductions', 'salary_components')
        }

        simple_df = pd.DataFrame.from_dict(
            scalar_fields,
            orient='index', columns=['Value']).reset_index()
        simple_df.columns = ['Key', 'Value']
        simple_df = simple_df.fillna(value="Missing")
        simple_df.index += 1
        st.dataframe(simple_df, use_container_width=True)

        st.markdown("Other Deductions")

        # .get() avoids a KeyError when the key is absent altogether.
        other_deductions_dict = analysis_results_pruned.get(
            'other_deductions')
        logger.info(f"other_deductions_dict : {other_deductions_dict}")

        deductions_df = _deductions_table(other_deductions_dict)
        if deductions_df is not None:
            st.dataframe(deductions_df, use_container_width=True)
        else:
            # Best-effort fallback: show whatever was extracted as-is.
            logger.info(
                "Different format for other deductions: "
                "Data is not in expected dictionary format.")
            st.dataframe(other_deductions_dict)

        logger.info(f"simple_df: {simple_df}")