"""Streamlit helpers for pruning and displaying payslip analysis results."""
import os

import pandas as pd
import streamlit as st
from PIL import Image

from utils.logger import setup_logger

logger = setup_logger(__name__)

# Fields copied from the raw analysis results, in the order they are shown.
_PAYSLIP_FIELDS = (
    "employee_name",
    "employer_name",
    "employee_id",
    "employee_address",
    "employer_address",
    "tax_code",
    "payslip_date",
    "pay_period_start",
    "pay_period_end",
    "payment_frequency",
    "basic_pay",
    "net_pay",
    "gross_pay",
    "salary_components",
    "ni_contribution",
    "tax_deduction",
    "other_deductions",
)

# Keys that are left out of the flat Key/Value summary table.
_NESTED_FIELDS = frozenset({"other_deductions", "salary_components"})


def prune_payslip_for_display(analysis_results_for_id):
    """Return the subset of payslip analysis results used for display.

    Args:
        analysis_results_for_id: Mapping of extracted payslip fields. ``None``
            is treated as an empty mapping.

    Returns:
        A new dict with fixed ``document_category`` / ``document_type``
        entries followed by every field in ``_PAYSLIP_FIELDS``; fields that
        are absent from the input are set to ``None``.
    """
    source = analysis_results_for_id or {}
    data_to_display = {
        "document_category": "income_document",
        "document_type": "payslip",
    }
    data_to_display.update(
        (field, source.get(field)) for field in _PAYSLIP_FIELDS
    )
    return data_to_display


def _flatten_deductions(other_deductions):
    """Collect deduction dicts from a flat or nested list structure.

    Dicts found directly at the top level are kept as-is; any other entry is
    iterated and its dict items (or the contents of its list items) are
    collected. Raises ``TypeError`` if the input or an entry is not iterable.
    """
    flat = []
    for entry in other_deductions:
        if isinstance(entry, dict):
            # Already a deduction record; iterating it would only yield keys.
            flat.append(entry)
            continue
        for item in entry:
            if isinstance(item, list):
                flat.extend(item)
            elif isinstance(item, dict):
                flat.append(item)
    return flat


def _deductions_table(other_deductions):
    """Build a 1-indexed Key/Value DataFrame from deduction records.

    Raises:
        ValueError: If no records are found, a record is not a dict, or the
            records carry no ``name`` / ``amount`` keys.
    """
    records = _flatten_deductions(other_deductions)
    if not records or not all(isinstance(rec, dict) for rec in records):
        raise ValueError("Data is not in expected dictionary format.")

    table = pd.DataFrame(records)
    if not {"name", "amount"} <= set(table.columns):
        raise ValueError("Data is not in expected dictionary format.")

    # Rename by column name (not position) so the dict key order in the
    # records cannot swap the Key and Value columns.
    table = table.rename(columns={"name": "Key", "amount": "Value"})
    extras = [col for col in table.columns if col not in ("Key", "Value")]
    table = table[["Key", "Value", *extras]].fillna(value="Missing")
    table.index += 1
    return table


def display_payslip(extracted_files, analysis_results_pruned):
    """Render payslip image(s) next to the extracted field tables.

    The left column shows the payslip image(s); the right column shows a
    Key/Value summary of the simple fields followed by an "Other Deductions"
    table.

    Args:
        extracted_files: Sequence of image file paths for the payslip. May be
            empty or ``None``, in which case a warning is shown instead.
        analysis_results_pruned: Dict of fields to display, e.g. the output
            of ``prune_payslip_for_display``.
    """
    col1, col2 = st.columns([2, 3])
    logger.info(f"file_path while displaying: {extracted_files}")
    st.markdown("---")

    with col1:
        if not extracted_files:
            # Guard against indexing into an empty/None file list.
            st.warning("No payslip image available to display.")
        elif len(extracted_files) > 1:
            st.image(
                extracted_files,
                caption=[os.path.basename(img) for img in extracted_files],
                use_container_width=True,
            )
        else:
            # The context manager closes the underlying file handle once
            # Streamlit has consumed the image.
            # NOTE: use_container_width is not passed for a single image,
            # matching the previous behaviour.
            with Image.open(extracted_files[0]) as image:
                st.image(
                    image,
                    caption=os.path.basename(extracted_files[0]),
                )

    logger.info(f"analysis_results_pruned : {analysis_results_pruned}")

    with col2:
        # NOTE: salary_components is excluded here and is not rendered
        # anywhere else in this function.
        simple_rows = [
            (key, value)
            for key, value in analysis_results_pruned.items()
            if key not in _NESTED_FIELDS
        ]
        simple_df = pd.DataFrame(simple_rows, columns=["Key", "Value"])
        simple_df = simple_df.fillna(value="Missing")
        simple_df.index += 1
        st.dataframe(simple_df, use_container_width=True)

        st.markdown("Other Deductions")
        other_deductions_dict = analysis_results_pruned.get("other_deductions")
        logger.info(f"other_deductions_dict : {other_deductions_dict}")
        try:
            st.dataframe(
                _deductions_table(other_deductions_dict),
                use_container_width=True,
            )
        except Exception as e:
            # Deliberate best-effort: any unexpected shape falls back to
            # showing the raw value rather than breaking the page.
            logger.info(f"Different format for other deductions: {e}")
            st.dataframe(other_deductions_dict)

    logger.info(f"simple_df: {simple_df}")