File size: 5,302 Bytes
52c1998
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import os
import shutil
import tempfile
import zipfile

import pandas as pd
import pdf2image

from .logger import setup_logger

# Module-level logger, built by the package's own ``setup_logger`` helper.
logger = setup_logger(__name__)


def process_pdf(file_path: str, file_groups: dict, file_paths: list):
    """Rasterise a PDF into one PNG per page and register the results.

    Each page is saved as ``"<file_path>_page_<index>.png"`` (zero-based
    index), i.e. next to the source PDF.

    Args:
        file_path: Path of the PDF to convert.
        file_groups: Mapping updated in place; ``file_groups[file_path]`` is
            set to the list of PNG paths produced for this PDF.
        file_paths: Flat list updated in place; every PNG path is appended.

    Returns:
        The same ``(file_groups, file_paths)`` objects that were passed in.
    """
    page_pngs = []
    for page_no, page in enumerate(pdf2image.convert_from_path(file_path)):
        png_path = f"{file_path}_page_{page_no}.png"
        page.save(png_path, "PNG")
        # Register the page only after it has been written successfully.
        file_paths.append(png_path)
        page_pngs.append(png_path)

    file_groups[file_path] = page_pngs
    return file_groups, file_paths


# MIME types accepted as zip archives. Some browsers (notably on Windows)
# report "application/x-zip-compressed" instead of "application/zip".
_ZIP_MIME_TYPES = ("application/zip", "application/x-zip-compressed")


def _select_rows(sheet, *row_slices):
    """Stack the given row slices of *sheet*, keeping only columns 0 and 1."""
    stacked = pd.concat(
        [sheet.iloc[rows] for rows in row_slices], axis=0, ignore_index=True)
    return stacked[[0, 1]]


def _read_application_form(csv_path):
    """Load a header-less CSV and prefix column 0 with 'application_summary_'."""
    application_form = pd.read_csv(csv_path, header=None)
    logger.info(f"application_form: {application_form}")
    application_form[0] = 'application_summary_' + \
        application_form[0].str.strip()
    return application_form


def _read_memo(xlsx_path):
    """Build the memo dict from fixed row ranges of Sheet1, Sheet6 and Sheet7."""
    df_dict = pd.read_excel(xlsx_path, sheet_name=None, header=None)
    memo = {
        "Mortgage Details": _select_rows(
            df_dict['Sheet1'], slice(31, 32), slice(33, 34), slice(50, 51)),
        "Deposit details": _select_rows(
            df_dict['Sheet6'], slice(44, 47)),
        "Monthly costs for both applicants": _select_rows(
            df_dict['Sheet7'], slice(5, 23)),
    }
    logger.info(f"memo : {memo}")
    return memo


def _register_extracted_files(extract_dir, file_groups, file_paths,
                              application_form, memo):
    """Walk *extract_dir* and index every file by its extension."""
    for root, _, files in os.walk(extract_dir):
        for file_name in files:
            extracted_path = os.path.join(root, file_name)
            lowered = file_name.lower()
            if lowered.endswith(".pdf"):
                file_groups, file_paths = process_pdf(
                    file_path=extracted_path,
                    file_groups=file_groups,
                    file_paths=file_paths)
            elif lowered.endswith(".csv"):
                application_form = _read_application_form(extracted_path)
            elif lowered.endswith(".xlsx"):
                memo = _read_memo(extracted_path)
            else:
                # Images and any other file type are registered as-is,
                # each forming its own single-file group.
                file_paths.append(extracted_path)
                file_groups[extracted_path] = [extracted_path]
    return file_groups, file_paths, application_form, memo


def _stage_uploads(uploaded_files, temp_dir):
    """Write each upload into *temp_dir* and index it according to its MIME type."""
    file_paths = []
    file_groups = {}
    application_form = None
    memo = None

    for uploaded_file in uploaded_files:
        # Keep only the final path component so a crafted upload name
        # cannot place the file outside temp_dir.
        safe_name = os.path.basename(uploaded_file.name)
        file_path = os.path.join(temp_dir, safe_name)
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getbuffer())

        logger.info(
            f"file_path: {file_path}, uploaded_file.type : {uploaded_file.type}")

        # Treat a missing MIME type as "unknown" rather than crashing.
        mime_type = uploaded_file.type or ""

        if mime_type == "application/pdf":
            file_groups, file_paths = process_pdf(
                file_path=file_path, file_groups=file_groups, file_paths=file_paths)
        elif mime_type.startswith("image"):
            file_paths.append(file_path)
            file_groups[file_path] = [file_path]
        elif mime_type in _ZIP_MIME_TYPES:
            # Strip only a trailing ".zip" (any case). If there is none, add
            # a suffix so the extraction directory never collides with the
            # archive file that was just written.
            stem, ext = os.path.splitext(safe_name)
            if ext.lower() != ".zip":
                stem = f"{safe_name}_extracted"
            extract_dir = os.path.join(temp_dir, stem)
            logger.info(f"extract_dir : {extract_dir}")

            with zipfile.ZipFile(file_path, 'r') as zip_ref:
                zip_ref.extractall(extract_dir)

            file_groups, file_paths, application_form, memo = \
                _register_extracted_files(
                    extract_dir, file_groups, file_paths,
                    application_form, memo)

    return file_paths, file_groups, application_form, memo


def process_uploaded_files(uploaded_files):
    """Stage uploaded files in a fresh temp directory and index their contents.

    Every upload is written to a new temporary directory, then handled by
    MIME type:

    * ``application/pdf``: converted to per-page PNGs via ``process_pdf``.
    * ``image/*``: registered as a single-file group.
    * zip archives: extracted; PDFs are converted, a ``.csv`` becomes the
      application form, a ``.xlsx`` becomes the memo, and every other
      extracted file is registered as a single-file group.
    * anything else: saved to disk but not indexed.

    Args:
        uploaded_files: Iterable of objects exposing ``.name``, ``.type`` and
            ``.getbuffer()`` (presumably Streamlit ``UploadedFile`` objects;
            confirm against the caller).

    Returns:
        A tuple ``(file_paths, file_groups, temp_dir, application_form, memo)``:
        ``file_paths`` is the flat list of indexed file paths, ``file_groups``
        maps each source file to the paths derived from it, ``temp_dir`` is
        the directory holding everything (the caller is responsible for
        removing it), ``application_form`` is a DataFrame or ``None``, and
        ``memo`` is a dict of DataFrames or ``None``. When several CSV/XLSX
        files are present the last one processed wins.

    Raises:
        Any exception raised while writing, extracting or parsing is
        propagated unchanged; the temp directory is removed first because the
        caller would otherwise never learn its path.
    """
    temp_dir = tempfile.mkdtemp()
    logger.info(f"temp_dir {temp_dir}")

    try:
        file_paths, file_groups, application_form, memo = _stage_uploads(
            uploaded_files, temp_dir)
    except Exception:
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise

    logger.info(f"file_groups : {file_groups}")

    return file_paths, file_groups, temp_dir, application_form, memo