Spaces:
Build error
Build error
import tempfile | |
import os | |
import pdf2image | |
import zipfile | |
from .logger import setup_logger | |
import pandas as pd | |
logger = setup_logger(__name__) | |
def process_pdf(file_path: str, file_groups: dict, file_paths: list):
    """Render each page of a PDF to its own PNG and register the results.

    Pages are produced by ``pdf2image.convert_from_path`` and written as
    ``<file_path>_page_<index>.png`` (index starts at 0).

    Args:
        file_path: Path of the PDF to convert.
        file_groups: Mapping updated in place; ``file_groups[file_path]`` is
            set to the list of PNG paths generated for this PDF.
        file_paths: List updated in place; every generated PNG path is
            appended to it.

    Returns:
        The same ``(file_groups, file_paths)`` objects that were passed in.
    """
    rendered_pages = []
    for page_index, page in enumerate(pdf2image.convert_from_path(file_path)):
        png_path = f"{file_path}_page_{page_index}.png"
        page.save(png_path, "PNG")
        # Track the page both globally and under its source PDF.
        file_paths.append(png_path)
        rendered_pages.append(png_path)

    file_groups[file_path] = rendered_pages
    return file_groups, file_paths
# MIME types under which a ZIP upload may be reported; the second one is the
# non-standard value commonly sent for .zip files uploaded from Windows.
_ZIP_MIME_TYPES = frozenset({"application/zip", "application/x-zip-compressed"})


def _extraction_dir(temp_dir, archive_name):
    """Return the directory inside *temp_dir* an archive is unpacked into."""
    stem, ext = os.path.splitext(archive_name)
    if not ext:
        # Nothing to strip: the bare name is already occupied by the saved
        # archive file itself, so unpack into a sibling directory instead.
        stem = f"{archive_name}_extracted"
    return os.path.join(temp_dir, stem)


def _stack_rows(sheet, *spans):
    """Stack positional ``(start, stop)`` row spans of *sheet*, keeping columns 0 and 1."""
    stacked = pd.concat(
        [sheet.iloc[start:stop] for start, stop in spans],
        axis=0,
        ignore_index=True,
    )
    return stacked[[0, 1]]


def _load_application_form(csv_path):
    """Read the header-less application-form CSV and prefix its first column."""
    application_form = pd.read_csv(csv_path, header=None)
    logger.info(f"application_form: {application_form}")
    application_form[0] = 'application_summary_' + \
        application_form[0].str.strip()
    return application_form


def _load_memo(xlsx_path):
    """Build the memo dict from fixed row ranges of the workbook.

    Sheet names and row positions are hard-coded (0-based, header-less).
    NOTE(review): confirm they match the workbook template in use; a missing
    sheet raises ``KeyError``.
    """
    sheets = pd.read_excel(xlsx_path, sheet_name=None, header=None)
    return {
        "Mortgage Details": _stack_rows(
            sheets['Sheet1'], (31, 32), (33, 34), (50, 51)),
        # Deposit details
        "Deposit details": _stack_rows(sheets['Sheet6'], (44, 47)),
        # Monthly costs for both applicants
        "Monthly costs for both applicants": _stack_rows(
            sheets['Sheet7'], (5, 23)),
    }


def _ingest_archive(archive_path, extract_dir, file_groups, file_paths):
    """Unpack a ZIP archive and register every file found inside it.

    ``file_groups`` and ``file_paths`` are updated in place. Returns
    ``(application_form, memo)``; each is ``None`` when the archive holds no
    ``.csv`` / ``.xlsx`` file respectively (the last one found wins).
    """
    application_form = None
    memo = None
    with zipfile.ZipFile(archive_path, 'r') as zip_ref:
        zip_ref.extractall(extract_dir)

    for root, _, files in os.walk(extract_dir):
        for file in files:
            extracted_path = os.path.join(root, file)
            lowered = file.lower()
            if lowered.endswith(".pdf"):
                # process_pdf updates both containers in place.
                process_pdf(
                    file_path=extracted_path,
                    file_groups=file_groups,
                    file_paths=file_paths)
            elif lowered.endswith(".csv"):
                application_form = _load_application_form(extracted_path)
            elif lowered.endswith(".xlsx"):
                memo = _load_memo(extracted_path)
                logger.info(f"memo : {memo}")
            else:
                # Images and any other file type are registered as-is, each
                # forming its own single-file group.
                file_paths.append(extracted_path)
                file_groups[extracted_path] = [extracted_path]
    return application_form, memo


def process_uploaded_files(uploaded_files):
    """Save uploaded files to a fresh temp directory and index their contents.

    PDFs are split into per-page PNGs via ``process_pdf``; images are
    registered directly; ZIP archives are unpacked and their members handled
    by extension (``.pdf``, ``.csv`` -> application form, ``.xlsx`` -> memo,
    anything else registered as-is). Uploads of any other type are saved to
    the temp directory but not registered.

    Args:
        uploaded_files: Iterable of upload objects exposing ``name``,
            ``type`` (MIME string) and ``getbuffer()``. Presumably Streamlit
            ``UploadedFile`` objects -- confirm against the caller.

    Returns:
        Tuple ``(file_paths, file_groups, temp_dir, application_form, memo)``:
        the flat list of registered paths, a dict mapping each source file to
        its list of derived paths, the temp directory (the caller owns its
        cleanup on success), the application-form DataFrame or ``None``, and
        the memo dict of DataFrames or ``None``.

    Raises:
        Any exception raised while saving or parsing a file is propagated
        after the temp directory has been removed.
    """
    import shutil  # local import: only needed to clean up after a failure

    file_paths = []
    file_groups = {}
    application_form = None
    memo = None
    temp_dir = tempfile.mkdtemp()
    logger.info(f"temp_dir {temp_dir}")
    try:
        for uploaded_file in uploaded_files:
            # Keep only the final path component so a crafted upload name
            # cannot point outside temp_dir.
            safe_name = os.path.basename(uploaded_file.name)
            file_path = os.path.join(temp_dir, safe_name)
            with open(file_path, "wb") as f:
                f.write(uploaded_file.getbuffer())
            logger.info(
                f"file_path: {file_path}, uploaded_file.type : {uploaded_file.type}")

            mime_type = uploaded_file.type or ""
            if mime_type == "application/pdf":
                file_groups, file_paths = process_pdf(
                    file_path=file_path, file_groups=file_groups, file_paths=file_paths)
            elif mime_type.startswith("image"):
                file_paths.append(file_path)
                file_groups[file_path] = [file_path]
            elif mime_type in _ZIP_MIME_TYPES or (
                    safe_name.lower().endswith(".zip")
                    and zipfile.is_zipfile(file_path)):
                extract_dir = _extraction_dir(temp_dir, safe_name)
                logger.info(f"extract_dir : {extract_dir}")
                archive_form, archive_memo = _ingest_archive(
                    file_path, extract_dir, file_groups, file_paths)
                # Only overwrite results from earlier archives when this one
                # actually contained the corresponding file.
                if archive_form is not None:
                    application_form = archive_form
                if archive_memo is not None:
                    memo = archive_memo
    except Exception:
        # The caller never receives temp_dir on failure, so remove it here
        # instead of leaking it.
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise

    logger.info(f"file_groups : {file_groups}")
    return file_paths, file_groups, temp_dir, application_form, memo