underwriting-workflow / utils /image_utils.py
vamsidharmuthireddy's picture
Upload 90 files
52c1998 verified
import base64
from io import BytesIO
import pymupdf
from PIL import Image
import streamlit as st
import os
from datetime import datetime
def generate_metadata(file_path):
"""Generate metadata dictionary from file path and properties"""
file_stat = os.stat(file_path)
file_name = os.path.basename(file_path)
parent_dir = os.path.basename(os.path.dirname(file_path))
metadata = {
"File Name": file_name,
"Directory": parent_dir,
"File Size": f"{file_stat.st_size / 1024:.2f} KB",
"Last Modified": datetime.fromtimestamp(file_stat.st_mtime).strftime('%Y-%m-%d %H:%M:%S'),
"Created": datetime.fromtimestamp(file_stat.st_ctime).strftime('%Y-%m-%d %H:%M:%S'),
"File Extension": os.path.splitext(file_name)[1],
"Full Path": file_path
}
# Add image-specific metadata if it's an image
if file_name.lower().endswith(('.png', '.jpg', '.jpeg', '.gif')):
try:
with Image.open(file_path) as img:
metadata.update({
"Image Size": f"{img.size[0]}x{img.size[1]}",
"Image Mode": img.mode,
"Image Format": img.format
})
except Exception as e:
st.error(f"Error reading image metadata: {str(e)}")
# Add PDF-specific metadata if it's a PDF
elif file_name.lower().endswith('.pdf'):
try:
doc = pymupdf.Document(file_path)
metadata.update({
"Page Count": len(doc),
"PDF Version": doc.pdf_version,
"Document Info": doc.metadata if doc.metadata else "No PDF metadata available"
})
except Exception as e:
st.error(f"Error reading PDF metadata: {str(e)}")
return metadata
def load_pdf_as_image(file_path):
# Open PDF file
doc = pymupdf.Document(file_path)
# Get the first page
page = doc[0]
# Convert to image
pix = page.get_pixmap()
# Convert to PIL Image
img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
return img
def im_2_b64(image):
buff = BytesIO()
image.save(buff, format="JPEG")
img_str = base64.b64encode(buff.getvalue()).decode("utf-8")
return img_str
def encode_image(image_path):
with open(image_path, "rb") as image_file:
return base64.b64encode(image_file.read()).decode('utf-8')