Spaces:
Build error
Build error
import base64 | |
from io import BytesIO | |
import pymupdf | |
from PIL import Image | |
import streamlit as st | |
import os | |
from datetime import datetime | |
def generate_metadata(file_path):
    """Generate metadata dictionary from file path and properties.

    Args:
        file_path: Path to an existing file on disk.

    Returns:
        A dict of display-ready values.  Every file gets "File Name",
        "Directory", "File Size", "Last Modified", "Created",
        "File Extension" and "Full Path".  Files ending in
        .png/.jpg/.jpeg/.gif additionally get "Image Size", "Image Mode"
        and "Image Format"; files ending in .pdf additionally get
        "Page Count", "PDF Version" and "Document Info".

    Raises:
        OSError: If ``os.stat`` cannot read ``file_path``.

    Failures while reading image- or PDF-specific details are reported
    through ``st.error``; the general metadata is still returned.
    """
    file_stat = os.stat(file_path)
    file_name = os.path.basename(file_path)
    parent_dir = os.path.basename(os.path.dirname(file_path))
    lower_name = file_name.lower()
    timestamp_format = '%Y-%m-%d %H:%M:%S'

    metadata = {
        "File Name": file_name,
        "Directory": parent_dir,
        "File Size": f"{file_stat.st_size / 1024:.2f} KB",
        "Last Modified": datetime.fromtimestamp(file_stat.st_mtime).strftime(timestamp_format),
        # NOTE: st_ctime is the creation time on Windows only; on Unix it is
        # the time of the last metadata change.
        "Created": datetime.fromtimestamp(file_stat.st_ctime).strftime(timestamp_format),
        "File Extension": os.path.splitext(file_name)[1],
        "Full Path": file_path,
    }

    # Add image-specific metadata if it's an image
    if lower_name.endswith(('.png', '.jpg', '.jpeg', '.gif')):
        try:
            with Image.open(file_path) as img:
                metadata.update({
                    "Image Size": f"{img.size[0]}x{img.size[1]}",
                    "Image Mode": img.mode,
                    "Image Format": img.format,
                })
        except Exception as e:
            st.error(f"Error reading image metadata: {str(e)}")
    # Add PDF-specific metadata if it's a PDF
    elif lower_name.endswith('.pdf'):
        try:
            # Open the document as a context manager so its file handle is
            # released even when reading one of the properties fails.
            with pymupdf.Document(file_path) as doc:
                doc_info = doc.metadata
                # NOTE(review): `pdf_version` does not appear among the
                # documented Document attributes - confirm.  When it is
                # missing, fall back to the "format" metadata entry instead
                # of losing every PDF field to an AttributeError.
                pdf_version = getattr(doc, "pdf_version", None)
                if pdf_version is None:
                    pdf_version = (doc_info or {}).get("format")
                metadata.update({
                    "Page Count": len(doc),
                    "PDF Version": pdf_version,
                    "Document Info": doc_info if doc_info else "No PDF metadata available",
                })
        except Exception as e:
            st.error(f"Error reading PDF metadata: {str(e)}")

    return metadata
def load_pdf_as_image(file_path):
    """Render the first page of a PDF file as a PIL image.

    Args:
        file_path: Path of the PDF document to open.

    Returns:
        A PIL image in "RGB" mode containing the rendered first page.
    """
    # Open the document as a context manager so the underlying file is
    # closed once the page has been rendered.
    with pymupdf.Document(file_path) as doc:
        # Get the first page
        page = doc[0]
        # Render without an alpha channel so the samples are plain RGB
        # triplets, matching the "RGB" mode used below.
        pix = page.get_pixmap(alpha=False)
        # Image.frombytes copies the pixel data, so the resulting image
        # stays valid after the document is closed.
        img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
    return img
def im_2_b64(image):
    """Encode an image as a base64 string of its JPEG representation.

    Args:
        image: A PIL image (any object exposing ``save`` and, when its
            ``mode`` is not JPEG-compatible, ``convert``).

    Returns:
        The JPEG bytes of the image, base64-encoded and decoded to text.
    """
    # JPEG cannot store alpha or palette data; Pillow raises OSError when
    # asked to write such modes (e.g. "RGBA", "P", "LA"), so normalise
    # those images to RGB first.  Objects without a ``mode`` attribute are
    # saved as-is, exactly as before.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if getattr(image, "mode", "RGB") not in jpeg_modes:
        image = image.convert("RGB")

    buff = BytesIO()
    image.save(buff, format="JPEG")
    return base64.b64encode(buff.getvalue()).decode("utf-8")
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is read in binary mode in one go; the base64 output is
    decoded as UTF-8 so the result is a ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')