Spaces:
Build error
Build error
File size: 2,406 Bytes
48e7216 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
import base64
from io import BytesIO
import pymupdf
from PIL import Image
import streamlit as st
import os
from datetime import datetime
def generate_metadata(file_path):
    """Build a display-ready metadata dictionary for a file.

    General file-system properties are always included. Files whose name
    ends in .png/.jpg/.jpeg/.gif additionally get image details read with
    Pillow, and files ending in .pdf get document details read with PyMuPDF.
    A failure in either extra step is reported through ``st.error`` and the
    general metadata is still returned.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        A dict mapping human-readable labels to values.

    Raises:
        OSError: If ``os.stat`` cannot read ``file_path``.
    """
    timestamp_format = '%Y-%m-%d %H:%M:%S'
    file_stat = os.stat(file_path)
    file_name = os.path.basename(file_path)
    parent_dir = os.path.basename(os.path.dirname(file_path))
    lowered_name = file_name.lower()

    # st_ctime is the creation time only on Windows; on Unix it is the time
    # of the last metadata change. Prefer the real birth time when the
    # platform exposes it and fall back to st_ctime otherwise.
    created_ts = getattr(file_stat, "st_birthtime", file_stat.st_ctime)

    metadata = {
        "File Name": file_name,
        "Directory": parent_dir,
        "File Size": f"{file_stat.st_size / 1024:.2f} KB",
        "Last Modified": datetime.fromtimestamp(file_stat.st_mtime).strftime(timestamp_format),
        "Created": datetime.fromtimestamp(created_ts).strftime(timestamp_format),
        "File Extension": os.path.splitext(file_name)[1],
        "Full Path": file_path,
    }

    # Add image-specific metadata if it's an image
    if lowered_name.endswith(('.png', '.jpg', '.jpeg', '.gif')):
        try:
            with Image.open(file_path) as img:
                metadata.update({
                    "Image Size": f"{img.size[0]}x{img.size[1]}",
                    "Image Mode": img.mode,
                    "Image Format": img.format,
                })
        except Exception as e:
            # Best effort: surface the problem in the UI, keep the base metadata.
            st.error(f"Error reading image metadata: {str(e)}")
    # Add PDF-specific metadata if it's a PDF
    elif lowered_name.endswith('.pdf'):
        try:
            # The context manager closes the document (and its file handle)
            # even when reading one of the properties fails.
            with pymupdf.Document(file_path) as doc:
                doc_info = doc.metadata
                # NOTE(review): `pdf_version` does not appear to be a documented
                # Document attribute - confirm against the installed PyMuPDF.
                # When it is missing, use the "format" metadata entry
                # (e.g. "PDF 1.7") instead of failing the whole PDF branch.
                pdf_version = getattr(doc, "pdf_version", None)
                if pdf_version is None:
                    pdf_version = (doc_info or {}).get("format") or "Unknown"
                metadata.update({
                    "Page Count": len(doc),
                    "PDF Version": pdf_version,
                    "Document Info": doc_info if doc_info else "No PDF metadata available",
                })
        except Exception as e:
            # Best effort: surface the problem in the UI, keep the base metadata.
            st.error(f"Error reading PDF metadata: {str(e)}")
    return metadata
def load_pdf_as_image(file_path):
    """Render the first page of a PDF file as a PIL image.

    Args:
        file_path: Path of the PDF document to open.

    Returns:
        A ``PIL.Image.Image`` in "RGB" mode holding the rendered first page.

    Raises:
        IndexError: If the document has no first page.
        Exception: Whatever PyMuPDF raises when the file cannot be opened.
    """
    # Open the document in a context manager so its file handle is released
    # as soon as the page has been rendered, even if rendering fails.
    with pymupdf.Document(file_path) as doc:
        # Get the first page and rasterize it.
        pix = doc[0].get_pixmap()
        # Build the PIL image while the document is still open. The "RGB"
        # mode relies on get_pixmap()'s default of an RGB pixmap without an
        # alpha channel (per the PyMuPDF documentation).
        return Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
def im_2_b64(image):
    """Encode a PIL image as a base64 string of its JPEG representation.

    JPEG cannot store alpha channels or palettes, so Pillow refuses to write
    modes such as "RGBA", "LA" or "P". Images in those modes are converted
    to "RGB" first (any transparency is discarded); images already in a
    JPEG-writable mode are saved unchanged.

    Args:
        image: A ``PIL.Image.Image`` (anything exposing ``mode``, ``convert``
            and ``save`` the same way).

    Returns:
        The JPEG bytes encoded as base64 ASCII text.
    """
    # Modes Pillow's JPEG writer accepts as-is; everything else must be
    # converted or save() raises OSError ("cannot write mode ... as JPEG").
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if image.mode not in jpeg_modes:
        image = image.convert("RGB")

    buff = BytesIO()
    image.save(buff, format="JPEG")
    return base64.b64encode(buff.getvalue()).decode("utf-8")
def encode_image(image_path):
    """Return the base64 text of the raw bytes stored at ``image_path``.

    The file is read in binary mode and encoded as-is; no image decoding or
    re-encoding takes place.
    """
    with open(image_path, "rb") as stream:
        raw_bytes = stream.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')
|