Spaces:
Build error
Build error
import streamlit as st | |
from PIL import Image | |
import json | |
import os | |
from utils import im_2_b64, load_pdf_as_image, generate_metadata | |
from .llm import DocumentLLM | |
from prompts import (document_type_prompt, passport_prompt, | |
payslip_prompt, bank_statement_prompt, | |
p60_prompt, driving_license_prompt, | |
genric_ocr_prompt) | |
from utils.json_utils import restructure_documents | |
from utils import setup_logger | |
logger = setup_logger(__name__) | |
def _select_extraction_prompt(document_type):
    """Return the extraction prompt matching a detected document type.

    Types without a dedicated prompt (including non-string values) fall back
    to the generic OCR prompt.
    """
    dedicated_prompts = {
        'passport': passport_prompt,
        'driving_license': driving_license_prompt,
        'bank_statement': bank_statement_prompt,
        'payslip': payslip_prompt,
        'p60': p60_prompt,
    }
    # Guard against unhashable values (e.g. a list in the LLM response), which
    # would make the dict lookup raise instead of falling back.
    if isinstance(document_type, str):
        return dedicated_prompts.get(document_type, genric_ocr_prompt)
    return genric_ocr_prompt


def analyze_files(file_groups: dict, temp_dir, current_upload):
    """Classify every extracted file with the document LLM and extract its data.

    Each file is first given a metadata entry, then (for supported image and
    PDF files) sent to the LLM with the document-type prompt. When the response
    carries a ``document_type``, a second LLM call with the matching extraction
    prompt is made and its output is merged into the file's result.

    The restructured results are stored in
    ``st.session_state['uploads'][current_upload]['results_transformed']`` and
    the raw results are written to ``analysis_results.json`` in ``temp_dir``.

    Args:
        file_groups: Mapping of an original file to an iterable of the file
            paths extracted from it.
        temp_dir: Directory in which ``analysis_results.json`` is written.
        current_upload: Key into ``st.session_state['uploads']`` for the
            upload being analysed.

    Returns:
        A tuple ``(results_group, json_output_path)`` where ``results_group``
        maps each original file to a dict of per-file results.
    """
    image_extensions = ('jpg', 'jpeg', 'png', 'gif')
    document_llm = DocumentLLM()
    results_group = {}

    for original_file, extracted_files in file_groups.items():
        results = {}
        for file_name in extracted_files:
            logger.info(f"file_name : {file_name}")
            extension = file_name.lower().split('.')[-1]
            results[file_name] = generate_metadata(file_name)

            # Reset for every file: an unsupported format must never see an
            # unbound name or reuse the buffer left over from a previous file.
            image_buffer = None
            try:
                logger.info(f"Starting analysis for {file_name}")
                if extension in image_extensions:
                    # Context manager closes the underlying file handle.
                    with Image.open(file_name) as image:
                        image_buffer = im_2_b64(image)
                elif extension == 'pdf':
                    img = load_pdf_as_image(file_name)
                    image_buffer = im_2_b64(img)
                    st.image(img, use_container_width=True)
                else:
                    st.write(
                        f"Unsupported file format: {extension}")

                # The buffer only gates the LLM calls; the LLM itself is
                # handed the file path.
                if image_buffer is None:
                    continue

                # NOTE(review): this replaces the metadata entry created
                # above with the LLM response - confirm that is intended.
                results[file_name] = document_llm.call_llm_api(
                    prompt=document_type_prompt,
                    image_path=file_name)
                logger.info(
                    f"File name: {file_name}, Results: {results[file_name]}")

                document_type = results[file_name].get(
                    'document_type', None)
                if document_type is None:
                    continue

                data = document_llm.call_llm_api(
                    prompt=_select_extraction_prompt(document_type),
                    image_path=file_name)
                results[file_name].update(data)
                logger.info(f"{file_name}: {data}")
            except Exception as e:
                # Best-effort per file: report, keep the traceback in the
                # logs, and carry on with the remaining files.
                logger.exception(f"Error processing {file_name}")
                st.error(f"Error processing {file_name}: {str(e)}")
        results_group[original_file] = results

    results_transformed = restructure_documents(results_group)
    st.session_state['uploads'][current_upload]['results_transformed'] = results_transformed

    # Save analysis results to a JSON file
    json_output_path = os.path.join(
        temp_dir, "analysis_results.json")
    with open(json_output_path, "w", encoding="utf-8") as json_file:
        json.dump(results_group, json_file, indent=4)
    return results_group, json_output_path