Spaces:
Build error
Build error
File size: 3,750 Bytes
48e7216 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
import streamlit as st
from PIL import Image
import json
import os
from utils import im_2_b64, load_pdf_as_image, generate_metadata
from .llm import DocumentLLM
from prompts import (document_type_prompt, passport_prompt,
payslip_prompt, bank_statement_prompt,
p60_prompt, driving_license_prompt,
genric_ocr_prompt)
from utils.json_utils import restructure_documents
from utils import setup_logger
logger = setup_logger(__name__)
def _select_extraction_prompt(document_type):
    """Return the extraction prompt to use for *document_type*.

    Document types without a dedicated prompt fall back to the generic
    OCR prompt.
    """
    # Built locally so the prompt names are only resolved at call time.
    prompts_by_type = {
        'passport': passport_prompt,
        'driving_license': driving_license_prompt,
        'bank_statement': bank_statement_prompt,
        'payslip': payslip_prompt,
        'p60': p60_prompt,
    }
    # The type comes from the LLM response; guard against non-string
    # (possibly unhashable) values so they still get the generic prompt.
    if isinstance(document_type, str):
        return prompts_by_type.get(document_type, genric_ocr_prompt)
    return genric_ocr_prompt


def analyze_files(file_groups: dict, temp_dir, current_upload):
    """Classify each extracted file with the LLM and extract its data.

    For every file the function first stores the output of
    ``generate_metadata``. If the file is a supported image or a PDF, it
    asks the LLM for the document type and then runs the prompt matching
    that type; the LLM output replaces the metadata entry for that file.
    Unsupported or failing files keep their metadata entry and are
    reported through the Streamlit UI.

    Args:
        file_groups: Mapping of an original file to an iterable of the
            file paths extracted from it.
        temp_dir: Directory in which ``analysis_results.json`` is written.
        current_upload: Key into ``st.session_state['uploads']`` under
            which the restructured results are stored.

    Returns:
        A tuple ``(results_group, json_output_path)`` where
        ``results_group`` maps each original file to a dict of per-file
        results and ``json_output_path`` is the path of the JSON dump.
    """
    document_llm = DocumentLLM()
    results_group = {}

    for original_file, extracted_files in file_groups.items():
        results = {}
        for file_name in extracted_files:
            logger.info(f"file_name : {file_name}")
            extension = file_name.lower().split('.')[-1]
            results[file_name] = generate_metadata(file_name)

            # Reset for every file: otherwise an unsupported file either hits
            # an unbound name or reuses the buffer of a previous file.
            image_buffer = None
            try:
                logger.info(f"Starting analysis for {file_name}")
                if extension in ['jpg', 'jpeg', 'png', 'gif']:
                    # Context manager releases the underlying file handle.
                    with Image.open(file_name) as image:
                        image_buffer = im_2_b64(image)
                elif extension == 'pdf':
                    img = load_pdf_as_image(file_name)
                    image_buffer = im_2_b64(img)
                    st.image(img, use_container_width=True)
                else:
                    st.write(
                        f"Unsupported file format: {extension}")

                # The buffer only gates the analysis (it proves the file
                # could be loaded); the LLM itself is given the file path.
                if image_buffer is not None:
                    results[file_name] = document_llm.call_llm_api(
                        prompt=document_type_prompt,
                        image_path=file_name)
                    logger.info(
                        f"File name: {file_name}, Results: {results[file_name]}")

                    document_type = results[file_name].get(
                        'document_type', None)
                    if document_type is not None:
                        data = document_llm.call_llm_api(
                            prompt=_select_extraction_prompt(document_type),
                            image_path=file_name)
                        results[file_name].update(data)
                        logger.info(f"{file_name}: {data}")
            except Exception as e:
                # Best effort per file: keep the traceback in the log, show
                # the error in the UI, and continue with the next file.
                logger.exception(f"Error processing {file_name}")
                st.error(f"Error processing {file_name}: {str(e)}")

        results_group[original_file] = results

    results_transformed = restructure_documents(results_group)
    st.session_state['uploads'][current_upload]['results_transformed'] = results_transformed

    # Save analysis results to a JSON file
    json_output_path = os.path.join(
        temp_dir, "analysis_results.json")
    with open(json_output_path, "w", encoding="utf-8") as json_file:
        json.dump(results_group, json_file, indent=4)

    return results_group, json_output_path
|