File size: 2,798 Bytes
52c1998
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import streamlit as st
from utils.process_files import process_uploaded_files
from utils.document_display import display_based_on_card
import os
import pandas as pd
import json
from llm.document_analyzer import analyze_files

from PIL import Image
from utils import setup_logger

logger = setup_logger(__name__)

st.set_page_config(layout="wide")

# Streamlit re-executes this whole script on every widget interaction, so
# anything that must survive a rerun is kept under st.session_state['tab_ocr'].
# The store is only ever mutated by key: rebinding the name
# (`st.session_state = {...}`) would replace Streamlit's per-session object
# with a plain dict attribute on the shared `streamlit` module.
if 'tab_ocr' not in st.session_state:
    st.session_state['tab_ocr'] = {
        'file_groups': None,       # groups returned by process_uploaded_files
        'values_raw': None,        # results returned by analyze_files
        'values_display': None,    # None until results were rendered once
        'temp_dir': None,          # temp dir returned by process_uploaded_files
        'upload_signature': None,  # fingerprint of the upload set processed
    }
tab_state = st.session_state['tab_ocr']

# NOTE(review): this logs the full session state, which includes the analysis
# results once they exist -- confirm that is acceptable for ID documents.
logger.info(f"st.session_state: {st.session_state}")
st.title("ID Analyser")

uploaded_files = st.file_uploader(
    "Upload Images, PDFs",
    accept_multiple_files=True,
    type=["png", "jpg", "jpeg", "pdf", "zip"],
)


if uploaded_files:
    # Fingerprint the current upload set so cached results are dropped only
    # when the user actually changes the files, not on every rerun.
    upload_signature = tuple(
        (uploaded.name, uploaded.size) for uploaded in uploaded_files)

    if (tab_state.get('upload_signature') != upload_signature
            or tab_state.get('file_groups') is None):
        # New or changed upload: process it once and invalidate old results.
        # The file-path list returned first is not used on this page.
        _, file_groups, temp_dir = process_uploaded_files(uploaded_files)
        tab_state['file_groups'] = file_groups
        tab_state['temp_dir'] = temp_dir
        tab_state['upload_signature'] = upload_signature
        tab_state['values_raw'] = None
        tab_state['values_display'] = None

    analyze_clicked = st.button("Analyze")

    if analyze_clicked:
        # An explicit click always forces a fresh analysis.
        tab_state['values_raw'] = None
        tab_state['values_display'] = None

    # Render when Analyze was just clicked, or when results for this upload
    # were already rendered on a previous run. `is not None` matters because
    # the "rendered" marker is an empty (falsy) dict.
    if analyze_clicked or tab_state['values_display'] is not None:
        if tab_state['values_raw'] is None:
            # The second return value (a JSON output path) is not needed here;
            # the download button below serialises the results itself.
            analysis_results_groups, _ = analyze_files(
                file_groups=tab_state['file_groups'],
                temp_dir=tab_state['temp_dir'])

            tab_state['values_raw'] = analysis_results_groups

        if tab_state['values_display'] is None:
            tab_state['values_display'] = {}

        for original_file, extracted_files in tab_state['file_groups'].items():
            # NOTE(review): the return value of display_based_on_card was
            # never used; it may have been meant to populate
            # 'values_display' -- confirm with the helper's author.
            display_based_on_card(
                original_file=original_file,
                analysis_results_for_original_file=tab_state[
                    'values_raw'][original_file],
                extracted_files=extracted_files)

        st.download_button(
            label="Download Analysis JSON",
            data=json.dumps(tab_state['values_raw'], indent=4),
            file_name="analysis_results.json",
            mime="application/json"
        )