# Streamlit app: upload a PDF, run the PDF-Extract-Kit OCR task on it, and
# display whatever the task returns.

import os
import shutil
import tempfile

import streamlit as st
from pdf_extract_kit.tasks.ocr import OCRTask
from pdf_extract_kit.utils.config_loader import load_config

# Configuration file for the OCR task (resolved against the working directory).
OCR_CONFIG_PATH = "PDF-Extract-Kit/configs/ocr.yaml"
# Directory passed to the OCR task as its ``save_dir``.
OUTPUT_DIR = "outputs"

# Streamlit app title
st.title("PDF Table Extraction")

# File uploader to upload PDF
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

if uploaded_file is not None:
    # Load the configuration and initialize the OCR task.
    config = load_config(OCR_CONFIG_PATH)
    task = OCRTask(config)

    # Each run gets its own scratch directory so that concurrent sessions do
    # not overwrite one another's upload (a fixed "temp.pdf" in the working
    # directory is shared by every session and is never removed).
    workdir = tempfile.mkdtemp(prefix="pdf_upload_")
    try:
        # The basename stays "temp.pdf" in case the task derives output file
        # names from the input name -- NOTE(review): confirm against OCRTask.
        pdf_path = os.path.join(workdir, "temp.pdf")
        with open(pdf_path, "wb") as f:
            # getvalue() returns the full upload regardless of the current
            # stream position, unlike read().
            f.write(uploaded_file.getvalue())

        # Perform OCR task on the uploaded PDF
        extracted_data = task.process(pdf_path, save_dir=OUTPUT_DIR, visualize=True)

        # Display the extracted values while the input file still exists.
        st.write("Extracted Data:")
        st.write(extracted_data)
    finally:
        # Best-effort cleanup; ignore errors so a lingering file handle cannot
        # mask an exception raised by the OCR step.
        shutil.rmtree(workdir, ignore_errors=True)

    # Optional: Visualize the result (depending on how the output is generated)
    # st.image('path_to_visualization_image', caption='Extracted Table', use_column_width=True)