Spaces:

ignaciaginting
/

extract_from_doc

Build error

File size: 1,063 Bytes

005a185
adf0a0e
 
b39b068
adf0a0e

import os
import tempfile

import streamlit as st
from pdf_extract_kit.tasks.ocr import OCRTask
from pdf_extract_kit.utils.config_loader import load_config

# Streamlit app: let the user upload a PDF, run the OCR task on it, and show
# whatever the task returns.
st.title("PDF Table Extraction")

# File uploader to upload PDF
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

if uploaded_file is not None:
    # Configuration path for OCR task
    config_path = "PDF-Extract-Kit/configs/ocr.yaml"  # Updated config path
    config = load_config(config_path)

    # Initialize the OCR task
    task = OCRTask(config)

    # Stage the upload in a private temporary directory instead of a fixed
    # "temp.pdf" in the working directory: concurrent sessions would otherwise
    # overwrite each other's file, and the file was never cleaned up. The
    # basename stays "temp.pdf" so any output names derived from it by the
    # task are unchanged.
    with tempfile.TemporaryDirectory() as tmp_dir:
        pdf_path = os.path.join(tmp_dir, "temp.pdf")
        with open(pdf_path, "wb") as f:
            # getvalue() returns the full buffer regardless of the current
            # stream position, unlike read().
            f.write(uploaded_file.getvalue())

        # Perform OCR task on the uploaded PDF while the staged file exists
        extracted_data = task.process(pdf_path, save_dir="outputs", visualize=True)

    # Display the extracted values
    st.write("Extracted Data:")
    st.write(extracted_data)

    # Optional: Visualize the result (depending on how the output is generated)
    # st.image('path_to_visualization_image', caption='Extracted Table', use_column_width=True)