ignaciaginting's picture
Update app.py
adf0a0e verified
import os
import tempfile

import streamlit as st
from pdf_extract_kit.tasks.ocr import OCRTask
from pdf_extract_kit.utils.config_loader import load_config
# Streamlit app: upload a PDF, run the PDF-Extract-Kit OCR task on it, and
# display whatever the task returns.
st.title("PDF Table Extraction")

# File uploader to upload PDF
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

if uploaded_file is not None:
    # Stage the upload inside a private temporary directory instead of a fixed
    # "temp.pdf" in the working directory: a fixed path is shared by every
    # session of the app, so concurrent uploads could overwrite each other,
    # and the file would never be cleaned up. The directory (and the PDF in
    # it) is removed automatically when the `with` block exits.
    with tempfile.TemporaryDirectory() as tmp_dir:
        pdf_path = os.path.join(tmp_dir, "temp.pdf")

        # getvalue() returns the whole upload buffer regardless of the current
        # stream position, whereas read() returns nothing once the buffer has
        # already been consumed.
        with open(pdf_path, "wb") as f:
            f.write(uploaded_file.getvalue())

        # Configuration path for OCR task
        config_path = "PDF-Extract-Kit/configs/ocr.yaml"
        config = load_config(config_path)

        # Initialize the OCR task
        # NOTE(review): OCRTask is constructed directly from the loaded config
        # here — confirm against PDF-Extract-Kit that its constructor accepts
        # a config rather than an already-built model instance.
        task = OCRTask(config)

        # Perform OCR task on the uploaded PDF; results and visualizations are
        # requested to be saved under "outputs".
        extracted_data = task.process(pdf_path, save_dir="outputs", visualize=True)

        # Display the extracted values while the staged PDF still exists, in
        # case the returned object refers back to the input file.
        st.write("Extracted Data:")
        st.write(extracted_data)

    # Optional: Visualize the result (depending on how the output is generated)
    # st.image('path_to_visualization_image', caption='Extracted Table', use_column_width=True)