Spaces:

ignaciaginting
/

extract_from_doc

Build error

extract_from_doc / app.py

Update app.py

adf0a0e verified 6 days ago

1.06 kB

	import os
	import tempfile

	import streamlit as st
	from pdf_extract_kit.tasks.ocr import OCRTask
	from pdf_extract_kit.utils.config_loader import load_config

	# Streamlit app: let the user upload a PDF, run the OCR task on it, and
	# display whatever the task returns.
	st.title("PDF Table Extraction")

	# File uploader to upload PDF (None until the user picks a file)
	uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

	if uploaded_file is not None:
	    # Stage the upload in a private temporary directory instead of a fixed
	    # "temp.pdf" in the working directory: concurrent sessions share the
	    # working directory and would overwrite each other's input, and the
	    # fixed file was never removed. The directory and its contents are
	    # deleted when the `with` block exits, even if processing raises.
	    with tempfile.TemporaryDirectory() as work_dir:
	        pdf_path = os.path.join(work_dir, "temp.pdf")
	        with open(pdf_path, "wb") as f:
	            # getvalue() returns the whole buffer regardless of the current
	            # read position, unlike read().
	            f.write(uploaded_file.getvalue())

	        # Configuration path for OCR task
	        config_path = "PDF-Extract-Kit/configs/ocr.yaml"
	        config = load_config(config_path)

	        # Initialize the OCR task
	        # NOTE(review): confirm that OCRTask accepts the loaded config
	        # directly; its constructor is not visible from this file.
	        task = OCRTask(config)

	        # Perform OCR task on the uploaded PDF; results/visualizations are
	        # requested to be saved under "outputs".
	        extracted_data = task.process(pdf_path, save_dir="outputs", visualize=True)

	        # Display the extracted values while the staged PDF still exists
	        st.write("Extracted Data:")
	        st.write(extracted_data)

	# Optional: Visualize the result (depending on how the output is generated)
	# st.image('path_to_visualization_image', caption='Extracted Table', use_column_width=True)