Alexvatti's picture
Update app.py
70f188e verified
raw
history blame contribute delete
1.81 kB
import streamlit as st
from PIL import Image
from transformers import DonutProcessor, VisionEncoderDecoderModel
import torch
@st.cache_resource
def load_model():
    """Load the Donut DocVQA processor and model.

    The function is decorated with ``st.cache_resource`` so the loaded
    objects can be shared instead of being rebuilt on each call.

    Returns:
        A ``(processor, model)`` tuple: the ``DonutProcessor`` and the
        ``VisionEncoderDecoderModel`` loaded from the same checkpoint.
    """
    # Both halves must come from the same checkpoint, so name it once.
    checkpoint = "naver-clova-ix/donut-base-finetuned-docvqa"
    donut_processor = DonutProcessor.from_pretrained(checkpoint)
    donut_model = VisionEncoderDecoderModel.from_pretrained(checkpoint)
    return donut_processor, donut_model
# Streamlit page: upload an invoice image, ask the Donut DocVQA model about its
# items table, and show the model's answer.
processor, model = load_model()

st.title("🧾 Invoice Table Extractor - Hugging Face Donut")
st.write("Upload an invoice image to extract the table (code article, designation, quantity, unit prices, totals).")

uploaded_file = st.file_uploader("Choose an image", type=["png", "jpg", "jpeg"])

if uploaded_file is not None:
    image = Image.open(uploaded_file).convert("RGB")
    st.image(image, caption="Uploaded Invoice", use_column_width=True)

    with st.spinner("🔍 Analyzing..."):
        tokenizer = processor.tokenizer

        # Preprocess the image into the tensor the vision encoder expects.
        pixel_values = processor(image, return_tensors="pt").pixel_values

        # The DocVQA checkpoint's documented prompt wraps the question in
        # <s_question>...</s_question> and opens <s_answer>; plain
        # <question>/<answer> tags would be tokenized as ordinary text.
        question = (
            "Extract the invoice items table with code article, designation, "
            "quantity, unit prices, and totals."
        )
        prompt = f"<s_docvqa><s_question>{question}</s_question><s_answer>"
        decoder_input_ids = tokenizer(
            prompt, add_special_tokens=False, return_tensors="pt"
        ).input_ids

        # Greedy decoding, following the Donut documentation. `early_stopping`
        # is a beam-search option and is therefore not passed. The length cap
        # keeps the original 512 limit but never exceeds the decoder's
        # position-embedding size.
        outputs = model.generate(
            pixel_values,
            decoder_input_ids=decoder_input_ids,
            max_length=min(512, model.decoder.config.max_position_embeddings),
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
            bad_words_ids=[[tokenizer.unk_token_id]],
        )

        # Decode with the task tags intact so token2json can parse them, then
        # drop the eos/pad markers and the leading task-start token.
        sequence = processor.batch_decode(outputs)[0]
        sequence = sequence.replace(tokenizer.eos_token, "").replace(tokenizer.pad_token, "")
        sequence = sequence.replace("<s_docvqa>", "", 1).strip()

        # token2json is expected to yield {"question": ..., "answer": ...} for
        # this task; fall back to the cleaned sequence if no string answer
        # was parsed so the user always sees the model output.
        parsed = processor.token2json(sequence)
        answer = parsed.get("answer") if isinstance(parsed, dict) else None
        result = answer if isinstance(answer, str) else sequence

    st.subheader("📋 Extracted Table Info")
    st.code(result)