Spaces:

YAMITEK
/

invoice_pdf_to_excel_using_gemini

Running

App Files Files Community

YAMITEK committed 15 days ago

Commit

06c933c

verified ·

1 Parent(s): 6dc5a7e

Create app.py

Browse files

Files changed (1) hide show

app.py +109 -0

app.py ADDED Viewed

	@@ -0,0 +1,109 @@

+import streamlit as st
+import fitz  # PyMuPDF
+from PIL import Image
+import io
+import pandas as pd
+import json
+import google.generativeai as genai
+from dotenv import load_dotenv
+import os
+import requests
+# Load environment variables
+load_dotenv()
+# Configure Google Generative AI API
+genai.configure(api_key="AIzaSyANPvOHyc2wp3xCX8oF1t-Fhxx8hNCe_rU")
+def get_response(model, user_input, image, prompt):
+    """Generate response from the model using input and image data."""
+    try:
+        # This is where the actual interaction with the model happens
+        response = model.generate_content([user_input, image, prompt])
+        return response.text
+    except requests.exceptions.RequestException as e:
+        st.error(f"⚠️ Error while calling the API: {e}")
+        return None
+def convert_pdf_to_images(pdf_bytes):
+    """Convert PDF to images using fitz (PyMuPDF)."""
+    images = []
+    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
+    for page in doc:
+        pix = page.get_pixmap(dpi=300)
+        img = Image.open(io.BytesIO(pix.tobytes("png")))
+        images.append(img)
+    return images
+# Streamlit UI
+st.set_page_config(page_title="Invoice Extractor", layout="centered")
+st.title("📄 Invoice Table Extractor using Gemini AI")
+uploaded_pdf = st.file_uploader("Upload a PDF Invoice", type=["pdf"])
+if uploaded_pdf:
+    with st.spinner("Converting PDF to images..."):
+        images = convert_pdf_to_images(uploaded_pdf.read())
+    st.image(images[0], caption="Page 1 of PDF", use_column_width=True)
+    if st.button("Extract Table from Invoice"):
+        with st.spinner("Extracting data with Gemini..."):
+            model = genai.GenerativeModel('gemini-1.5-flash')
+            prompt = """Extract the invoice table from the uploaded invoice document.
+The table should include the following columns:
+- CODE ARTICLE
+- DESIGNATION
+- QTE COMMANDÉE
+- QTE LIVRÉE
+- PRIX UNIT. REF
+- PRIX UNIT. HT
+- PRIX UNIT. TTC
+- TOTAL HT
+- TVA %
+Also, extract and attach the following metadata fields to each row:
+- N° CLIENT
+- NOM CLIENT
+- N° FACTURE
+- DATE FACTURE
+- DATE DE CDE
+- Supplier/Company Name
+After extraction:
+- Create a clean pandas DataFrame containing all the above fields.
+- Drop any rows where CODE ARTICLE is empty or missing.
+- Return the data in JSON dictionary format.
+"""
+            try:
+                response_text = get_response(model, prompt, images[0], prompt)
+                if response_text:
+                    st.success("✅ Gemini responded!")
+                    # Parse JSON response
+                    start_index = response_text.find('[')
+                    end_index = response_text.rfind(']') + 1
+                    clean_json = response_text[start_index:end_index]
+                    data = json.loads(clean_json)
+                    df = pd.DataFrame(data)
+                    # Clean data by removing rows with empty CODE ARTICLE
+                    df = df[df["CODE ARTICLE"].notna() & (df["CODE ARTICLE"] != "")]
+                    st.dataframe(df)
+                    # Allow the user to download the result as an Excel file
+                    output = io.BytesIO()
+                    with pd.ExcelWriter(output, engine="xlsxwriter") as writer:
+                        df.to_excel(writer, index=False, sheet_name="Invoice Data")
+                        writer.save()
+                    st.download_button(
+                        label="📥 Download Excel",
+                        data=output.getvalue(),
+                        file_name="invoice_extracted.xlsx",
+                        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+                    )
+            except Exception as e:
+                st.error("⚠️ Failed to extract or parse data.")
+                st.exception(e)