YAMITEK committed on
Commit
06c933c
·
verified ·
1 Parent(s): 6dc5a7e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +109 -0
app.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import fitz # PyMuPDF
3
+ from PIL import Image
4
+ import io
5
+ import pandas as pd
6
+ import json
7
+ import google.generativeai as genai
8
+ from dotenv import load_dotenv
9
+ import os
10
+ import requests
11
+
12
# Load environment variables from a local .env file, if one is present
load_dotenv()

# Configure Google Generative AI API.
# The key is read from the environment (or .env) rather than being embedded
# in the source, so the secret is never committed with the code.
_api_key = os.getenv("GOOGLE_API_KEY")
if not _api_key:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; define it in the environment or in a .env file."
    )
genai.configure(api_key=_api_key)
17
+
18
def get_response(model, user_input, image, prompt):
    """Send the text and image parts to the model and return its text reply.

    Args:
        model: Object exposing ``generate_content(parts)`` whose result has a
            ``text`` attribute (the app passes a ``genai.GenerativeModel``).
        user_input: Text sent as the first content part; skipped when ``None``
            or blank.
        image: Image sent as the second content part; skipped when ``None``.
        prompt: Instruction text sent as the last content part; skipped when
            ``None`` or blank.

    Returns:
        The response text, or ``None`` when the call fails (the error is
        reported in the Streamlit UI).
    """
    # Drop missing or blank parts so no empty content part is sent.
    parts = [
        part
        for part in (user_input, image, prompt)
        if part is not None and not (isinstance(part, str) and not part.strip())
    ]
    try:
        # This is where the actual interaction with the model happens
        response = model.generate_content(parts)
        return response.text
    except Exception as e:
        # UI boundary: the client library is not expected to raise `requests`
        # exceptions, and reading `.text` may itself fail (e.g. when no
        # candidate text is returned), so any failure is surfaced here.
        st.error(f"⚠️ Error while calling the API: {e}")
        return None
27
+
28
def convert_pdf_to_images(pdf_bytes, dpi=300):
    """Render every page of a PDF to a PIL image using fitz (PyMuPDF).

    Args:
        pdf_bytes: Raw bytes of the PDF document.
        dpi: Rendering resolution passed to ``page.get_pixmap``.

    Returns:
        A list of PIL images, one per page, in page order.
    """
    images = []
    # The context manager closes the document even if rendering a page fails.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        for page in doc:
            pix = page.get_pixmap(dpi=dpi)
            # Round-trip through in-memory PNG bytes to obtain a PIL image.
            images.append(Image.open(io.BytesIO(pix.tobytes("png"))))
    return images
37
+
38
# Instructions sent to the model together with the first page image.
EXTRACTION_PROMPT = """Extract the invoice table from the uploaded invoice document.
The table should include the following columns:
- CODE ARTICLE
- DESIGNATION
- QTE COMMANDÉE
- QTE LIVRÉE
- PRIX UNIT. REF
- PRIX UNIT. HT
- PRIX UNIT. TTC
- TOTAL HT
- TVA %
Also, extract and attach the following metadata fields to each row:
- N° CLIENT
- NOM CLIENT
- N° FACTURE
- DATE FACTURE
- DATE DE CDE
- Supplier/Company Name
After extraction:
- Create a clean pandas DataFrame containing all the above fields.
- Drop any rows where CODE ARTICLE is empty or missing.
- Return the data in JSON dictionary format.
"""


def _extract_json_array(text):
    """Return the first decodable JSON array embedded in *text*.

    The model reply may wrap the JSON in prose or Markdown fences, so each
    ``[`` is tried in turn as the start of an array and any trailing text
    after the array is ignored.

    Raises:
        ValueError: If *text* contains no decodable JSON array.
    """
    decoder = json.JSONDecoder()
    start = text.find("[")
    while start != -1:
        try:
            value, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            # Not valid JSON from this bracket; try the next one.
            start = text.find("[", start + 1)
        else:
            return value
    raise ValueError("No JSON array found in the model response.")


# Streamlit UI
st.set_page_config(page_title="Invoice Extractor", layout="centered")
st.title("📄 Invoice Table Extractor using Gemini AI")

uploaded_pdf = st.file_uploader("Upload a PDF Invoice", type=["pdf"])

if uploaded_pdf:
    with st.spinner("Converting PDF to images..."):
        images = convert_pdf_to_images(uploaded_pdf.read())

    if not images:
        # Nothing to preview or send to the model.
        st.error("⚠️ The uploaded PDF has no pages.")
        st.stop()

    st.image(images[0], caption="Page 1 of PDF", use_column_width=True)

    if st.button("Extract Table from Invoice"):
        with st.spinner("Extracting data with Gemini..."):
            model = genai.GenerativeModel('gemini-1.5-flash')
            prompt = EXTRACTION_PROMPT

            try:
                # Only the first page is sent to the model.
                response_text = get_response(model, prompt, images[0], prompt)
                if response_text:
                    st.success("✅ Gemini responded!")

                    # Parse JSON response
                    data = _extract_json_array(response_text)
                    df = pd.DataFrame(data)

                    # Clean data by removing rows with empty CODE ARTICLE
                    if "CODE ARTICLE" in df.columns:
                        df = df[df["CODE ARTICLE"].notna() & (df["CODE ARTICLE"] != "")]
                    else:
                        st.warning("⚠️ The response has no CODE ARTICLE column; rows were not filtered.")

                    st.dataframe(df)

                    # Allow the user to download the result as an Excel file
                    output = io.BytesIO()
                    # Leaving the `with` block writes the workbook into `output`;
                    # no explicit save call is needed.
                    with pd.ExcelWriter(output, engine="xlsxwriter") as writer:
                        df.to_excel(writer, index=False, sheet_name="Invoice Data")
                    st.download_button(
                        label="📥 Download Excel",
                        data=output.getvalue(),
                        file_name="invoice_extracted.xlsx",
                        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
                    )

            except Exception as e:
                st.error("⚠️ Failed to extract or parse data.")
                st.exception(e)