Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -6,10 +6,10 @@ from transformers import pipeline
|
|
6 |
# Model do rozpoznawania nazw organizacji i wartości numerycznych
|
7 |
extractor = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english", aggregation_strategy="simple")
|
8 |
|
9 |
-
# Reguły do
|
10 |
-
nip_pattern = re.compile(r'\b\d{10}\b')
|
11 |
-
kwota_pattern = re.compile(r'\b\d+[\.,]?\d*\b')
|
12 |
-
data_pattern = re.compile(r'\b\d{2}\.\d{2}\.\d{4}\b')
|
13 |
|
14 |
def extract_invoice_data(pdf_file):
|
15 |
with pdfplumber.open(pdf_file) as pdf:
|
@@ -30,9 +30,14 @@ def extract_invoice_data(pdf_file):
|
|
30 |
|
31 |
# Znajdujemy wartości numeryczne dla NIP, kwot, dat
|
32 |
seller_nip = nip_pattern.search(full_text)
|
33 |
-
total_amount = max(kwota_pattern.findall(full_text), key=float, default=None)
|
34 |
invoice_date = data_pattern.search(full_text)
|
35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
return {
|
37 |
"Sprzedawca": " ".join(seller_name) if seller_name else "Nie znaleziono",
|
38 |
"NIP": seller_nip.group() if seller_nip else "Nie znaleziono",
|
|
|
6 |
# Model do rozpoznawania nazw organizacji i wartości numerycznych
|
7 |
# Named-entity-recognition pipeline built from a BERT checkpoint fine-tuned on
# CoNLL-03 English. It is constructed once, when this module is loaded.
extractor = pipeline(
    "ner",
    model="dbmdz/bert-large-cased-finetuned-conll03-english",
    aggregation_strategy="simple",
)
|
8 |
|
9 |
+
# Reguły do wykrywania NIP, kwot, dat
|
10 |
+
# NIP (tax ID) candidate: a standalone run of exactly 10 digits.
# Separated forms such as "123-456-32-18" are not matched by this pattern.
nip_pattern = re.compile(r"\b\d{10}\b")
|
11 |
+
# Amount candidate: one or more digits, optionally followed by a single "." or
# "," and further digits. Plain integers match as well.
kwota_pattern = re.compile(r"\b\d+[\.,]?\d*\b")
|
12 |
+
# Date candidate: two digits, dot, two digits, dot, four digits
# (e.g. "31.12.2024"). The field values are not range-checked.
data_pattern = re.compile(r"\b\d{2}\.\d{2}\.\d{4}\b")
|
13 |
|
14 |
def extract_invoice_data(pdf_file):
|
15 |
with pdfplumber.open(pdf_file) as pdf:
|
|
|
30 |
|
31 |
# Znajdujemy wartości numeryczne dla NIP, kwot, dat
|
32 |
seller_nip = nip_pattern.search(full_text)
|
|
|
33 |
invoice_date = data_pattern.search(full_text)
|
34 |
|
35 |
+
# **Naprawiamy błąd przetwarzania liczb**
|
36 |
+
kwoty = kwota_pattern.findall(full_text)
|
37 |
+
kwoty = [float(k.replace(",", ".")) for k in kwoty if k.replace(",", ".").replace(".", "").isdigit()] # Zamiana przecinka na kropkę
|
38 |
+
|
39 |
+
total_amount = max(kwoty) if kwoty else None # Pobranie największej wartości jako całkowita kwota faktury
|
40 |
+
|
41 |
return {
|
42 |
"Sprzedawca": " ".join(seller_name) if seller_name else "Nie znaleziono",
|
43 |
"NIP": seller_nip.group() if seller_nip else "Nie znaleziono",
|