Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -19,4 +19,49 @@ def extract_invoice_data(pdf_file):
|
|
19 |
full_text = "\n".join(page.extract_text() for page in pdf.pages if page.extract_text())
|
20 |
|
21 |
# Tokenizacja danych z uwzględnieniem układu dokumentu
|
22 |
-
tokens = tokenizer(full_text, return_tensors="pt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
full_text = "\n".join(page.extract_text() for page in pdf.pages if page.extract_text())
|
20 |
|
21 |
# Tokenizacja danych z uwzględnieniem układu dokumentu
|
22 |
+
tokens = tokenizer(full_text, return_tensors="pt", truncation=True)
|
23 |
+
|
24 |
+
# Predykcja modelu
|
25 |
+
outputs = model(**tokens)
|
26 |
+
predictions = outputs.logits.argmax(-1).squeeze().tolist()
|
27 |
+
|
28 |
+
# Przetwarzanie wyników
|
29 |
+
entities = []
|
30 |
+
for token, pred in zip(tokens.tokens(), predictions):
|
31 |
+
if pred > 0: # Pomijamy tło
|
32 |
+
entities.append((token, model.config.id2label[pred]))
|
33 |
+
|
34 |
+
# Wyszukiwanie kluczowych wartości
|
35 |
+
seller_name = [token for token, label in entities if "ORG" in label]
|
36 |
+
seller_nip = nip_pattern.search(full_text)
|
37 |
+
kwoty = kwota_pattern.findall(full_text)
|
38 |
+
kwoty = [float(k.replace(",", ".")) for k in kwoty if k.replace(",", ".").replace(".", "").isdigit()]
|
39 |
+
total_amount = max(kwoty) if kwoty else None
|
40 |
+
|
41 |
+
# Szukamy daty płatności
|
42 |
+
payment_date = None
|
43 |
+
for line in full_text.split("\n"):
|
44 |
+
if any(keyword in line.lower() for keyword in payment_keywords):
|
45 |
+
date_match = data_pattern.search(line)
|
46 |
+
if date_match:
|
47 |
+
payment_date = date_match.group()
|
48 |
+
break
|
49 |
+
|
50 |
+
return {
|
51 |
+
"Sprzedawca": " ".join(seller_name) if seller_name else "Nie znaleziono",
|
52 |
+
"NIP": seller_nip.group() if seller_nip else "Nie znaleziono",
|
53 |
+
"Kwota ca艂kowita": total_amount if total_amount else "Nie znaleziono",
|
54 |
+
"Data p艂atno艣ci": payment_date if payment_date else "Nie znaleziono"
|
55 |
+
}
|
56 |
+
|
57 |
+
# Interfejs użytkownika
|
58 |
+
iface = gr.Interface(
|
59 |
+
fn=extract_invoice_data,
|
60 |
+
inputs=gr.File(label="Wybierz plik PDF"),
|
61 |
+
outputs="json",
|
62 |
+
title="Ekstrakcja danych z faktury",
|
63 |
+
description="Prze艣lij plik PDF, a model zwr贸ci dane sprzedawcy, NIP, kwot臋 i dat臋 p艂atno艣ci."
|
64 |
+
)
|
65 |
+
|
66 |
+
if __name__ == "__main__":
|
67 |
+
iface.launch()
|