kryman27 committed on
Commit
ce4e81b
verified
1 Parent(s): 3e4d13c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -1
app.py CHANGED
@@ -19,4 +19,49 @@ def extract_invoice_data(pdf_file):
19
  full_text = "\n".join(page.extract_text() for page in pdf.pages if page.extract_text())
20
 
21
  # Tokenizacja danych z uwzględnieniem układu dokumentu
22
- tokens = tokenizer(full_text, return_tensors="pt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  full_text = "\n".join(page.extract_text() for page in pdf.pages if page.extract_text())
20
 
21
  # Tokenizacja danych z uwzględnieniem układu dokumentu
22
+ tokens = tokenizer(full_text, return_tensors="pt", truncation=True)
23
+
24
+ # Predykcja modelu
25
+ outputs = model(**tokens)
26
+ predictions = outputs.logits.argmax(-1).squeeze().tolist()
27
+
28
+ # Przetwarzanie wyników
29
+ entities = []
30
+ for token, pred in zip(tokens.tokens(), predictions):
31
+ if pred > 0: # Pomijamy tło
32
+ entities.append((token, model.config.id2label[pred]))
33
+
34
+ # Wyszukiwanie kluczowych wartości
35
+ seller_name = [token for token, label in entities if "ORG" in label]
36
+ seller_nip = nip_pattern.search(full_text)
37
+ kwoty = kwota_pattern.findall(full_text)
38
+ kwoty = [float(k.replace(",", ".")) for k in kwoty if k.replace(",", ".").replace(".", "").isdigit()]
39
+ total_amount = max(kwoty) if kwoty else None
40
+
41
+ # Szukamy daty płatności
42
+ payment_date = None
43
+ for line in full_text.split("\n"):
44
+ if any(keyword in line.lower() for keyword in payment_keywords):
45
+ date_match = data_pattern.search(line)
46
+ if date_match:
47
+ payment_date = date_match.group()
48
+ break
49
+
50
+ return {
51
+ "Sprzedawca": " ".join(seller_name) if seller_name else "Nie znaleziono",
52
+ "NIP": seller_nip.group() if seller_nip else "Nie znaleziono",
53
+ "Kwota całkowita": total_amount if total_amount else "Nie znaleziono",
54
+ "Data płatności": payment_date if payment_date else "Nie znaleziono"
55
+ }
56
+
57
+ # Interfejs użytkownika
58
+ iface = gr.Interface(
59
+ fn=extract_invoice_data,
60
+ inputs=gr.File(label="Wybierz plik PDF"),
61
+ outputs="json",
62
+ title="Ekstrakcja danych z faktury",
63
+ description="Prześlij plik PDF, a model zwróci dane sprzedawcy, NIP, kwotę i datę płatności."
64
+ )
65
+
66
+ if __name__ == "__main__":
67
+ iface.launch()