ZeeAI1 committed
Commit acf9e5c · verified · 1 Parent(s): 18be3e7

Upload 4 files

Browse files
Files changed (4)
  1. app.py +24 -0
  2. requirements.txt +4 -0
  3. train_data.jsonl +5 -0
  4. train_flan_t5.py +55 -0
app.py ADDED
@@ -0,0 +1,24 @@
+ import json
+
+ import streamlit as st
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+
+ st.title("AI Accountant - Prompt-Based ERP Entry")
+
+ model_path = "google/flan-t5-large"  # load directly from the Hugging Face Hub
+
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
+ model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
+
+ user_input = st.text_area("Enter accounting transaction:")
+
+ if st.button("Generate Entry"):
+     inputs = tokenizer(user_input, return_tensors="pt")
+     outputs = model.generate(**inputs, max_new_tokens=128)
+     result = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     # Parse the output as JSON; eval() would execute arbitrary code if the
+     # model ever emitted a Python expression.
+     try:
+         st.json(json.loads(result))
+     except json.JSONDecodeError:
+         st.text(result)  # fall back to raw text when the output is not valid JSON
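
To try the app locally (assuming the packages in requirements.txt are installed), run `streamlit run app.py`. Note that the base google/flan-t5-large checkpoint will not reliably emit the expected JSON entry format out of the box; once train_flan_t5.py below has run, model_path can be pointed at its ./finetuned-flan-t5 output directory so the app serves the fine-tuned weights instead.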
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ transformers
+ datasets
+ torch
+ streamlit
train_data.jsonl ADDED
@@ -0,0 +1,5 @@
+ {"input": "Today received $245 from Tylor Smith who owed $250 for Samsung X1 sold 3 weeks ago.", "output": "{\"debit\": \"Cash: $245\", \"credit\": \"Accounts Receivable: $245\"}"}
+ {"input": "Received inventory worth $400 from ABC LLC on credit.", "output": "{\"debit\": \"Inventory: $400\", \"credit\": \"Accounts Payable: $400\"}"}
+ {"input": "Paid $1200 rent for April via bank transfer.", "output": "{\"debit\": \"Rent Expense: $1200\", \"credit\": \"Bank: $1200\"}"}
+ {"input": "Sold office chair to John Doe for $300 on credit.", "output": "{\"debit\": \"Accounts Receivable: $300\", \"credit\": \"Sales Revenue: $300\"}"}
+ {"input": "Received $3000 from client Smith & Co for past due invoice.", "output": "{\"debit\": \"Cash: $3000\", \"credit\": \"Accounts Receivable: $3000\"}"}
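
Because app.py parses the generated text with json.loads, every "output" field in this file must itself be valid JSON. A minimal validation sketch (only the filename train_data.jsonl comes from the commit; the specific checks are illustrative):

import json

with open("train_data.jsonl") as f:
    for lineno, line in enumerate(f, start=1):
        record = json.loads(line)  # each line must be one JSON object
        missing = {"input", "output"} - record.keys()
        assert not missing, f"line {lineno} is missing {missing}"
        json.loads(record["output"])  # the target must itself parse as JSON

print("train_data.jsonl is valid")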
train_flan_t5.py ADDED
@@ -0,0 +1,55 @@
+ from datasets import load_dataset
+ from transformers import (
+     AutoTokenizer,
+     AutoModelForSeq2SeqLM,
+     DataCollatorForSeq2Seq,
+     Seq2SeqTrainer,
+     Seq2SeqTrainingArguments,
+ )
+
+ model_checkpoint = "google/flan-t5-large"
+ output_dir = "./finetuned-flan-t5"
+
+ dataset = load_dataset("json", data_files={"train": "train_data.jsonl"})
+
+ tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
+
+ def preprocess_function(examples):
+     inputs = examples["input"]
+     targets = examples["output"]
+     model_inputs = tokenizer(inputs, max_length=512, truncation=True)
+     # Tokenize the targets as labels via the tokenizer's target-side handling.
+     labels = tokenizer(text_target=targets, max_length=128, truncation=True)
+     model_inputs["labels"] = labels["input_ids"]
+     return model_inputs
+
+ tokenized_datasets = dataset.map(preprocess_function, batched=True)
+
+ model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)
+
+ # Pad inputs and labels dynamically per batch; the Trainer's default
+ # collator cannot stack variable-length seq2seq examples.
+ data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)
+
+ training_args = Seq2SeqTrainingArguments(
+     output_dir=output_dir,
+     eval_strategy="no",  # named evaluation_strategy in older transformers releases
+     learning_rate=5e-5,
+     per_device_train_batch_size=2,
+     num_train_epochs=3,
+     weight_decay=0.01,
+     save_total_limit=2,
+     push_to_hub=False,
+ )
+
+ trainer = Seq2SeqTrainer(
+     model=model,
+     args=training_args,
+     train_dataset=tokenized_datasets["train"],
+     data_collator=data_collator,
+ )
+
+ trainer.train()
+
+ model.save_pretrained(output_dir)
+ tokenizer.save_pretrained(output_dir)
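
After training completes, the saved checkpoint loads the same way as the base model. A minimal inference sketch (the prompt is a made-up example in the style of train_data.jsonl, and the expected output assumes the fine-tuning has converged, which five examples over three epochs may not achieve):

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("./finetuned-flan-t5")
model = AutoModelForSeq2SeqLM.from_pretrained("./finetuned-flan-t5")

# Illustrative prompt in the style of the training examples.
prompt = "Sold office chair to John Doe for $300 on credit."
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
# Hoped-for output: {"debit": "Accounts Receivable: $300", "credit": "Sales Revenue: $300"}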