Inference-comparison-APP-Document-Understanding

Runtime error

pierreguillou committed on Apr 4, 2023

Commit

dfc5d28

1 Parent(s): d9aec25

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -52,16 +52,21 @@ os.system('python -m pip install --upgrade pip')
 ## model / feature extractor / tokenizer
 import torch
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-# model
-from transformers import LayoutLMv2ForTokenClassification
-model_id = "pierreguillou/layout-xlm-base-finetuned-with-DocLayNet-base-at-paragraphlevel-ml512"
-model = LayoutLMv2ForTokenClassification.from_pretrained(model_id);
-model.to(device);
 # feature extractor
 from transformers import LayoutLMv2FeatureExtractor
@@ -69,13 +74,16 @@ feature_extractor = LayoutLMv2FeatureExtractor(apply_ocr=False)
 # tokenizer
 from transformers import AutoTokenizer
-tokenizer_id = "xlm-roberta-base"
-tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
 # get labels
-id2label = model.config.id2label
-label2id = model.config.label2id
-num_labels = len(id2label)
 # APP outputs
 def app_outputs(uploaded_pdf):

 ## model / feature extractor / tokenizer
+# get device
 import torch
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+## model LiLT
+import transformers
+from transformers import AutoTokenizer, AutoModelForTokenClassification
+tokenizer_lilt = AutoTokenizer.from_pretrained(model_id_lilt)
+model_lilt = AutoModelForTokenClassification.from_pretrained(model_id_lilt);
+model_lilt.to(device);
+## model LayoutXLM
+from transformers import LayoutLMv2ForTokenClassification # LayoutXLMTokenizerFast,
+model_layoutxlm = LayoutLMv2ForTokenClassification.from_pretrained(model_id_layoutxlm);
+model_layoutxlm.to(device);
 # feature extractor
 from transformers import LayoutLMv2FeatureExtractor
 # tokenizer
 from transformers import AutoTokenizer
+tokenizer_layoutxlm = AutoTokenizer.from_pretrained(tokenizer_id_layoutxlm)
 # get labels
+id2label_lilt = model_lilt.config.id2label
+label2id_lilt = model_lilt.config.label2id
+num_labels_lilt = len(id2label_lilt)
+id2label_layoutxlm = model_layoutxlm.config.id2label
+label2id_layoutxlm = model_layoutxlm.config.label2id
+num_labels_layoutxlm = len(id2label_layoutxlm)
 # APP outputs
 def app_outputs(uploaded_pdf):