Spaces:

bvd757
/

Papers_classification

Running

App Files Community

bvd757 committed on Apr 8

Commit

b8c8071

1 Parent(s): 8292fd2

app fixes_v6

Browse files

Files changed (1) hide show

app.py +14 -6

app.py CHANGED Viewed

@@ -7,7 +7,13 @@ from transformers import (
     DebertaV2Model,
     DebertaV2Tokenizer,
 )
-import sentencepiece
 model_name = "microsoft/deberta-v3-base"
 tokenizer = DebertaV2Tokenizer.from_pretrained(model_name)
@@ -37,6 +43,7 @@ def classify_text(text, model, tokenizer, device, threshold=0.5):
 def get_themes(text, model, tokenizer, label_to_theme, device, limit=5):
     probabilities, _ = classify_text(text, model, tokenizer, device)
     themes = []
     for label in probabilities[0].argsort()[-limit:]:
         themes.append((label_to_theme[str(label)], probabilities[0][label]))
@@ -57,7 +64,7 @@ class DebertPaperClassifier(torch.nn.Module):
             torch.nn.Linear(512, num_labels)
         )
-        #self._init_weights()
         if class_weights is not None:
             self.loss_fct = torch.nn.BCEWithLogitsLoss(weight=class_weights.to(device))
         else:
@@ -112,11 +119,11 @@ def load_model(test=False):
         path = '/home/user/app'
         with open(f'{path}/label_to_theme.json', 'r') as f:
             label_to_theme = json.load(f)
-    class_weights = torch.load(f'{path}/class_weights.pth').to(device)
-    model = DebertPaperClassifier(device=device, num_labels=len(label_to_theme), class_weights=class_weights).to(device)
     model.load_state_dict(torch.load(f"{path}/full_model_v4.pth", map_location=device))
     if test:
         print(device)
@@ -126,6 +133,7 @@ def load_model(test=False):
         print(get_themes(text, model, tokenizer, label_to_theme, device))
     return model, tokenizer, label_to_theme, device
 def kek():
     title = st.text_input("Title")

     DebertaV2Model,
     DebertaV2Tokenizer,
 )
+import sentencepiece
+try:
+    import sentencepiece
+except ImportError:
+    st.error("Требуется установить SentencePiece: pip install sentencepiece")
+    st.stop()
 model_name = "microsoft/deberta-v3-base"
 tokenizer = DebertaV2Tokenizer.from_pretrained(model_name)
 def get_themes(text, model, tokenizer, label_to_theme, device, limit=5):
     probabilities, _ = classify_text(text, model, tokenizer, device)
+    print(probabilities)
     themes = []
     for label in probabilities[0].argsort()[-limit:]:
         themes.append((label_to_theme[str(label)], probabilities[0][label]))
             torch.nn.Linear(512, num_labels)
         )
+        self._init_weights()
         if class_weights is not None:
             self.loss_fct = torch.nn.BCEWithLogitsLoss(weight=class_weights.to(device))
         else:
         path = '/home/user/app'
         with open(f'{path}/label_to_theme.json', 'r') as f:
             label_to_theme = json.load(f)
+    #class_weights = torch.load('model_info/class_weights.pth').to(device)
+    # model = DebertPaperClassifier(device=device, num_labels=len(label_to_theme), class_weights=class_weights).to(device)
+    # model.load_state_dict(torch.load("model_info/full_model_v4.pth", map_location=device))
+    model = DebertPaperClassifierV5(device=device, num_labels=47).to(device)
     model.load_state_dict(torch.load(f"{path}/full_model_v4.pth", map_location=device))
     if test:
         print(device)
         print(get_themes(text, model, tokenizer, label_to_theme, device))
     return model, tokenizer, label_to_theme, device
 def kek():
     title = st.text_input("Title")