Spaces:

brisklyapp
/

strings-similarity

Runtime error

emiliosheinz committed on Feb 25, 2023

Commit

9c7a582

1 Parent(s): e922469

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,10 +1,10 @@
-import torch
 import streamlit as st
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
-# load the pre-trained and fine-tuned model and tokenizer
-tokenizer = AutoTokenizer.from_pretrained('nlptown/bert-base-multilingual-uncased-sentiment')
-model = AutoModelForSequenceClassification.from_pretrained('nlptown/bert-base-multilingual-uncased-sentiment')
 # set the app title
 st.title("Brazilian Portuguese Sentence Similarity Checker")
@@ -16,15 +16,14 @@ sentence2 = st.text_input("Enter the second sentence:")
 # check if both sentences are not empty
 if sentence1 and sentence2:
     # tokenize the sentences and get their IDs
-    input_ids = tokenizer.encode(sentence1, sentence2, truncation=True, padding=True, return_tensors='pt')
-    # pass the IDs through the model to get the logits
     with torch.no_grad():
-        logits = model(input_ids)[0]
-    # apply softmax to the logits to get the predicted probabilities
-    probs = torch.softmax(logits, dim=1).squeeze().tolist()
-    # display the predicted probabilities to the user
-    st.write("Probability that the sentences are similar:", probs[1])
-    st.write("Probability that the sentences are dissimilar:", probs[0])

 import streamlit as st
+import torch
+from transformers import AutoTokenizer, AutoModel
+# load the pre-trained model and tokenizer
+tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/distiluse-base-multilingual-cased-v1')
+model = AutoModel.from_pretrained('sentence-transformers/distiluse-base-multilingual-cased-v1')
 # set the app title
 st.title("Brazilian Portuguese Sentence Similarity Checker")
 # check if both sentences are not empty
 if sentence1 and sentence2:
     # tokenize the sentences and get their IDs
+    # Encode the two sentences as a batch of two separate sequences (not as one
+    # sentence pair), so the model returns one row of token embeddings per sentence.
+    encoded = tokenizer([sentence1, sentence2], padding=True, truncation=True, return_tensors='pt')
+    # pass the IDs through the model to get the per-token embeddings
     with torch.no_grad():
+        token_embeddings = model(encoded['input_ids'], attention_mask=encoded['attention_mask'])[0]
+    # Mean-pool the token embeddings into one vector per sentence; the attention
+    # mask zeroes out padding positions so they do not dilute the average.
+    # NOTE(review): AutoModel loads only the transformer; if this checkpoint's
+    # sentence-transformers pipeline applies extra layers after pooling, scores
+    # will differ from that library's output - confirm this is acceptable.
+    mask = encoded['attention_mask'].unsqueeze(-1).to(token_embeddings.dtype)
+    embeddings = (token_embeddings * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)
+    # calculate the cosine similarity between the two sentence vectors
+    # (dim=0 because each operand is a 1-D vector, yielding a single scalar)
+    similarity = torch.nn.functional.cosine_similarity(embeddings[0], embeddings[1], dim=0).item()
+    # display the predicted similarity to the user
+    st.write("Similarity score between the sentences:", similarity)