emiliosheinz committed on
Commit
9c7a582
·
1 Parent(s): e922469

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -13
app.py CHANGED
@@ -1,10 +1,10 @@
1
- import torch
2
  import streamlit as st
3
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
 
4
 
5
- # load the pre-trained and fine-tuned model and tokenizer
6
- tokenizer = AutoTokenizer.from_pretrained('nlptown/bert-base-multilingual-uncased-sentiment')
7
- model = AutoModelForSequenceClassification.from_pretrained('nlptown/bert-base-multilingual-uncased-sentiment')
8
 
9
  # set the app title
10
  st.title("Brazilian Portuguese Sentence Similarity Checker")
@@ -16,15 +16,14 @@ sentence2 = st.text_input("Enter the second sentence:")
16
  # check if both sentences are not empty
17
  if sentence1 and sentence2:
18
  # tokenize the sentences and get their IDs
19
- input_ids = tokenizer.encode(sentence1, sentence2, truncation=True, padding=True, return_tensors='pt')
20
 
21
- # pass the IDs through the model to get the logits
22
  with torch.no_grad():
23
- logits = model(input_ids)[0]
24
 
25
- # apply softmax to the logits to get the predicted probabilities
26
- probs = torch.softmax(logits, dim=1).squeeze().tolist()
27
 
28
- # display the predicted probabilities to the user
29
- st.write("Probability that the sentences are similar:", probs[1])
30
- st.write("Probability that the sentences are dissimilar:", probs[0])
 
 
1
  import streamlit as st
2
+ import torch
3
+ from transformers import AutoTokenizer, AutoModel
4
 
5
+ # load the pre-trained model and tokenizer
6
+ tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/distiluse-base-multilingual-cased-v1')
7
+ model = AutoModel.from_pretrained('sentence-transformers/distiluse-base-multilingual-cased-v1')
8
 
9
  # set the app title
10
  st.title("Brazilian Portuguese Sentence Similarity Checker")
 
16
  # check if both sentences are not empty
17
  if sentence1 and sentence2:
18
  # tokenize the sentences and get their IDs
19
+ input_ids = tokenizer.encode_plus(sentence1, sentence2, padding='max_length', truncation=True, return_tensors='pt')
20
 
21
+ # pass the IDs through the model to get the embeddings
22
  with torch.no_grad():
23
+ embeddings = model(input_ids['input_ids'], attention_mask=input_ids['attention_mask'])[0]
24
 
25
+ # calculate the cosine similarity between the embeddings
26
+ similarity = torch.nn.functional.cosine_similarity(embeddings[0], embeddings[1]).item()
27
 
28
+ # display the predicted similarity to the user
29
+ st.write("Similarity score between the sentences:", similarity)