Spaces:
Runtime error
Runtime error
Commit
·
5daf8df
1
Parent(s):
16dfa40
Update app.py
Browse files
app.py
CHANGED
@@ -1,19 +1,22 @@
|
|
1 |
import streamlit as st
|
2 |
|
3 |
-
# Library for Sentence Similarity
|
4 |
-
import pandas as pd
|
5 |
-
from sentence_transformers import SentenceTransformer
|
6 |
-
from sklearn.metrics.pairwise import cosine_similarity
|
7 |
|
8 |
# Library for Entailment
|
9 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
10 |
import torch
|
11 |
|
12 |
|
|
|
|
|
|
|
13 |
|
14 |
# Load models and tokenisers for both sentence transformers and text classification
|
15 |
|
16 |
-
sentence_transformer_model = SentenceTransformer('all-MiniLM-L6-v2')
|
17 |
|
18 |
tokenizer = AutoTokenizer.from_pretrained("roberta-large-mnli")
|
19 |
|
@@ -49,29 +52,29 @@ if sidebar_selectbox == "Compare two sentences":
|
|
49 |
|
50 |
print("Comparing sentences...")
|
51 |
|
52 |
-
### Compare Sentence Similarity ###
|
53 |
|
54 |
-
# Perform calculations
|
55 |
|
56 |
-
#Initialise sentences
|
57 |
-
sentences = []
|
58 |
|
59 |
-
# Append input sentences to 'sentences' list
|
60 |
-
sentences.append(sentence_1)
|
61 |
-
sentences.append(sentence_2)
|
62 |
|
63 |
-
# Create embeddings for both sentences
|
64 |
-
sentence_embeddings = sentence_transformer_model.encode(sentences)
|
65 |
|
66 |
-
cos_sim = cosine_similarity(sentence_embeddings[0].reshape(1, -1), sentence_embeddings[1].reshape(1, -1))[0][0]
|
67 |
-
cos_sim = round(cos_sim * 100) # Convert to percentage and round-off
|
68 |
|
69 |
|
70 |
-
# st.write('Similarity between "{}" and "{}" is {}%'.format(sentence_1,
|
71 |
-
# sentence_2, cos_sim))
|
72 |
|
73 |
-
st.subheader("Similarity")
|
74 |
-
st.write(f"Similarity between the two sentences is {cos_sim}%.")
|
75 |
|
76 |
|
77 |
### Text classification - entailment, neutral or contradiction ###
|
@@ -100,6 +103,22 @@ if sidebar_selectbox == "Compare two sentences":
|
|
100 |
st.write(text_classification_model.config.id2label[2], ":", round(outputs[0][2].item()*100,2),"%")
|
101 |
|
102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
|
104 |
|
105 |
|
|
|
1 |
import streamlit as st
|
2 |
|
3 |
+
# # Library for Sentence Similarity
|
4 |
+
# import pandas as pd
|
5 |
+
# from sentence_transformers import SentenceTransformer
|
6 |
+
# from sklearn.metrics.pairwise import cosine_similarity
|
7 |
|
8 |
# Library for Entailment
|
9 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
10 |
import torch
|
11 |
|
12 |
|
13 |
+
# # Library for keyword extraction
|
14 |
+
import yake
|
15 |
+
|
16 |
|
17 |
# Load models and tokenisers for both sentence transformers and text classification
|
18 |
|
19 |
+
# sentence_transformer_model = SentenceTransformer('all-MiniLM-L6-v2')
|
20 |
|
21 |
tokenizer = AutoTokenizer.from_pretrained("roberta-large-mnli")
|
22 |
|
|
|
52 |
|
53 |
print("Comparing sentences...")
|
54 |
|
55 |
+
# ### Compare Sentence Similarity ###
|
56 |
|
57 |
+
# # Perform calculations
|
58 |
|
59 |
+
# #Initialise sentences
|
60 |
+
# sentences = []
|
61 |
|
62 |
+
# # Append input sentences to 'sentences' list
|
63 |
+
# sentences.append(sentence_1)
|
64 |
+
# sentences.append(sentence_2)
|
65 |
|
66 |
+
# # Create embeddings for both sentences
|
67 |
+
# sentence_embeddings = sentence_transformer_model.encode(sentences)
|
68 |
|
69 |
+
# cos_sim = cosine_similarity(sentence_embeddings[0].reshape(1, -1), sentence_embeddings[1].reshape(1, -1))[0][0]
|
70 |
+
# cos_sim = round(cos_sim * 100) # Convert to percentage and round-off
|
71 |
|
72 |
|
73 |
+
# # st.write('Similarity between "{}" and "{}" is {}%'.format(sentence_1,
|
74 |
+
# # sentence_2, cos_sim))
|
75 |
|
76 |
+
# st.subheader("Similarity")
|
77 |
+
# st.write(f"Similarity between the two sentences is {cos_sim}%.")
|
78 |
|
79 |
|
80 |
### Text classification - entailment, neutral or contradiction ###
|
|
|
103 |
st.write(text_classification_model.config.id2label[2], ":", round(outputs[0][2].item()*100,2),"%")
|
104 |
|
105 |
|
106 |
+
### Extract keywords with YAKE ### (might make more sense with word cloud)
|
107 |
+
|
108 |
+
st.subheader("Keywords:")
|
109 |
+
|
110 |
+
kw_extractor = yake.KeywordExtractor(top=10, stopwords=None)
|
111 |
+
keywords = kw_extractor.extract_keywords(sentence_2)
|
112 |
+
|
113 |
+
# keywords_array = []
|
114 |
+
|
115 |
+
for kw, v in keywords:
|
116 |
+
# print("Keyphrase: ", kw, ": score", v)
|
117 |
+
# keywords_array.append(kw)
|
118 |
+
|
119 |
+
st.write(kw)
|
120 |
+
|
121 |
+
|
122 |
|
123 |
|
124 |
|