hacpdsae2023 committed on
Commit
e7c7d84
·
1 Parent(s): 48d4abb

Computing cosine distance to topics

Browse files
Files changed (1) hide show
  1. app.py +19 -2
app.py CHANGED
@@ -23,12 +23,29 @@ st.write('The embedding of', '"' + input_sentence + '"', 'at position',x,'is',em
23
  uploaded_file1 = st.file_uploader("Choose a file: sentence list")
24
  if uploaded_file1 is not None:
25
  #read csv
26
- df1=pd.read_csv(uploaded_file1)
27
  st.write(df1)
28
 
29
  uploaded_file2 = st.file_uploader("Choose a file: topic list")
30
  if uploaded_file2 is not None:
31
  #read csv
32
- df2=pd.read_csv(uploaded_file2)
33
  st.write(df2)
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  uploaded_file1 = st.file_uploader("Choose a file: sentence list")
24
  if uploaded_file1 is not None:
25
  #read csv
26
+ df1=pd.read_csv(uploaded_file1.head())
27
  st.write(df1)
28
 
29
  uploaded_file2 = st.file_uploader("Choose a file: topic list")
30
  if uploaded_file2 is not None:
31
  #read csv
32
+ df2=pd.read_csv(uploaded_file2.head())
33
  st.write(df2)
34
 
35
+ if uploaded_file1 is not None and uploaded_file2 is not None:
36
+ from sentence_transformers import SentenceTransformer, util
37
+ import torch
38
+
39
+ embedder = SentenceTransformer('all-MiniLM-L6-v2')
40
+ corpus = df1['sentence']
41
+ topics = df2['topics']
42
+
43
+ corpus_embeddings = embedder.encode(corpus, convert_to_tensor=True)
44
+ for topic in topics:
45
+ topic_embedding = embedder.encode(topic, convert_to_tensor=True)
46
+ cos_scores = util.cos_sim(query_embedding, corpus_embeddings)[0]
47
+ df1[str(topic)] = cos_scores
48
+
49
+ st.write(df1)
50
+
51
+