File size: 1,934 Bytes
d727a16
4b1c6eb
d727a16
2430162
 
 
00e69cc
 
 
2430162
 
 
 
 
336ea26
2430162
d727a16
581cd9f
00e69cc
68fd8e0
4b1c6eb
 
3dfdc35
 
5b6d5fb
d952d3e
ba3e19e
3dfdc35
 
 
 
ba3e19e
 
3dfdc35
e7c7d84
 
 
 
 
 
a3ce6a9
e7c7d84
 
 
 
3152fdc
e7c7d84
 
 
 
90a444c
 
 
 
 
 
94f8c57
90a444c
 
 
 
94f8c57
 
e7c7d84
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import streamlit as st
import pandas as pd

from sentence_transformers import SentenceTransformer
# Streamlit re-executes this script on every rerun, so the model is loaded
# through a cache instead of being rebuilt each time. Newer Streamlit
# releases expose st.cache_resource for this; older ones only have st.cache,
# where allow_output_mutation=True skips hashing of the returned model.
_model_cache = (
    getattr(st, "cache_resource", None)
    or st.cache(allow_output_mutation=True)
)


@_model_cache
def load_model(model_name):
    """Return the SentenceTransformer for *model_name*, reusing a cached one."""
    return SentenceTransformer(model_name)


model = load_model('paraphrase-MiniLM-L6-v2')

# Free-text input whose value is embedded below.
input_sentence = st.text_input('Movie title', 'Life of Brian')

# Example sentence list; it is not referenced anywhere else in this script.
sentence = ['This framework generates embeddings for each input sentence']

# model.encode() is given a one-element list, so the result is indexed with
# [0] to reach the vector for the single input sentence.
embedding = model.encode([input_sentence])

# The slider value selects which component of the embedding is displayed.
x = st.slider('Select a value')

quoted_input = f'"{input_sentence}"'
selected_component = embedding[0][int(x)]
st.write(
    'The embedding of',
    quoted_input,
    'at position',
    x,
    'is',
    selected_component,
)


def _read_and_preview(upload):
    """Parse an uploaded CSV into a DataFrame and display its first rows."""
    frame = pd.read_csv(upload)
    st.write(frame.head())
    return frame


# Sentence list: df1 is only defined once a file has been provided.
uploaded_file1 = st.file_uploader("Choose a file: sentence list")
if uploaded_file1 is not None:
    df1 = _read_and_preview(uploaded_file1)

# Topic list: handled the same way; df2 is only defined after an upload.
uploaded_file2 = st.file_uploader("Choose a file: topic list")
if uploaded_file2 is not None:
    df2 = _read_and_preview(uploaded_file2)

if uploaded_file1 is not None and uploaded_file2 is not None:
    # Score every uploaded sentence against every uploaded topic, show the
    # resulting table, and offer it as a CSV download.
    from sentence_transformers import SentenceTransformer, util
    import torch

    # The script is re-executed on every rerun, so the model is loaded through
    # a cache. Newer Streamlit releases provide st.cache_resource; older ones
    # only have st.cache, where allow_output_mutation=True skips hashing of
    # the returned model object.
    _embedder_cache = (
        getattr(st, "cache_resource", None)
        or st.cache(allow_output_mutation=True)
    )

    @_embedder_cache
    def load_embedder(model_name):
        """Return the SentenceTransformer for *model_name*, reusing a cached one."""
        return SentenceTransformer(model_name)

    embedder = load_embedder('all-MiniLM-L6-v2')

    # Pass a plain list to encode() rather than a pandas Series, so the
    # encoder never depends on the DataFrame's index labels.
    corpus = df1['sentence'].tolist()
    topics = df2['topic']

    corpus_embeddings = embedder.encode(corpus, convert_to_tensor=True)
    for topic in topics:
        topic_embedding = embedder.encode(topic, convert_to_tensor=True)
        # cos_sim returns one row per query; [0] is the row for this topic,
        # holding one score per corpus sentence.
        cos_scores = util.cos_sim(topic_embedding, corpus_embeddings)[0]
        # Give pandas a NumPy array instead of a torch tensor so the new
        # column holds plain numbers, whatever device the model ran on.
        df1[str(topic)] = cos_scores.cpu().numpy()

    st.write(df1)

    # st.cache is deprecated in newer Streamlit releases in favour of
    # st.cache_data; use whichever one the installed version offers.
    _csv_cache = getattr(st, "cache_data", None) or st.cache

    @_csv_cache
    def convert_df_to_csv(df):
        """Serialize *df* to UTF-8 encoded CSV bytes.

        The conversion is cached to prevent recomputation on every rerun.
        ``to_csv()`` is called with its defaults, so the index is written too.
        """
        return df.to_csv().encode('utf-8')

    st.download_button(
        label="Download data as CSV",
        data=convert_df_to_csv(df1),
        file_name='output.csv',
        mime='text/csv',
    )