Spaces:
Sleeping
Sleeping
File size: 1,934 Bytes
d727a16 4b1c6eb d727a16 2430162 00e69cc 2430162 336ea26 2430162 d727a16 581cd9f 00e69cc 68fd8e0 4b1c6eb 3dfdc35 5b6d5fb d952d3e ba3e19e 3dfdc35 ba3e19e 3dfdc35 e7c7d84 a3ce6a9 e7c7d84 3152fdc e7c7d84 90a444c 94f8c57 90a444c 94f8c57 e7c7d84 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
import streamlit as st
import pandas as pd
from sentence_transformers import SentenceTransformer
@st.cache(allow_output_mutation=True)
def _load_model(name):
    """Build the SentenceTransformer called *name* once and reuse it on reruns."""
    return SentenceTransformer(name)


# Streamlit re-executes the script on every rerun, so the model is obtained
# through the cached loader instead of being constructed again each time.
model = _load_model('paraphrase-MiniLM-L6-v2')

input_sentence = st.text_input('Movie title', 'Life of Brian')

# Example of a sentence list in the form the model accepts.
sentence = ['This framework generates embeddings for each input sentence']

# encode() is given a one-element list, so embedding[0] is the vector for
# the text typed into the input box.
embedding = model.encode([input_sentence])

# Tie the slider range to the embedding length so every position is reachable
# and the index below can never run past the end of the vector.
x = st.slider('Select a value', min_value=0, max_value=len(embedding[0]) - 1)

st.write(
    'The embedding of',
    '"' + input_sentence + '"',
    'at position',
    x,
    'is',
    embedding[0][int(x)],
)
# Sentence list upload: parse the CSV and show its first rows as a preview.
uploaded_file1 = st.file_uploader("Choose a file: sentence list")
if uploaded_file1 is not None:
    df1 = pd.read_csv(uploaded_file1)
    sentence_preview = df1.head()
    st.write(sentence_preview)

# Topic list upload: same treatment for the second CSV.
uploaded_file2 = st.file_uploader("Choose a file: topic list")
if uploaded_file2 is not None:
    df2 = pd.read_csv(uploaded_file2)
    topic_preview = df2.head()
    st.write(topic_preview)
@st.cache
def convert_df_to_csv(df):
    """Return *df* serialised as UTF-8 encoded CSV bytes.

    Decorated with ``st.cache`` so the conversion is not recomputed on every
    rerun of the script.
    """
    # NOTE(review): newer Streamlit releases deprecate st.cache in favour of
    # st.cache_data -- confirm the deployed version before switching.
    return df.to_csv().encode('utf-8')


# df1 / df2 are only assigned once their file has been uploaded, so the
# scoring, the result table and the download button all sit under this guard.
if uploaded_file1 is not None and uploaded_file2 is not None:
    from sentence_transformers import SentenceTransformer, util

    if 'sentence' not in df1.columns or 'topic' not in df2.columns:
        # Report a readable message instead of letting a KeyError surface.
        st.error(
            "The sentence list needs a 'sentence' column and "
            "the topic list needs a 'topic' column."
        )
    else:
        embedder = SentenceTransformer('all-MiniLM-L6-v2')

        # Hand the encoder a plain list of strings rather than a pandas
        # Series, so it does not depend on the Series index labels or on
        # every cell already being a str.
        corpus = [str(text) for text in df1['sentence']]
        corpus_embeddings = embedder.encode(corpus, convert_to_tensor=True)

        for topic in df2['topic']:
            # The same string is used as the text to embed and as the name
            # of the score column added to df1.
            label = str(topic)
            topic_embedding = embedder.encode(label, convert_to_tensor=True)
            # Row 0 holds this topic's cosine similarity to each sentence.
            cos_scores = util.cos_sim(topic_embedding, corpus_embeddings)[0]
            # Store plain floats: move the tensor to host memory and convert
            # it to a NumPy array before assigning it as a column.
            df1[label] = cos_scores.cpu().numpy()

        st.write(df1)

        st.download_button(
            label="Download data as CSV",
            data=convert_df_to_csv(df1),
            file_name='output.csv',
            mime='text/csv',
        )
|