# Scraped page header ("Spaces: Sleeping") - not part of the script.
import pandas as pd
import streamlit as st
from sentence_transformers import SentenceTransformer, util
# Single-sentence demo: embed the text typed by the user and display one
# component of the resulting vector.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
input_sentence = st.text_input('Movie title', 'Life of Brian')

# encode() receives a one-element list, so the vector for the input sentence
# is the first (and only) row of the result.
embedding = model.encode([input_sentence])

# Bound the slider by the vector length so that every component can be
# selected; without explicit bounds st.slider only offers 0-100.
last_position = len(embedding[0]) - 1
x = st.slider('Select a value', 0, last_position)
st.write('The embedding of', '"' + input_sentence + '"', 'at position', x, 'is', embedding[0][x])
def _load_and_preview(upload):
    """Parse an uploaded CSV, show its first rows in the app, and return it."""
    frame = pd.read_csv(upload)
    st.write(frame.head())
    return frame


# First upload: the sentence list. df1 is only bound once a file is provided.
uploaded_file1 = st.file_uploader("Choose a file: sentence list")
if uploaded_file1 is not None:
    df1 = _load_and_preview(uploaded_file1)

# Second upload: the topic list. df2 is only bound once a file is provided.
uploaded_file2 = st.file_uploader("Choose a file: topic list")
if uploaded_file2 is not None:
    df2 = _load_and_preview(uploaded_file2)
if uploaded_file1 is not None and uploaded_file2 is not None:
    # Score every sentence of the first CSV against every topic of the second
    # CSV and append one cosine-similarity column per topic to df1.
    if 'sentence' not in df1.columns or 'topic' not in df2.columns:
        # Report the problem in the app instead of failing with a KeyError.
        st.error(
            'The sentence list needs a "sentence" column and '
            'the topic list needs a "topic" column.'
        )
        st.stop()

    embedder = SentenceTransformer('all-MiniLM-L6-v2')

    # Hand encode() plain lists of strings rather than pandas Series, so that
    # non-string cells (numbers, NaN) and non-default indexes cannot break it.
    corpus = df1['sentence'].astype(str).tolist()
    topics = df2['topic'].astype(str).tolist()

    if corpus and topics:
        corpus_embeddings = embedder.encode(corpus, convert_to_tensor=True)
        # Encode all topics in one batch instead of one call per topic.
        topic_embeddings = embedder.encode(topics, convert_to_tensor=True)
        # One row per topic, one column per sentence. Move the result to the
        # CPU as a NumPy array before storing it in the DataFrame, since the
        # tensors may live on another device.
        scores = util.cos_sim(topic_embeddings, corpus_embeddings).cpu().numpy()
        for topic, topic_scores in zip(topics, scores):
            df1[topic] = topic_scores

    st.write(df1)
def convert_df_to_csv(df):
    """Return *df* rendered as CSV (index column included), encoded as UTF-8 bytes.

    NOTE: nothing here caches the result; the conversion runs each time the
    function is called.
    """
    csv_text = df.to_csv()
    return bytes(csv_text, 'utf-8')
# Offer the scored table for download. df1 is only defined after the sentence
# list has been uploaded and only gains its similarity columns once both files
# are present, so the button is rendered under the same condition as the
# scoring step rather than unconditionally.
if uploaded_file1 is not None and uploaded_file2 is not None:
    st.download_button(
        label="Download data as CSV",
        data=convert_df_to_csv(df1),
        file_name='output.csv',
        mime='text/csv',
    )