Spaces:
Sleeping
Sleeping
File size: 1,825 Bytes
935a660 eabf510 82413ee eabf510 0b6790a 4480f3c d0d62c4 4480f3c 09e6c30 ee5ab0e 09e6c30 eabf510 91b81a0 eabf510 4480f3c ee5ab0e eabf510 ee5ab0e eabf510 ee5ab0e 78dfe4e 4168128 eabf510 4480f3c 935a660 f2e9695 eabf510 5b71e40 b5f7332 f5f665e ee4ac1e f5f665e ee5ab0e 418bcdd ee5ab0e f414b62 ee5ab0e 7bc957d b26ceaa d0d62c4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
import streamlit as st
import plotly.express as px
import pandas as pd
import random
import logging
from sentence_transformers import SentenceTransformer, util
from datasets import load_dataset
@st.cache_resource
def load_model(name):
    """Build the SentenceTransformer identified by ``name``.

    The function is wrapped in ``st.cache_resource``, so Streamlit keeps the
    returned model object and hands it back on later calls with the same
    ``name`` instead of constructing it again.
    """
    model = SentenceTransformer(name)
    return model
@st.cache_data
def load_words_dataset():
    """Return the ``Word`` column of the WordNet definitions dataset.

    Loads the ``train`` split of ``marksverdhei/wordnet-definitions-en-2021``.
    The result is cached through ``st.cache_data``.
    """
    return load_dataset(
        "marksverdhei/wordnet-definitions-en-2021",
        split="train",
    )["Word"]
@st.cache_data
def choose_secret_word():
    """Pick one word at random from the list returned by ``load_words_dataset``.

    The function is wrapped in ``st.cache_data``, so the randomly chosen word
    is stored and returned again on later calls rather than re-drawn.
    """
    return random.choice(load_words_dataset())
# Full word list from the dataset.
all_words = load_words_dataset()

# Embedding models to score every guess with; the order fixes the order of
# the similarity values computed below.
model_names = [
    "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
    "BAAI/bge-small-en-v1.5",
]
models = {model_name: load_model(model_name) for model_name in model_names}

# The secret is lower-cased and stripped before it is encoded.
secret_word = choose_secret_word().lower().strip()

# One embedding of the secret per model. ``models`` was built by iterating
# ``model_names`` and dicts preserve insertion order, so this list lines up
# index-for-index with ``model_names``.
secret_embedding = [model.encode(secret_word) for model in models.values()]

# Debug aid: writes the answer to stdout.
print("Secret word ", secret_word)
# Guess history for the current session: one row per guess, laid out as
# [word, similarity under model_names[0], similarity under model_names[1], ...].
if 'words' not in st.session_state:
    st.session_state['words'] = []

st.write('Try to guess a secret word by semantic similarity')
word = st.text_input("Input a word")

# Normalise once, the same way the secret word is normalised, so that the
# duplicate check, the stored row and the encoded text all agree. Previously
# the raw input was compared and stored while the normalised form was encoded,
# so "Cat" and "cat" produced two rows with identical scores.
guess = word.lower().strip()
used_words = {row[0] for row in st.session_state['words']}

# The button's return value is not needed: any non-empty input already counts
# as a guess, and an empty input must never be scored (clicking "Guess" with
# an empty box used to encode "" and add an empty-word row).
st.button("Guess")
if guess and guess not in used_words:
    guess_embedding = [models[name].encode(guess) for name in model_names]
    # secret_embedding and guess_embedding are both ordered like model_names,
    # so zipping them pairs vectors produced by the same model.
    similarities = [
        util.pytorch_cos_sim(secret_vec, guess_vec).cpu().numpy()[0][0]
        for secret_vec, guess_vec in zip(secret_embedding, guess_embedding)
    ]
    st.session_state['words'].append([guess] + similarities)

# Column labels are derived from model_names so that the sort key below keeps
# working if the model list is edited.
similarity_columns = ["Similarity for " + name for name in model_names]

# NOTE(review): the table is rendered on every run, including before the
# first guess (as an empty table) - confirm this matches the intended layout.
words_df = pd.DataFrame(
    st.session_state['words'],
    columns=["word"] + similarity_columns,
).sort_values(by=[similarity_columns[0]], ascending=False)
st.dataframe(words_df, use_container_width=True)
|