Spaces:
Runtime error
Commit
·
b2c3199
1
Parent(s):
971b1fb
Added UMAP as a dimensionality reduction option
Browse files
app.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
"""
|
2 |
A Streamlit application to visualize sentence embeddings
|
|
|
3 |
Author: Mohit Mayank
|
4 |
Contact: [email protected]
|
5 |
"""
|
@@ -16,6 +17,7 @@ import plotly.express as px
|
|
16 |
# DR
|
17 |
from sklearn.decomposition import PCA
|
18 |
from sklearn.manifold import TSNE
|
|
|
19 |
|
20 |
## Init
|
21 |
## ----------------
|
@@ -57,7 +59,6 @@ supported_models = ['all-MiniLM-L6-v2', 'paraphrase-albert-small-v2', 'paraphras
|
|
57 |
selected_model_option = st.sidebar.selectbox("Select Model:", supported_models)
|
58 |
text_col_name = st.sidebar.text_input("Text column to embed")
|
59 |
if len(text_col_name) > 0 and df is not None:
|
60 |
-
print("text_col_name -->", text_col_name)
|
61 |
df[text_col_name] = df[text_col_name].str.wrap(30)
|
62 |
df[text_col_name] = df[text_col_name].apply(lambda x: x.replace('\n', '<br>'))
|
63 |
progress = st.empty()
|
@@ -68,7 +69,7 @@ if len(text_col_name) > 0 and df is not None:
|
|
68 |
|
69 |
## Visualization
|
70 |
st.sidebar.markdown("## Visualization")
|
71 |
-
dr_algo = st.sidebar.selectbox("Dimensionality Reduction Algorithm", ('PCA', 't-SNE'))
|
72 |
color_col = st.sidebar.text_input("Color using this col")
|
73 |
if len(color_col.strip()) == 0:
|
74 |
color_col = None
|
@@ -81,6 +82,11 @@ if st.sidebar.button('Plot!'):
|
|
81 |
elif dr_algo == 't-SNE':
|
82 |
tsne = TSNE(n_components=2)
|
83 |
reduced_embeddings = tsne.fit_transform(embeddings)
|
|
|
|
|
|
|
|
|
|
|
84 |
|
85 |
# modify the df
|
86 |
# df['complete_embeddings'] = embeddings
|
@@ -94,4 +100,4 @@ if st.sidebar.button('Plot!'):
|
|
94 |
fig.update_layout(yaxis={'visible': False, 'showticklabels': False})
|
95 |
fig.update_layout(xaxis={'visible': False, 'showticklabels': False})
|
96 |
fig.update_traces(marker=dict(size=10, opacity=0.7, line=dict(width=1,color='DarkSlateGrey')),selector=dict(mode='markers'))
|
97 |
-
st.plotly_chart(fig, use_container_width=True)
|
|
|
1 |
"""
|
2 |
A Streamlit application to visualize sentence embeddings
|
3 |
+
|
4 |
Author: Mohit Mayank
|
5 |
Contact: [email protected]
|
6 |
"""
|
|
|
17 |
# DR
|
18 |
from sklearn.decomposition import PCA
|
19 |
from sklearn.manifold import TSNE
|
20 |
+
import umap.umap_ as umap
|
21 |
|
22 |
## Init
|
23 |
## ----------------
|
|
|
59 |
selected_model_option = st.sidebar.selectbox("Select Model:", supported_models)
|
60 |
text_col_name = st.sidebar.text_input("Text column to embed")
|
61 |
if len(text_col_name) > 0 and df is not None:
|
|
|
62 |
df[text_col_name] = df[text_col_name].str.wrap(30)
|
63 |
df[text_col_name] = df[text_col_name].apply(lambda x: x.replace('\n', '<br>'))
|
64 |
progress = st.empty()
|
|
|
69 |
|
70 |
## Visualization
|
71 |
st.sidebar.markdown("## Visualization")
|
72 |
+
dr_algo = st.sidebar.selectbox("Dimensionality Reduction Algorithm", ('PCA', 't-SNE', 'UMAP'))
|
73 |
color_col = st.sidebar.text_input("Color using this col")
|
74 |
if len(color_col.strip()) == 0:
|
75 |
color_col = None
|
|
|
82 |
elif dr_algo == 't-SNE':
|
83 |
tsne = TSNE(n_components=2)
|
84 |
reduced_embeddings = tsne.fit_transform(embeddings)
|
85 |
+
elif dr_algo == 'UMAP':
|
86 |
+
reducer = umap.UMAP(random_state=42)
|
87 |
+
reducer.fit(embeddings)
|
88 |
+
reduced_embeddings = reducer.transform(embeddings)
|
89 |
+
|
90 |
|
91 |
# modify the df
|
92 |
# df['complete_embeddings'] = embeddings
|
|
|
100 |
fig.update_layout(yaxis={'visible': False, 'showticklabels': False})
|
101 |
fig.update_layout(xaxis={'visible': False, 'showticklabels': False})
|
102 |
fig.update_traces(marker=dict(size=10, opacity=0.7, line=dict(width=1,color='DarkSlateGrey')),selector=dict(mode='markers'))
|
103 |
+
st.plotly_chart(fig, use_container_width=True)
|