mohitmayank committed on
Commit
b2c3199
·
1 Parent(s): 971b1fb

added UMAP

Browse files
Files changed (1) hide show
  1. app.py +9 -3
app.py CHANGED
@@ -1,5 +1,6 @@
1
  """
2
  A Streamlit application to visualize sentence embeddings
 
3
  Author: Mohit Mayank
4
  Contact: [email protected]
5
  """
@@ -16,6 +17,7 @@ import plotly.express as px
16
  # DR
17
  from sklearn.decomposition import PCA
18
  from sklearn.manifold import TSNE
 
19
 
20
  ## Init
21
  ## ----------------
@@ -57,7 +59,6 @@ supported_models = ['all-MiniLM-L6-v2', 'paraphrase-albert-small-v2', 'paraphras
57
  selected_model_option = st.sidebar.selectbox("Select Model:", supported_models)
58
  text_col_name = st.sidebar.text_input("Text column to embed")
59
  if len(text_col_name) > 0 and df is not None:
60
- print("text_col_name -->", text_col_name)
61
  df[text_col_name] = df[text_col_name].str.wrap(30)
62
  df[text_col_name] = df[text_col_name].apply(lambda x: x.replace('\n', '<br>'))
63
  progress = st.empty()
@@ -68,7 +69,7 @@ if len(text_col_name) > 0 and df is not None:
68
 
69
  ## Visualization
70
  st.sidebar.markdown("## Visualization")
71
- dr_algo = st.sidebar.selectbox("Dimensionality Reduction Algorithm", ('PCA', 't-SNE'))
72
  color_col = st.sidebar.text_input("Color using this col")
73
  if len(color_col.strip()) == 0:
74
  color_col = None
@@ -81,6 +82,11 @@ if st.sidebar.button('Plot!'):
81
  elif dr_algo == 't-SNE':
82
  tsne = TSNE(n_components=2)
83
  reduced_embeddings = tsne.fit_transform(embeddings)
 
 
 
 
 
84
 
85
  # modify the df
86
  # df['complete_embeddings'] = embeddings
@@ -94,4 +100,4 @@ if st.sidebar.button('Plot!'):
94
  fig.update_layout(yaxis={'visible': False, 'showticklabels': False})
95
  fig.update_layout(xaxis={'visible': False, 'showticklabels': False})
96
  fig.update_traces(marker=dict(size=10, opacity=0.7, line=dict(width=1,color='DarkSlateGrey')),selector=dict(mode='markers'))
97
- st.plotly_chart(fig, use_container_width=True)
 
1
  """
2
  A Streamlit application to visualize sentence embeddings
3
+
4
  Author: Mohit Mayank
5
  Contact: [email protected]
6
  """
 
17
  # DR
18
  from sklearn.decomposition import PCA
19
  from sklearn.manifold import TSNE
20
+ import umap.umap_ as umap
21
 
22
  ## Init
23
  ## ----------------
 
59
  selected_model_option = st.sidebar.selectbox("Select Model:", supported_models)
60
  text_col_name = st.sidebar.text_input("Text column to embed")
61
  if len(text_col_name) > 0 and df is not None:
 
62
  df[text_col_name] = df[text_col_name].str.wrap(30)
63
  df[text_col_name] = df[text_col_name].apply(lambda x: x.replace('\n', '<br>'))
64
  progress = st.empty()
 
69
 
70
  ## Visualization
71
  st.sidebar.markdown("## Visualization")
72
+ dr_algo = st.sidebar.selectbox("Dimensionality Reduction Algorithm", ('PCA', 't-SNE', 'UMAP'))
73
  color_col = st.sidebar.text_input("Color using this col")
74
  if len(color_col.strip()) == 0:
75
  color_col = None
 
82
  elif dr_algo == 't-SNE':
83
  tsne = TSNE(n_components=2)
84
  reduced_embeddings = tsne.fit_transform(embeddings)
85
+ elif dr_algo == 'UMAP':
86
+ reducer = umap.UMAP(random_state=42)
87
+ reducer.fit(embeddings)
88
+ reduced_embeddings = reducer.transform(embeddings)
89
+
90
 
91
  # modify the df
92
  # df['complete_embeddings'] = embeddings
 
100
  fig.update_layout(yaxis={'visible': False, 'showticklabels': False})
101
  fig.update_layout(xaxis={'visible': False, 'showticklabels': False})
102
  fig.update_traces(marker=dict(size=10, opacity=0.7, line=dict(width=1,color='DarkSlateGrey')),selector=dict(mode='markers'))
103
+ st.plotly_chart(fig, use_container_width=True)