Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -188,39 +188,39 @@ if file_text:
|
|
188 |
with st.expander("📑 Context Table"):
|
189 |
display_context_table(context_words)
|
190 |
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
|
226 |
st.markdown("For more information and updates, visit our [help page](https://huggingface.co/awacke1).")
|
|
|
188 |
with st.expander("📑 Context Table"):
|
189 |
display_context_table(context_words)
|
190 |
|
191 |
+
# Sentence-clustering panel: shows the extracted sentences next to the same
# sentences grouped into clusters, then checks that none were lost or duplicated.
with st.expander("📝 Sentence Clustering", expanded=True):
    # One "sentence" per line; lines of 10 characters or fewer (after
    # stripping surrounding whitespace) are dropped.
    stripped_lines = (line.strip() for line in text_without_timestamps.split('\n'))
    sentences = [line for line in stripped_lines if len(line) > 10]

    num_sentences = len(sentences)
    st.write(f"Total Sentences: {num_sentences}")

    num_clusters = st.slider("Number of Clusters", min_value=2, max_value=10, value=5)

    if not sentences:
        # Nothing to cluster; skip the clustering helpers instead of calling
        # them with an empty list.
        st.info("No sentences longer than 10 characters were found, so there is nothing to cluster.")
    else:
        # NOTE(review): cluster_sentences is defined outside this view;
        # presumably it cannot build more clusters than it has sentences --
        # confirm. Clamping keeps short inputs from requesting more clusters
        # than there are sentences.
        effective_clusters = min(num_clusters, num_sentences)
        clustered_sentences = cluster_sentences(sentences, effective_clusters)

        col1, col2 = st.columns(2)

        with col1:
            st.subheader("Original Text")
            original_text = "\n".join(sentences)
            st.text_area("Original Sentences", value=original_text, height=400)

        with col2:
            st.subheader("Clustered Text")
            # Indexed by cluster position, parallel to clustered_sentences.
            cluster_high_info_words = get_high_info_words_per_cluster(clustered_sentences)

            # Build the whole report in one join rather than repeated "+=".
            clustered_text = "".join(
                f"Cluster {i+1} (High Info Words: {', '.join(cluster_high_info_words[i])}):\n"
                + "\n".join(cluster)
                + "\n\n"
                for i, cluster in enumerate(clustered_sentences)
            )

            st.text_area("Clustered Sentences", value=clustered_text, height=400)

        # Verify that all sentences are accounted for in the clustered output.
        # Compare as multisets (sorted lists), not sets: a set comparison would
        # still pass if a repeated sentence were dropped or emitted twice.
        clustered_sentences_flat = [
            sentence for cluster in clustered_sentences for sentence in cluster
        ]
        if sorted(sentences) == sorted(clustered_sentences_flat):
            st.write("✅ All sentences are accounted for in the clustered output.")
        else:
            st.write("❌ Some sentences are missing in the clustered output.")
|
225 |
|
226 |
st.markdown("For more information and updates, visit our [help page](https://huggingface.co/awacke1).")
|