kambris committed on
Commit
a1fcd63
·
verified ·
1 Parent(s): afa7452

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -2
app.py CHANGED
@@ -214,7 +214,6 @@ def process_and_summarize(df, top_n=50, topic_strategy="Auto", n_topics=None, mi
214
  "n_gram_range": (1, 3),
215
  "top_n_words": 15,
216
  "verbose": True,
217
- "diversity": 0.5,
218
  "stop_words": ARABIC_STOP_WORDS
219
  }
220
 
@@ -318,8 +317,12 @@ def main():
318
  )
319
 
320
  if topic_strategy == "Manual":
 
321
  n_documents = len(df)
322
- max_topics = min(500, int(np.log10(n_documents) * 100))
 
 
 
323
 
324
  n_topics = st.slider(
325
  "Number of Topics",
@@ -328,6 +331,13 @@ def main():
328
  value=min(20, max_topics),
329
  help=f"Select the desired number of topics (max {max_topics} based on dataset size)"
330
  )
 
 
 
 
 
 
 
331
 
332
  with col2:
333
  top_n = st.number_input(
 
214
  "n_gram_range": (1, 3),
215
  "top_n_words": 15,
216
  "verbose": True,
 
217
  "stop_words": ARABIC_STOP_WORDS
218
  }
219
 
 
317
  )
318
 
319
  if topic_strategy == "Manual":
320
+ # Calculate reasonable max topics based on dataset size
321
  n_documents = len(df)
322
+ if n_documents < 1000:
323
+ max_topics = min(50, n_documents // 20)
324
+ else:
325
+ max_topics = min(500, int(np.log10(n_documents) * 100))
326
 
327
  n_topics = st.slider(
328
  "Number of Topics",
 
331
  value=min(20, max_topics),
332
  help=f"Select the desired number of topics (max {max_topics} based on dataset size)"
333
  )
334
+
335
+ st.info(f"""
336
+ 💡 For your dataset of {n_documents:,} documents:
337
+ - Minimum topics: 2
338
+ - Maximum topics: {max_topics}
339
+ - Recommended range: {max(2, max_topics//5)}-{max_topics//2}
340
+ """)
341
 
342
  with col2:
343
  top_n = st.number_input(