npc0 committed on
Commit
d970538
Β·
verified Β·
1 Parent(s): fb3ae30

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +543 -16
src/streamlit_app.py CHANGED
@@ -1,10 +1,15 @@
1
  import os
2
  os.environ["STREAMLIT_GLOBAL_CONFIG"] = "/data/.streamlit/config.toml"
 
3
  import uuid
4
  import random
5
  import urllib.parse # To parse URL parameters
 
6
  import streamlit as st
 
 
7
  import duckdb
 
8
 
9
  # Database file path
10
  DB_PATH = 'steampolis.duckdb'
@@ -92,8 +97,473 @@ def initialize_database():
92
  if 'init_con' in locals() and init_con:
93
  init_con.close()
94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  # Helper function to get a random unvoted comment
96
  def get_random_unvoted_comment(user_id, topic_id):
 
 
 
 
 
97
  local_con = None
98
  try:
99
  local_con = duckdb.connect(database=DB_PATH, read_only=False)
@@ -104,7 +574,7 @@ def get_random_unvoted_comment(user_id, topic_id):
104
  """, [topic_id]).fetchone()[0]
105
 
106
  if comment_count == 0:
107
- return None, "No comments in this topic yet."
108
 
109
  # Attempt to get a random comment that the user has NOT voted on
110
  result = local_con.execute("""
@@ -118,8 +588,27 @@ def get_random_unvoted_comment(user_id, topic_id):
118
  ORDER BY RANDOM()
119
  LIMIT 1
120
  """, [topic_id, user_id]).fetchone()
121
-
122
  if result:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  # Found an unvoted comment
124
  return result[0], result[1]
125
  else:
@@ -220,12 +709,12 @@ def home_page():
220
  st.title("Welcome to SteamPolis")
221
  st.markdown("Choose an option:")
222
 
223
- if st.button("Create New Topic"):
224
  st.session_state.page = 'create_topic'
225
  st.rerun()
226
 
227
  st.markdown("---")
228
- st.markdown("Or join an existing topic:")
229
  topic_input = st.text_input("Enter Topic ID or URL")
230
 
231
  if st.button("Join Topic"):
@@ -308,6 +797,7 @@ def view_topic_page():
308
  current_comment_id = st.session_state.get('current_comment_id')
309
  current_comment_content = st.session_state.get('current_comment_content', "Loading comments...")
310
  comment_history = st.session_state.get('comment_history', "")
 
311
 
312
  if not topic_id:
313
  st.warning("No topic selected. Returning to home.")
@@ -342,7 +832,7 @@ def view_topic_page():
342
 
343
 
344
  # Include functional information
345
- st.markdown(f"**Quest Scroll ID:** `{topic_id}`")
346
  # Construct shareable link using current app URL
347
  app_url = st.query_params.get('base', ['http://localhost:8501/'])[0] # Get base URL if available
348
  shareable_link = f"{app_url}?topic={topic_id}" if app_url else f"?topic={topic_id}"
@@ -436,24 +926,50 @@ def view_topic_page():
436
  ]
437
  # Randomly select a phrase
438
  random_phrase = random.choice(intro_phrases)
 
439
 
440
  if current_comment_id: # Only show voting if there's a comment to vote on
441
  # Display comment history and the current comment with the random intro
442
- st.markdown(f"{comment_history}\n\n[Collected new insight, {random_phrase}]:\n* {current_comment_content}")
 
 
 
 
 
 
443
 
444
  # Handle vote logic
445
  def handle_vote(vote_type, comment_id, topic_id, user_id):
 
 
 
 
446
  local_con = None
447
  try:
448
  local_con = duckdb.connect(database=DB_PATH, read_only=False)
449
- vote_id = str(uuid.uuid4())
 
 
 
 
 
450
  local_con.execute("""
451
  INSERT INTO votes (id, user_id, comment_id, vote_type)
452
  VALUES (?, ?, ?, ?)
 
 
 
 
453
  """, [vote_id, user_id, comment_id, vote_type])
454
 
455
  # Append voted comment to history
 
 
456
  vote_text = "πŸ‘" if vote_type == "agree" else "πŸ‘Ž" if vote_type == "disagree" else "😐"
 
 
 
 
457
  st.session_state.comment_history += f"\n\n{vote_text} {current_comment_content}"
458
 
459
  # Check vote count and trigger special event
@@ -461,11 +977,14 @@ def view_topic_page():
461
  if 'vote_count' not in st.session_state:
462
  st.session_state.vote_count = 0
463
 
 
 
 
464
  st.session_state.vote_count += 1
465
 
466
- # Check if it's time for a potential special event (every 5 votes)
467
  if st.session_state.vote_count % 5 == 0:
468
- st.session_state.vote_count = 0
469
  # 30% chance to trigger the special sharing event
470
  if random.random() < 0.3:
471
  prompts = [
@@ -473,14 +992,17 @@ def view_topic_page():
473
  "A letter arrives from the Emperor's office, requesting your personal insight on the matter. What counsel do you offer?",
474
  "As you walk through the streets, people gather, eager to hear your thoughts on the Emperor's dilemma. What advice do you give?"
475
  ]
 
476
  share_wisdom(random.choice(prompts), allow_skip=True)
477
 
478
  # Get next comment
 
479
  next_comment_id, next_comment_content = get_random_unvoted_comment(user_id, topic_id)
480
  st.session_state.current_comment_id = next_comment_id
481
  st.session_state.current_comment_content = next_comment_content
482
 
483
  # Update progress
 
484
  update_user_progress(user_id, topic_id, next_comment_id)
485
 
486
  st.rerun() # Rerun to update UI
@@ -531,19 +1053,24 @@ if 'current_comment_content' not in st.session_state:
531
  st.session_state.current_comment_content = "Loading comments..."
532
  if 'comment_history' not in st.session_state:
533
  st.session_state.comment_history = ""
 
 
534
 
535
  # Initialize the database on first run
536
  initialize_database()
537
 
538
  # Handle initial load from URL query parameters
 
539
  query_params = st.query_params
540
- if 'topic' in query_params and st.session_state.page == 'home':
541
- topic_id_from_url = query_params['topic']
542
- st.session_state.page = 'view_topic'
543
- st.session_state.current_topic_id = topic_id_from_url
544
- # The view_topic_page will handle loading user/comment based on session_state.user_email
545
- st.query_params = {} # Clear query params after processing
546
- st.rerun()
 
 
547
 
548
 
549
  # Render the appropriate page based on session state
 
1
  import os
2
  os.environ["STREAMLIT_GLOBAL_CONFIG"] = "/data/.streamlit/config.toml"
3
+ import time
4
  import uuid
5
  import random
6
  import urllib.parse # To parse URL parameters
7
+ from functools import lru_cache
8
  import streamlit as st
9
+ import numpy as np
10
+ import pandas as pd
11
  import duckdb
12
+ import hdbscan
13
 
14
  # Database file path
15
  DB_PATH = 'steampolis.duckdb'
 
97
  if 'init_con' in locals() and init_con:
98
  init_con.close()
99
 
100
def get_ttl_hash(seconds=360):
    """Return a value that stays constant within each `seconds`-long window.

    Intended as an extra argument to lru_cache-decorated functions so the
    cache effectively expires every `seconds` seconds.
    """
    now = time.time()
    return round(now / seconds)
103
+
104
# Build the R matrix from user voting data: users are rows, comments are
# columns; cell values are 1 ('agree'), 0 ('neutral'), -1 ('disagree'),
# and NaN for pairs with no vote. Requires pandas (fetchdf / pivot_table).
def get_r_matrix_from_votes():
    """Return (r_matrix, user_id_to_index, comment_id_to_index).

    On an empty votes table or any error, returns an empty DataFrame and
    two empty dicts so callers can proceed without special-casing.
    """
    con = None
    try:
        # Read-only connection is sufficient for this query.
        con = duckdb.connect(database=DB_PATH, read_only=True)

        votes_df = con.execute("""
            SELECT user_id, comment_id, vote_type
            FROM votes
        """).fetchdf()

        if votes_df.empty:
            return pd.DataFrame(), {}, {}

        # Translate vote labels into numeric scores.
        votes_df['vote_value'] = votes_df['vote_type'].map(
            {'agree': 1, 'neutral': 0, 'disagree': -1}
        )

        # pivot_table leaves missing user/comment pairs as NaN.
        r_matrix = votes_df.pivot_table(
            index='user_id',
            columns='comment_id',
            values='vote_value'
        )

        # Positional mappings from ids to matrix row/column indices.
        user_id_to_index = {uid: i for i, uid in enumerate(r_matrix.index)}
        comment_id_to_index = {cid: i for i, cid in enumerate(r_matrix.columns)}

        return r_matrix, user_id_to_index, comment_id_to_index

    except Exception as e:
        # st.error is not available here, just print or log
        print(f"Error generating R matrix: {e}")
        return pd.DataFrame(), {}, {}
    finally:
        if con:
            con.close()
154
+
155
+
156
# NaN-tolerant Hamming-style distance between two vote profiles.
def hamming_distance_with_nan(u1, u2):
    """Proportion of differing entries over positions where both vectors
    are defined (non-NaN).

    Args:
        u1, u2: array-likes of equal length (vote profiles; NaN = no vote).

    Returns:
        float: fraction of mismatches among shared non-NaN positions.
            0.0 for identical vectors (including all-NaN pairs); 1.0 for
            distinct vectors with no shared non-NaN positions.
    """
    a = np.asarray(u1)
    b = np.asarray(u2)

    # Positions defined in both profiles (De Morgan of the original mask).
    shared = ~(np.isnan(a) | np.isnan(b))

    if not np.any(shared):
        # No overlap: identical vectors (e.g. both all-NaN) -> 0.0,
        # otherwise treat as maximally distant.
        return 0.0 if np.array_equal(a, b, equal_nan=True) else 1.0

    a_shared = a[shared]
    b_shared = b[shared]

    mismatches = np.sum(a_shared != b_shared)
    return mismatches / len(a_shared)
196
+
197
+
198
# Cluster users with HDBSCAN using the NaN-aware Hamming distance above.
def get_clusters_from_r_matrix(r_matrix):
    """Run HDBSCAN over the rows (users) of the vote matrix.

    Args:
        r_matrix (pd.DataFrame): user x comment vote matrix from
            get_r_matrix_from_votes (index user_id, columns comment_id).

    Returns:
        np.ndarray: per-user cluster labels (-1 = noise). Empty array when
        the matrix is empty or clustering fails.
    """
    if r_matrix.empty:
        print("R matrix is empty, cannot perform clustering.")
        return np.array([])

    try:
        # Default min_cluster_size/min_samples; allow_single_cluster=True
        # avoids an error when every point lands in one cluster.
        model = hdbscan.HDBSCAN(
            metric=hamming_distance_with_nan,
            allow_single_cluster=True,
        )
        # HDBSCAN expects an array, so fit on the DataFrame's values.
        model.fit(r_matrix.values)
        return model.labels_
    except Exception as e:
        # No Streamlit context here; log to console instead of st.error.
        print(f"Error during HDBSCAN clustering: {e}")
        return np.array([])
238
+
239
+
240
def get_cluster_labels():
    """Return (cluster_labels, user_id_to_index) for all voting users.

    Falls back to a single all-zero cluster when clustering produces no
    labels (empty vote matrix or an HDBSCAN failure).
    """
    matrix, uid_index, _comment_index = get_r_matrix_from_votes()
    labels = get_clusters_from_r_matrix(matrix)
    if not len(labels):
        labels = [0] * len(uid_index)
    return labels, uid_index
246
+
247
+
248
# Look up one user's cluster label plus the users sharing that cluster.
@lru_cache()
def get_user_cluster_label(user_id, ttl_hash=None):
    """Return (cluster_label, same_cluster_users) for `user_id`.

    Args:
        user_id (str): The ID of the user.
        ttl_hash: opaque value used only to expire lru_cache entries
            (see get_ttl_hash); never read inside the function.

    Returns:
        tuple:
            - int or None: the user's cluster label (-1 = noise), or None
              when the user is absent from the clustering data (e.g. has
              not voted yet) or the index is inconsistent.
            - list[str]: user IDs in the same cluster (includes the input
              user when found); empty list otherwise.

    NOTE(review): lru_cache hands the same cached list object to every
    caller — callers should not mutate it.
    """
    cluster_labels, user_id_to_index = get_cluster_labels()
    n_labels = len(cluster_labels)

    if user_id not in user_id_to_index:
        # User not in the R matrix used for clustering (e.g. no votes yet).
        return None, []

    user_index = user_id_to_index[user_id]
    if not (0 <= user_index < n_labels):
        # Inconsistent mapping; should not happen, but fail safe.
        print(f"Warning: User index {user_index} out of bounds for cluster labels array length {len(cluster_labels)}")
        return None, []

    target_cluster_label = int(cluster_labels[user_index])

    # Collect every user whose label matches the target. Iterating the
    # forward mapping visits the same (user, index) pairs as the original
    # reverse mapping, in the same insertion order.
    same_cluster_users = []
    for current_user_id, index in user_id_to_index.items():
        if not (0 <= index < n_labels):
            print(f"Warning: Index {index} from index_to_user_id out of bounds for cluster labels array length {len(cluster_labels)}")
            continue
        if int(cluster_labels[index]) == target_cluster_label:
            same_cluster_users.append(current_user_id)

    return target_cluster_label, same_cluster_users
307
+
308
+
309
# Helper function to get the top k most polarized comments for a group of users
def get_top_k_polarized_comments_for_users(user_ids, k=5):
    """
    Retrieves the top k comments most agreed or disagreed upon (most polarized)
    by a given group of users.

    Args:
        user_ids (iterable[str]): User IDs. Any iterable is accepted —
            callers in this file pass both lists and sets.
        k (int): The number of top comments to retrieve.

    Returns:
        list[tuple]: Tuples of (comment_id, comment_content,
            average_vote_score), ordered by the absolute average score
            descending. Empty list when user_ids is empty, no votes are
            found, or on error.
    """
    # Normalize to a list first: callers pass sets as well as lists, and
    # the DuckDB parameter binder expects a list for the IN (?) parameter.
    user_ids = list(user_ids)
    if not user_ids:
        return []  # Cannot query without user IDs

    local_con = None
    try:
        local_con = duckdb.connect(database=DB_PATH, read_only=True)

        # Parameterized query; the list binds to the IN clause parameter.
        query = """
            SELECT
                v.comment_id,
                c.content,
                AVG(CASE
                    WHEN v.vote_type = 'agree' THEN 1.0
                    WHEN v.vote_type = 'neutral' THEN 0.0
                    WHEN v.vote_type = 'disagree' THEN -1.0
                    ELSE NULL -- Should not happen with current data
                END) as average_vote_score
            FROM votes v
            JOIN comments c ON v.comment_id = c.id
            WHERE v.user_id IN (?)
            GROUP BY v.comment_id, c.content
            HAVING COUNT(v.user_id) > 0 -- At least one group member voted on this comment
            ORDER BY ABS(average_vote_score) DESC
            LIMIT ?
        """
        result = local_con.execute(query, [user_ids, k]).fetchall()

        return result

    except Exception as e:
        # st.error is not available here, just print or log
        print(f"Error getting top k polarized comments for users {user_ids}: {e}")
        return []  # Return empty list on error
    finally:
        if local_con:
            local_con.close()
366
+
367
+
368
# NOTE: deliberately NOT decorated with @lru_cache — callers pass user_ids
# as a list, which is unhashable, so the decorator made every call raise
# TypeError before the body ran (and the caller's broad except silently
# fell back to the error default).
def estimate_group_voting_diversity(user_ids, topic_id):
    """
    Estimates the diversity of voting within a group of users for a topic.

    Diversity is the average variance of numerical vote scores (-1, 0, 1)
    across comments that at least two users in the group voted on.

    Args:
        user_ids (list[str]): User IDs belonging to the group.
        topic_id (str): The ID of the topic.

    Returns:
        float: Score in [0.0, 1.0]. 0.0 = no diversity (all shared votes
        identical); 1.0 = maximal diversity (e.g. half agree, half
        disagree). Returns 0.0 for groups smaller than two users, when no
        comment has two group voters, or on error.
    """
    if not user_ids or len(user_ids) < 2:
        return 0.0

    local_con = None
    try:
        local_con = duckdb.connect(database=DB_PATH, read_only=True)

        # All votes by these users on comments belonging to the topic.
        query = """
            SELECT
                v.comment_id,
                v.user_id,
                v.vote_type
            FROM votes v
            JOIN comments c ON v.comment_id = c.id
            WHERE c.topic_id = ? AND v.user_id IN (?)
        """
        results = local_con.execute(query, [topic_id, user_ids]).fetchall()

        if not results:
            return 0.0  # No votes found for this group on this topic

        # Map vote labels to numeric scores (unknown labels count as 0.0).
        vote_map = {'agree': 1.0, 'neutral': 0.0, 'disagree': -1.0}

        # Group the numeric scores per comment.
        votes_by_comment = {}
        for comment_id, user_id, vote_type in results:
            votes_by_comment.setdefault(comment_id, []).append(
                vote_map.get(vote_type, 0.0)
            )

        # Variance is only meaningful where at least two group members voted.
        variances = [
            np.var(scores)
            for scores in votes_by_comment.values()
            if len(scores) >= 2
        ]

        if not variances:
            return 0.0  # No comment has two or more group voters

        # Max variance for values in [-1, 0, 1] is 1.0, so the mean is
        # already in [0, 1]. Cast to a plain float for a clean return type.
        return float(np.mean(variances))

    except Exception as e:
        # st.error is not available here, just print or log
        print(f"Error estimating group voting diversity for topic {topic_id} and users {user_ids}: {e}")
        return 0.0  # Return 0.0 on error
    finally:
        if local_con:
            local_con.close()
446
+
447
+
448
# Give a user group a flavour name based on its share of the topic's
# voters (significance) and how varied its votes are (diversity).
def name_user_group(user_ids, topic_id):
    """Return a (name, description) pair for a group of users in a topic.

    Dedicated fallbacks cover the edge cases: ("Silent Gathering", ...)
    for an empty group, ("Unengaged Group", ...) when no member voted,
    ("Isolated Voices", ...) when the topic has no voters, and
    ("Mysterious Gathering", ...) on error.
    """
    if not user_ids:
        return "Silent Gathering", "This group has no members."

    local_con = None
    try:
        local_con = duckdb.connect(database=DB_PATH, read_only=True)

        # Distinct voters across the whole topic.
        row = local_con.execute("""
            SELECT COUNT(DISTINCT user_id)
            FROM votes v
            JOIN comments c ON v.comment_id = c.id
            WHERE c.topic_id = ?
        """, [topic_id]).fetchone()
        total_voters_in_topic = row[0] if row else 0

        # Distinct voters restricted to this group.
        row = local_con.execute("""
            SELECT COUNT(DISTINCT user_id)
            FROM votes v
            JOIN comments c ON v.comment_id = c.id
            WHERE c.topic_id = ? AND v.user_id IN (?)
        """, [topic_id, user_ids]).fetchone()
        group_voters_count = row[0] if row else 0

        if group_voters_count == 0:
            return "Unengaged Group", "No members of this group have voted on this topic."
        if total_voters_in_topic == 0:
            # Defensive: unreachable when the group itself has voters.
            return "Isolated Voices", "This topic has no voters yet."

        # Share of the topic's voters contributed by this group.
        significance_proportion = group_voters_count / total_voters_in_topic
        diversity_score = estimate_group_voting_diversity(user_ids, topic_id)

        def bucket(value, low_threshold, med_threshold):
            # Map a 0..1 score onto low/medium/high bands (thresholds tunable).
            if value > med_threshold:
                return "high"
            if value > low_threshold:
                return "medium"
            return "low"

        significance_level = bucket(significance_proportion, 0.1, 0.5)
        diversity_level = bucket(diversity_score, 0.2, 0.5)

        # (significance, diversity) -> (name, description) lookup table.
        group_names = {
            ("high", "low"): ("Likeheart Village", "A large group where opinions converge."),
            ("high", "medium"): ("Harmonious Assembly", "A significant gathering with mostly aligned views."),
            ("high", "high"): ("Vibrant Forum", "A large, active group with diverse perspectives."),
            ("medium", "low"): ("Quiet Consensus", "A moderately sized group with little disagreement."),
            ("medium", "medium"): ("Mixed Opinions", "A balanced group with varied viewpoints."),
            ("medium", "high"): ("Lively Discussion", "A moderately sized group with strong, differing opinions."),
            ("low", "low"): ("Echo Chamber Nook", "A small corner where similar thoughts resonate."),
            ("low", "medium"): ("Scattered Thoughts", "A small group with somewhat varied, isolated views."),
            ("low", "high"): ("Whispering Gallery", "A small group where many different ideas are quietly shared."),
        }
        return group_names[(significance_level, diversity_level)]

    except Exception as e:
        print(f"Error naming user group for topic {topic_id} and users {user_ids}: {e}")
        return "Mysterious Gathering", "An error occurred while trying to name this group."
    finally:
        if local_con:
            local_con.close()
558
+
559
+
560
  # Helper function to get a random unvoted comment
561
  def get_random_unvoted_comment(user_id, topic_id):
562
+ new_area_comments = st.session_state.get("_new_area_comments", [])
563
+ if len(new_area_comments) != 0:
564
+ value = new_area_comments.pop()
565
+ st.session_state._new_area_comments = new_area_comments
566
+ return value[0], value[1]
567
  local_con = None
568
  try:
569
  local_con = duckdb.connect(database=DB_PATH, read_only=False)
 
574
  """, [topic_id]).fetchone()[0]
575
 
576
  if comment_count == 0:
577
+ return None, "Share your insight!"
578
 
579
  # Attempt to get a random comment that the user has NOT voted on
580
  result = local_con.execute("""
 
588
  ORDER BY RANDOM()
589
  LIMIT 1
590
  """, [topic_id, user_id]).fetchone()
 
591
  if result:
592
+ # Check for cluster change and set message flag
593
+ current_label, current_users = get_user_cluster_label(user_id, topic_id)
594
+ current_users_set = set(current_users)
595
+
596
+ previous_label = st.session_state.get('_previous_cluster_label')
597
+ previous_users_set = st.session_state.get('_previous_cluster_users_set', set())
598
+
599
+ # Check if cluster label has changed AND the set of users in the new cluster is different
600
+ # This indicates the user has moved to a different group of commenters
601
+ if current_label is not None and previous_label is not None and current_label != previous_label:
602
+ if current_users_set != previous_users_set:
603
+ # Set a flag in session state to display the message later in the main rendering logic
604
+ st.session_state._show_new_area_message = True
605
+ new_area_comments = get_top_k_polarized_comments_for_users(current_users_set, k=5)
606
+ st.session_state._new_area_comments = new_area_comments
607
+ # print(f"DEBUG: Cluster changed for user {user_id} in topic {topic_id}: {previous_label} -> {current_label}")
608
+ # print(f"DEBUG: Previous users count: {len(previous_users_set)}, Current users count: {len(current_users_set)}")
609
+ st.session_state._previous_cluster_label = current_label
610
+ st.session_state._previous_cluster_users_set = current_users_set
611
+
612
  # Found an unvoted comment
613
  return result[0], result[1]
614
  else:
 
709
  st.title("Welcome to SteamPolis")
710
  st.markdown("Choose an option:")
711
 
712
+ if st.button("Create New Topic (Quest)"):
713
  st.session_state.page = 'create_topic'
714
  st.rerun()
715
 
716
  st.markdown("---")
717
+ st.markdown("Or join an existing topic (quest):")
718
  topic_input = st.text_input("Enter Topic ID or URL")
719
 
720
  if st.button("Join Topic"):
 
797
  current_comment_id = st.session_state.get('current_comment_id')
798
  current_comment_content = st.session_state.get('current_comment_content', "Loading comments...")
799
  comment_history = st.session_state.get('comment_history', "")
800
+ show_new_area_message = st.session_state.get('_show_new_area_message', True)
801
 
802
  if not topic_id:
803
  st.warning("No topic selected. Returning to home.")
 
832
 
833
 
834
  # Include functional information
835
+ st.markdown(f"**Shareable Quest Scroll ID:** `{topic_id}`")
836
  # Construct shareable link using current app URL
837
  app_url = st.query_params.get('base', ['http://localhost:8501/'])[0] # Get base URL if available
838
  shareable_link = f"{app_url}?topic={topic_id}" if app_url else f"?topic={topic_id}"
 
926
  ]
927
  # Randomly select a phrase
928
  random_phrase = random.choice(intro_phrases)
929
+ st.markdown(comment_history)
930
 
931
  if current_comment_id: # Only show voting if there's a comment to vote on
932
  # Display comment history and the current comment with the random intro
933
+ if show_new_area_message == True:
934
+ _, user_ids = get_user_cluster_label(user_id)
935
+ new_area_name, desc = name_user_group(user_ids, topic_id)
936
+ st.markdown(f"You've collected {len(comment_history.splitlines())} insights so far.")
937
+ st.markdown(f"And yet a new place you have arrived: `{new_area_name}`. {desc}")
938
+ st.session_state._show_new_area_message = False
939
+ st.markdown(f"[Collected new insight, {random_phrase}]:\n* {current_comment_content}")
940
 
941
  # Handle vote logic
942
  def handle_vote(vote_type, comment_id, topic_id, user_id):
943
+ # Add JavaScript to scroll to the bottom anchor after the page reloads
944
+ # This script will be included in the next render cycle triggered by st.rerun()
945
+ # Ensure an element with id="bottom" exists in the rendered page,
946
+ # typically placed after the content you want to scroll to (e.g., comment history).
947
  local_con = None
948
  try:
949
  local_con = duckdb.connect(database=DB_PATH, read_only=False)
950
+ # Use INSERT OR REPLACE INTO or ON CONFLICT DO UPDATE to handle repeat votes
951
+ # The UNIQUE constraint on (user_id, comment_id) in the votes table
952
+ # allows us to update the existing vote if one already exists for this user/comment pair.
953
+ # We generate a new UUID for the 'id' column, but it will only be used
954
+ # if this is a new insert. If it's an update, the existing 'id' is kept.
955
+ vote_id = str(uuid.uuid4()) # Generate a new UUID for the potential insert
956
  local_con.execute("""
957
  INSERT INTO votes (id, user_id, comment_id, vote_type)
958
  VALUES (?, ?, ?, ?)
959
+ ON CONFLICT (user_id, comment_id)
960
+ DO UPDATE SET
961
+ vote_type = excluded.vote_type, -- Update vote_type with the new value
962
+ created_at = current_localtimestamp(); -- Update timestamp to reflect the latest vote
963
  """, [vote_id, user_id, comment_id, vote_type])
964
 
965
  # Append voted comment to history
966
+ # Note: This appends the comment regardless of whether it was a new vote or an update.
967
+ # The history is a simple log, not a reflection of vote changes.
968
  vote_text = "πŸ‘" if vote_type == "agree" else "πŸ‘Ž" if vote_type == "disagree" else "😐"
969
+ comment_history = st.session_state.comment_history.split("\n\n")
970
+ if len(comment_history) > 10:
971
+ comment_history = ["..."] + comment_history[-10:]
972
+ st.session_state.comment_history = "\n\n".join(comment_history)
973
  st.session_state.comment_history += f"\n\n{vote_text} {current_comment_content}"
974
 
975
  # Check vote count and trigger special event
 
977
  if 'vote_count' not in st.session_state:
978
  st.session_state.vote_count = 0
979
 
980
+ # Increment vote count only if it was a new vote or a change?
981
+ # The current logic increments on every button click. Let's keep that for now
982
+ # as it drives the special event trigger based on interaction frequency.
983
  st.session_state.vote_count += 1
984
 
985
+ # Check if it's time for a potential special event (every 5 votes/interactions)
986
  if st.session_state.vote_count % 5 == 0:
987
+ st.session_state.vote_count = 0 # Reset count after triggering
988
  # 30% chance to trigger the special sharing event
989
  if random.random() < 0.3:
990
  prompts = [
 
992
  "A letter arrives from the Emperor's office, requesting your personal insight on the matter. What counsel do you offer?",
993
  "As you walk through the streets, people gather, eager to hear your thoughts on the Emperor's dilemma. What advice do you give?"
994
  ]
995
+ # Pass the current topic_id to share_wisdom if needed, though it's not currently used there.
996
  share_wisdom(random.choice(prompts), allow_skip=True)
997
 
998
  # Get next comment
999
+ # This should always get the next unvoted comment for the user in this topic.
1000
  next_comment_id, next_comment_content = get_random_unvoted_comment(user_id, topic_id)
1001
  st.session_state.current_comment_id = next_comment_id
1002
  st.session_state.current_comment_content = next_comment_content
1003
 
1004
  # Update progress
1005
+ # Update the user's progress to the next comment they should see.
1006
  update_user_progress(user_id, topic_id, next_comment_id)
1007
 
1008
  st.rerun() # Rerun to update UI
 
1053
  st.session_state.current_comment_content = "Loading comments..."
1054
  if 'comment_history' not in st.session_state:
1055
  st.session_state.comment_history = ""
1056
+ if 'processed_url_params' not in st.session_state:
1057
+ st.session_state.processed_url_params = False # Add flag initialization
1058
 
1059
  # Initialize the database on first run
1060
  initialize_database()
1061
 
1062
  # Handle initial load from URL query parameters
1063
+ # Process only once per session load using the flag
1064
  query_params = st.query_params
1065
+ # Check for 'topic' param and if it hasn't been processed yet
1066
+ if 'topic' in query_params and not st.session_state.processed_url_params:
1067
+ topic_id_from_url = query_params.get('topic') # Use .get for safety
1068
+ if topic_id_from_url: # Check if topic_id is actually retrieved
1069
+ st.session_state.page = 'view_topic'
1070
+ st.session_state.current_topic_id = topic_id_from_url
1071
+ st.session_state.processed_url_params = True # Mark as processed
1072
+ # The view_topic_page will handle loading user/comment based on session_state.user_email
1073
+ st.rerun() # Rerun to apply the page change
1074
 
1075
 
1076
  # Render the appropriate page based on session state