Spaces:
Running
Running
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +543 -16
src/streamlit_app.py
CHANGED
@@ -1,10 +1,15 @@
|
|
1 |
import os
|
2 |
os.environ["STREAMLIT_GLOBAL_CONFIG"] = "/data/.streamlit/config.toml"
|
|
|
3 |
import uuid
|
4 |
import random
|
5 |
import urllib.parse # To parse URL parameters
|
|
|
6 |
import streamlit as st
|
|
|
|
|
7 |
import duckdb
|
|
|
8 |
|
9 |
# Database file path
|
10 |
DB_PATH = 'steampolis.duckdb'
|
@@ -92,8 +97,473 @@ def initialize_database():
|
|
92 |
if 'init_con' in locals() and init_con:
|
93 |
init_con.close()
|
94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
# Helper function to get a random unvoted comment
|
96 |
def get_random_unvoted_comment(user_id, topic_id):
|
|
|
|
|
|
|
|
|
|
|
97 |
local_con = None
|
98 |
try:
|
99 |
local_con = duckdb.connect(database=DB_PATH, read_only=False)
|
@@ -104,7 +574,7 @@ def get_random_unvoted_comment(user_id, topic_id):
|
|
104 |
""", [topic_id]).fetchone()[0]
|
105 |
|
106 |
if comment_count == 0:
|
107 |
-
return None, "
|
108 |
|
109 |
# Attempt to get a random comment that the user has NOT voted on
|
110 |
result = local_con.execute("""
|
@@ -118,8 +588,27 @@ def get_random_unvoted_comment(user_id, topic_id):
|
|
118 |
ORDER BY RANDOM()
|
119 |
LIMIT 1
|
120 |
""", [topic_id, user_id]).fetchone()
|
121 |
-
|
122 |
if result:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
123 |
# Found an unvoted comment
|
124 |
return result[0], result[1]
|
125 |
else:
|
@@ -220,12 +709,12 @@ def home_page():
|
|
220 |
st.title("Welcome to SteamPolis")
|
221 |
st.markdown("Choose an option:")
|
222 |
|
223 |
-
if st.button("Create New Topic"):
|
224 |
st.session_state.page = 'create_topic'
|
225 |
st.rerun()
|
226 |
|
227 |
st.markdown("---")
|
228 |
-
st.markdown("Or join an existing topic:")
|
229 |
topic_input = st.text_input("Enter Topic ID or URL")
|
230 |
|
231 |
if st.button("Join Topic"):
|
@@ -308,6 +797,7 @@ def view_topic_page():
|
|
308 |
current_comment_id = st.session_state.get('current_comment_id')
|
309 |
current_comment_content = st.session_state.get('current_comment_content', "Loading comments...")
|
310 |
comment_history = st.session_state.get('comment_history', "")
|
|
|
311 |
|
312 |
if not topic_id:
|
313 |
st.warning("No topic selected. Returning to home.")
|
@@ -342,7 +832,7 @@ def view_topic_page():
|
|
342 |
|
343 |
|
344 |
# Include functional information
|
345 |
-
st.markdown(f"**Quest Scroll ID:** `{topic_id}`")
|
346 |
# Construct shareable link using current app URL
|
347 |
app_url = st.query_params.get('base', ['http://localhost:8501/'])[0] # Get base URL if available
|
348 |
shareable_link = f"{app_url}?topic={topic_id}" if app_url else f"?topic={topic_id}"
|
@@ -436,24 +926,50 @@ def view_topic_page():
|
|
436 |
]
|
437 |
# Randomly select a phrase
|
438 |
random_phrase = random.choice(intro_phrases)
|
|
|
439 |
|
440 |
if current_comment_id: # Only show voting if there's a comment to vote on
|
441 |
# Display comment history and the current comment with the random intro
|
442 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
443 |
|
444 |
# Handle vote logic
|
445 |
def handle_vote(vote_type, comment_id, topic_id, user_id):
|
|
|
|
|
|
|
|
|
446 |
local_con = None
|
447 |
try:
|
448 |
local_con = duckdb.connect(database=DB_PATH, read_only=False)
|
449 |
-
|
|
|
|
|
|
|
|
|
|
|
450 |
local_con.execute("""
|
451 |
INSERT INTO votes (id, user_id, comment_id, vote_type)
|
452 |
VALUES (?, ?, ?, ?)
|
|
|
|
|
|
|
|
|
453 |
""", [vote_id, user_id, comment_id, vote_type])
|
454 |
|
455 |
# Append voted comment to history
|
|
|
|
|
456 |
vote_text = "π" if vote_type == "agree" else "π" if vote_type == "disagree" else "π"
|
|
|
|
|
|
|
|
|
457 |
st.session_state.comment_history += f"\n\n{vote_text} {current_comment_content}"
|
458 |
|
459 |
# Check vote count and trigger special event
|
@@ -461,11 +977,14 @@ def view_topic_page():
|
|
461 |
if 'vote_count' not in st.session_state:
|
462 |
st.session_state.vote_count = 0
|
463 |
|
|
|
|
|
|
|
464 |
st.session_state.vote_count += 1
|
465 |
|
466 |
-
# Check if it's time for a potential special event (every 5 votes)
|
467 |
if st.session_state.vote_count % 5 == 0:
|
468 |
-
st.session_state.vote_count = 0
|
469 |
# 30% chance to trigger the special sharing event
|
470 |
if random.random() < 0.3:
|
471 |
prompts = [
|
@@ -473,14 +992,17 @@ def view_topic_page():
|
|
473 |
"A letter arrives from the Emperor's office, requesting your personal insight on the matter. What counsel do you offer?",
|
474 |
"As you walk through the streets, people gather, eager to hear your thoughts on the Emperor's dilemma. What advice do you give?"
|
475 |
]
|
|
|
476 |
share_wisdom(random.choice(prompts), allow_skip=True)
|
477 |
|
478 |
# Get next comment
|
|
|
479 |
next_comment_id, next_comment_content = get_random_unvoted_comment(user_id, topic_id)
|
480 |
st.session_state.current_comment_id = next_comment_id
|
481 |
st.session_state.current_comment_content = next_comment_content
|
482 |
|
483 |
# Update progress
|
|
|
484 |
update_user_progress(user_id, topic_id, next_comment_id)
|
485 |
|
486 |
st.rerun() # Rerun to update UI
|
@@ -531,19 +1053,24 @@ if 'current_comment_content' not in st.session_state:
|
|
531 |
st.session_state.current_comment_content = "Loading comments..."
|
532 |
if 'comment_history' not in st.session_state:
|
533 |
st.session_state.comment_history = ""
|
|
|
|
|
534 |
|
535 |
# Initialize the database on first run
|
536 |
initialize_database()
|
537 |
|
538 |
# Handle initial load from URL query parameters
|
|
|
539 |
query_params = st.query_params
|
540 |
-
|
541 |
-
|
542 |
-
|
543 |
-
|
544 |
-
|
545 |
-
|
546 |
-
|
|
|
|
|
547 |
|
548 |
|
549 |
# Render the appropriate page based on session state
|
|
|
1 |
import os
|
2 |
os.environ["STREAMLIT_GLOBAL_CONFIG"] = "/data/.streamlit/config.toml"
|
3 |
+
import time
|
4 |
import uuid
|
5 |
import random
|
6 |
import urllib.parse # To parse URL parameters
|
7 |
+
from functools import lru_cache
|
8 |
import streamlit as st
|
9 |
+
import numpy as np
|
10 |
+
import pandas as pd
|
11 |
import duckdb
|
12 |
+
import hdbscan
|
13 |
|
14 |
# Database file path
|
15 |
DB_PATH = 'steampolis.duckdb'
|
|
|
97 |
if 'init_con' in locals() and init_con:
|
98 |
init_con.close()
|
99 |
|
100 |
+
def get_ttl_hash(seconds=360):
    """Return a value that stays constant within each `seconds`-long window.

    Passing the result as an extra argument to an `lru_cache`-decorated
    function gives that cache an effective TTL: the returned integer changes
    once per window, forcing a cache miss on the next call.

    Args:
        seconds (int): Length of the window in seconds (default 360).

    Returns:
        int: `time.time() / seconds` rounded to the nearest integer.
    """
    return round(time.time() / seconds)
|
103 |
+
|
104 |
+
# Helper function to get the R matrix from user voting data
|
105 |
+
# This matrix represents user-comment interactions (votes)
|
106 |
+
# Users are rows, comments are columns.
|
107 |
+
# Values: 1 for 'agree', 0 for 'neutral', -1 for 'disagree', NaN for unvoted.
|
108 |
+
# Requires pandas and numpy.
|
109 |
+
def get_r_matrix_from_votes():
    """Build the user x comment vote matrix (the "R matrix").

    Rows are user_ids, columns are comment_ids. Cell values: 1 ('agree'),
    0 ('neutral'), -1 ('disagree'), NaN where the user never voted on the
    comment.

    Returns:
        tuple: (r_matrix, user_id_to_index, comment_id_to_index) where the
            index dicts map ids to row/column positions. All three are empty
            when there are no votes or on any error.
    """
    connection = None
    try:
        # Read-only connection is enough for a pure query.
        connection = duckdb.connect(database=DB_PATH, read_only=True)

        raw_votes = connection.execute("""
            SELECT user_id, comment_id, vote_type
            FROM votes
        """).fetchdf()

        if raw_votes.empty:
            # Nothing voted yet: empty matrix, empty mappings.
            return pd.DataFrame(), {}, {}

        # Translate categorical vote types into numeric scores.
        score_for = {'agree': 1, 'neutral': 0, 'disagree': -1}
        raw_votes['vote_value'] = raw_votes['vote_type'].map(score_for)

        # pivot_table leaves NaN for user/comment pairs with no vote.
        matrix = raw_votes.pivot_table(
            index='user_id',
            columns='comment_id',
            values='vote_value'
        )

        # Positional lookups for downstream clustering code.
        user_positions = {uid: pos for pos, uid in enumerate(matrix.index)}
        comment_positions = {cid: pos for pos, cid in enumerate(matrix.columns)}

        return matrix, user_positions, comment_positions

    except Exception as e:
        # No Streamlit context here; log to stdout and degrade gracefully.
        print(f"Error generating R matrix: {e}")
        return pd.DataFrame(), {}, {}
    finally:
        if connection:
            connection.close()
|
154 |
+
|
155 |
+
|
156 |
+
# Custom Hamming-like distance function handling NaNs for clustering
|
157 |
+
# Assumes numpy is imported as np
|
158 |
+
def hamming_distance_with_nan(u1, u2):
    """Hamming-style distance between two vote vectors, ignoring NaN gaps.

    Only positions where *both* vectors hold a real (non-NaN) value are
    compared; the distance is the fraction of those positions that differ.

    Args:
        u1 (np.ndarray or pd.Series): First vote profile.
        u2 (np.ndarray or pd.Series): Second vote profile.

    Returns:
        float: Proportion of differing elements among shared non-NaN
            positions. With no shared positions: 0.0 when the vectors are
            identical (NaNs matching NaNs), else 1.0.
    """
    u1 = np.asarray(u1)
    u2 = np.asarray(u2)

    # Positions where a comparison is meaningful (neither side is NaN).
    shared = ~np.isnan(u1) & ~np.isnan(u2)

    if not shared.any():
        # No overlap at all: identical vectors score 0, otherwise max distance.
        return 0.0 if np.array_equal(u1, u2, equal_nan=True) else 1.0

    lhs = u1[shared]
    rhs = u2[shared]

    # Fraction of mismatches over the comparable positions.
    return np.count_nonzero(lhs != rhs) / len(lhs)
|
196 |
+
|
197 |
+
|
198 |
+
# Function to get clusters using HDBSCAN with the custom Hamming distance
|
199 |
+
# Assumes pandas is imported as pd, numpy as np, and hdbscan is imported
|
200 |
+
def get_clusters_from_r_matrix(r_matrix):
    """Cluster users by vote profile with HDBSCAN and a NaN-aware metric.

    Args:
        r_matrix (pd.DataFrame): User x comment vote matrix from
            get_r_matrix_from_votes (rows: user_id, columns: comment_id).

    Returns:
        np.ndarray: One cluster label per row of r_matrix (-1 means noise).
            Empty array when the matrix is empty or clustering fails.
    """
    if r_matrix.empty:
        # Nothing to cluster.
        print("R matrix is empty, cannot perform clustering.")
        return np.array([])

    try:
        # Default min_cluster_size/min_samples; tune if cluster granularity
        # proves unsuitable. allow_single_cluster avoids an error when all
        # points collapse into one cluster.
        model = hdbscan.HDBSCAN(
            metric=hamming_distance_with_nan,
            allow_single_cluster=True,
        )
        # HDBSCAN wants a plain array, not a DataFrame.
        model.fit(r_matrix.values)
        return model.labels_

    except Exception as e:
        # No Streamlit context here; log to stdout and degrade gracefully.
        print(f"Error during HDBSCAN clustering: {e}")
        return np.array([])
|
238 |
+
|
239 |
+
|
240 |
+
def get_cluster_labels():
    """Cluster every voting user and return the labels with the row mapping.

    Returns:
        tuple: (labels, user_id_to_index) where labels[i] is the cluster id
            for the user at row i of the R matrix. When clustering yields
            nothing, every known user is placed in a single cluster 0.
    """
    votes_matrix, user_index_map, _ = get_r_matrix_from_votes()
    labels = get_clusters_from_r_matrix(votes_matrix)
    if len(labels) == 0:
        # Fallback: one catch-all cluster covering all known users.
        labels = [0] * len(user_index_map)
    return labels, user_index_map
|
246 |
+
|
247 |
+
|
248 |
+
# Function to get the cluster label for a specific user
|
249 |
+
@lru_cache()
def get_user_cluster_label(user_id, ttl_hash=None):
    """
    Gets the HDBSCAN cluster label for a specific user and a list of users
    sharing the same cluster.

    Args:
        user_id (str): The ID of the user.
        ttl_hash (int | None): Cache-busting token; callers are expected to
            pass get_ttl_hash() so lru_cache entries expire each TTL window.
            Never read in the body — it only participates in the cache key.
            NOTE(review): one caller passes a topic_id here instead — confirm
            intent; it only affects cache keying, not results.

    Returns:
        tuple: A tuple containing:
            - int or None: The cluster label (an integer, -1 for noise) if the user
              is found in the clustering result, otherwise None.
            - list[str]: A list of user IDs (including the input user_id if found)
              that belong to the same cluster. Returns an empty list
              if the user is not found or has no cluster label.
    """
    # NOTE(review): get_cluster_labels is NOT itself cached, so every cache
    # miss here re-runs the full R-matrix build and HDBSCAN clustering.
    cluster_labels, user_id_to_index = get_cluster_labels()

    # Create a reverse mapping from index to user_id for easier lookup
    index_to_user_id = {index: uid for uid, index in user_id_to_index.items()}

    target_cluster_label = None
    same_cluster_users = []

    # Check if the user_id exists in the mapping
    if user_id in user_id_to_index:
        user_index = user_id_to_index[user_id]
        # Ensure the index is within the bounds of the cluster_labels array
        if 0 <= user_index < len(cluster_labels):
            target_cluster_label = int(cluster_labels[user_index]) # Get the target label

            # Collect every user whose label matches the target's label
            for index, current_user_id in index_to_user_id.items():
                # Ensure the index is valid for cluster_labels
                if 0 <= index < len(cluster_labels):
                    current_user_label = int(cluster_labels[index])
                    if current_user_label == target_cluster_label:
                        same_cluster_users.append(current_user_id)
                else:
                    # Should not happen if index_to_user_id is consistent with labels
                    print(f"Warning: Index {index} from index_to_user_id out of bounds for cluster labels array length {len(cluster_labels)}")


        else:
            # Should not happen if user_id_to_index is consistent with labels
            print(f"Warning: User index {user_index} out of bounds for cluster labels array length {len(cluster_labels)}")
            # Return None and empty list as user couldn't be processed
            return None, []
    else:
        # User not found in the R matrix used for clustering (e.g., new user with no votes)
        # Return None and empty list as user is not part of the current clustering result
        return None, []

    # Return the target user's label and the list of users in that cluster
    return target_cluster_label, same_cluster_users
|
307 |
+
|
308 |
+
|
309 |
+
# Helper function to get top k most polarized comments for a list of users
|
310 |
+
def get_top_k_polarized_comments_for_users(user_ids, k=5):
    """
    Retrieves the top k comments most agreed or disagreed upon (most polarized)
    by a given list of users.

    Args:
        user_ids (list[str]): A list of user IDs. NOTE(review): one caller
            passes a set — confirm the DuckDB driver accepts non-list
            iterables as a bound parameter.
        k (int): The number of top comments to retrieve.

    Returns:
        list[tuple]: A list of tuples, where each tuple contains
            (comment_id, comment_content, average_vote_score),
            ordered by the absolute value of the average score descending.
            Returns an empty list if no votes are found for these users
            or on error.
    """
    if not user_ids:
        # Cannot query without user IDs
        return []

    local_con = None
    try:
        local_con = duckdb.connect(database=DB_PATH, read_only=True)

        # Votes are scored agree=1, neutral=0, disagree=-1; the average per
        # comment is a polarization score whose magnitude drives the ranking.
        # NOTE(review): "IN (?)" with a bound Python list relies on DuckDB's
        # list-parameter expansion — verify against the installed DuckDB
        # version ("= ANY(?)" is the documented list form).
        query = """
            SELECT
                v.comment_id,
                c.content,
                AVG(CASE
                        WHEN v.vote_type = 'agree' THEN 1.0
                        WHEN v.vote_type = 'neutral' THEN 0.0
                        WHEN v.vote_type = 'disagree' THEN -1.0
                        ELSE NULL -- Should not happen with current data
                    END) as average_vote_score
            FROM votes v
            JOIN comments c ON v.comment_id = c.id
            WHERE v.user_id IN (?)
            GROUP BY v.comment_id, c.content
            HAVING COUNT(v.user_id) > 0 -- Ensure at least one user from the list voted on this comment
            ORDER BY ABS(average_vote_score) DESC
            LIMIT ?
        """
        # Pass the list of user_ids and k as parameters
        result = local_con.execute(query, [user_ids, k]).fetchall()

        return result

    except Exception as e:
        # st.error is not available here, just print or log
        print(f"Error getting top k polarized comments for users {user_ids}: {e}")
        return [] # Return empty list on error
    finally:
        if local_con:
            local_con.close()
|
366 |
+
|
367 |
+
|
368 |
+
def estimate_group_voting_diversity(user_ids, topic_id):
    """
    Estimates the diversity of voting within a group of users for a specific topic.
    Diversity is measured by the average variance of numerical vote scores (-1, 0, 1)
    across comments that at least two users in the group have voted on.

    Args:
        user_ids (list[str]): A list of user IDs belonging to the group
            (any iterable of IDs is accepted).
        topic_id (str): The ID of the topic.

    Returns:
        float: A diversity score between 0.0 and 1.0. 0.0 indicates no diversity
            (all users voted the same way on all shared comments), 1.0 indicates
            maximum possible diversity. Returns 0.0 if the group has fewer than
            2 users, if no comments were voted on by at least two group members,
            or on error.
    """
    # Bug fix: @lru_cache on the implementation requires hashable arguments,
    # but callers (e.g. name_user_group) pass a plain list, which raised
    # TypeError at the decorator on every call. Normalize to a sorted tuple
    # (hashable, order-insensitive) and delegate to the cached core.
    return _estimate_group_voting_diversity_cached(tuple(sorted(user_ids)), topic_id)


@lru_cache()
def _estimate_group_voting_diversity_cached(user_ids, topic_id):
    """Cached core of estimate_group_voting_diversity; `user_ids` is a tuple."""
    if not user_ids or len(user_ids) < 2:
        return 0.0

    local_con = None
    try:
        local_con = duckdb.connect(database=DB_PATH, read_only=True)

        # All votes by these users on comments belonging to the topic.
        query = """
            SELECT
                v.comment_id,
                v.user_id,
                v.vote_type
            FROM votes v
            JOIN comments c ON v.comment_id = c.id
            WHERE c.topic_id = ? AND v.user_id IN (?)
        """
        # DuckDB's Python API handles lists for IN clauses; convert the
        # cache-friendly tuple back to a list for binding.
        results = local_con.execute(query, [topic_id, list(user_ids)]).fetchall()

        if not results:
            return 0.0 # No votes found for this group on this topic

        # Map vote types to numerical scores
        vote_map = {'agree': 1.0, 'neutral': 0.0, 'disagree': -1.0}

        # Group numerical vote scores by comment ID
        votes_by_comment = {}
        for comment_id, user_id, vote_type in results:
            if comment_id not in votes_by_comment:
                votes_by_comment[comment_id] = []
            votes_by_comment[comment_id].append(vote_map.get(vote_type, 0.0)) # Default to 0.0 for unknown types

        # Variance per comment, only where at least two group members voted
        variances = []
        for comment_id, comment_votes in votes_by_comment.items():
            if len(comment_votes) >= 2:
                variances.append(np.var(comment_votes))

        if not variances:
            return 0.0 # No comments voted on by at least two users in the group

        # The maximum possible variance for values in [-1, 0, 1] is 1.0
        # (e.g., half votes are 1, half are -1), so the mean is already in [0, 1].
        average_variance = np.mean(variances)

        return average_variance

    except Exception as e:
        # st.error is not available here, just print or log
        print(f"Error estimating group voting diversity for topic {topic_id} and users {user_ids}: {e}")
        return 0.0 # Return 0.0 on error
    finally:
        if local_con:
            local_con.close()
|
446 |
+
|
447 |
+
|
448 |
+
# Helper function to name a group of users based on their participation and voting diversity
|
449 |
+
def name_user_group(user_ids, topic_id):
    """
    Generates a descriptive name and description for a group of users within a
    specific topic based on their participation level and voting diversity.

    Args:
        user_ids (list[str]): A list of user IDs belonging to the group.
        topic_id (str): The ID of the topic.

    Returns:
        tuple[str, str]: A tuple containing the name and description for the group.
            Returns ("Silent Gathering", ...) for an empty group,
            ("Unengaged Group", ...) when no member voted on this topic,
            ("Isolated Voices", ...) when the topic has no voters at all, and
            ("Mysterious Gathering", ...) on any unexpected error.
    """
    # Empty group: answer without touching the database.
    if not user_ids:
        return "Silent Gathering", "This group has no members."

    local_con = None
    try:
        local_con = duckdb.connect(database=DB_PATH, read_only=True)

        # 1. Count distinct users who voted anywhere in this topic.
        total_voters_result = local_con.execute("""
            SELECT COUNT(DISTINCT user_id)
            FROM votes v
            JOIN comments c ON v.comment_id = c.id
            WHERE c.topic_id = ?
        """, [topic_id]).fetchone()
        total_voters_in_topic = total_voters_result[0] if total_voters_result else 0

        # 2. Count distinct group members who voted in this topic.
        # NOTE(review): "IN (?)" with a bound Python list relies on DuckDB's
        # list-parameter handling — confirm against the installed version.
        group_voters_result = local_con.execute("""
            SELECT COUNT(DISTINCT user_id)
            FROM votes v
            JOIN comments c ON v.comment_id = c.id
            WHERE c.topic_id = ? AND v.user_id IN (?)
        """, [topic_id, user_ids]).fetchone()
        group_voters_count = group_voters_result[0] if group_voters_result else 0

        # No group member has voted on this topic.
        if group_voters_count == 0:
            return "Unengaged Group", "No members of this group have voted on this topic."

        # Defensive: unlikely when group_voters_count > 0, but guards the
        # division below against a zero denominator.
        if total_voters_in_topic == 0:
            return "Isolated Voices", "This topic has no voters yet."


        # 3. Significance = the group's share of all voters on this topic.
        significance_proportion = group_voters_count / total_voters_in_topic

        # 4. Diversity of the group's votes on this topic (0.0 .. 1.0).
        diversity_score = estimate_group_voting_diversity(user_ids, topic_id)

        # 5. Bucket both scores into low/medium/high (thresholds are tunable).
        SIG_LOW_THRESHOLD = 0.1
        SIG_MED_THRESHOLD = 0.5 # High if > MED, Med if > LOW and <= MED, Low if <= LOW
        DIV_LOW_THRESHOLD = 0.2
        DIV_MED_THRESHOLD = 0.5 # High if > MED, Med if > LOW and <= MED, Low if <= LOW

        significance_level = "low"
        if significance_proportion > SIG_MED_THRESHOLD:
            significance_level = "high"
        elif significance_proportion > SIG_LOW_THRESHOLD:
            significance_level = "medium"

        diversity_level = "low"
        if diversity_score > DIV_MED_THRESHOLD:
            diversity_level = "high"
        elif diversity_score > DIV_LOW_THRESHOLD:
            diversity_level = "medium"

        # Map the (significance, diversity) pair to a themed name + description.
        if significance_level == "high":
            if diversity_level == "low":
                return "Likeheart Village", "A large group where opinions converge."
            elif diversity_level == "medium":
                return "Harmonious Assembly", "A significant gathering with mostly aligned views."
            else: # high diversity
                return "Vibrant Forum", "A large, active group with diverse perspectives."
        elif significance_level == "medium":
            if diversity_level == "low":
                return "Quiet Consensus", "A moderately sized group with little disagreement."
            elif diversity_level == "medium":
                return "Mixed Opinions", "A balanced group with varied viewpoints."
            else: # high diversity
                return "Lively Discussion", "A moderately sized group with strong, differing opinions."
        else: # low significance
            if diversity_level == "low":
                return "Echo Chamber Nook", "A small corner where similar thoughts resonate."
            elif diversity_level == "medium":
                return "Scattered Thoughts", "A small group with somewhat varied, isolated views."
            else: # high diversity
                return "Whispering Gallery", "A small group where many different ideas are quietly shared."

    except Exception as e:
        print(f"Error naming user group for topic {topic_id} and users {user_ids}: {e}")
        return "Mysterious Gathering", "An error occurred while trying to name this group." # Default name and description on error
    finally:
        if local_con:
            local_con.close()
|
558 |
+
|
559 |
+
|
560 |
# Helper function to get a random unvoted comment
|
561 |
def get_random_unvoted_comment(user_id, topic_id):
|
562 |
+
new_area_comments = st.session_state.get("_new_area_comments", [])
|
563 |
+
if len(new_area_comments) != 0:
|
564 |
+
value = new_area_comments.pop()
|
565 |
+
st.session_state._new_area_comments = new_area_comments
|
566 |
+
return value[0], value[1]
|
567 |
local_con = None
|
568 |
try:
|
569 |
local_con = duckdb.connect(database=DB_PATH, read_only=False)
|
|
|
574 |
""", [topic_id]).fetchone()[0]
|
575 |
|
576 |
if comment_count == 0:
|
577 |
+
return None, "Share your insight!"
|
578 |
|
579 |
# Attempt to get a random comment that the user has NOT voted on
|
580 |
result = local_con.execute("""
|
|
|
588 |
ORDER BY RANDOM()
|
589 |
LIMIT 1
|
590 |
""", [topic_id, user_id]).fetchone()
|
|
|
591 |
if result:
|
592 |
+
# Check for cluster change and set message flag
|
593 |
+
current_label, current_users = get_user_cluster_label(user_id, topic_id)
|
594 |
+
current_users_set = set(current_users)
|
595 |
+
|
596 |
+
previous_label = st.session_state.get('_previous_cluster_label')
|
597 |
+
previous_users_set = st.session_state.get('_previous_cluster_users_set', set())
|
598 |
+
|
599 |
+
# Check if cluster label has changed AND the set of users in the new cluster is different
|
600 |
+
# This indicates the user has moved to a different group of commenters
|
601 |
+
if current_label is not None and previous_label is not None and current_label != previous_label:
|
602 |
+
if current_users_set != previous_users_set:
|
603 |
+
# Set a flag in session state to display the message later in the main rendering logic
|
604 |
+
st.session_state._show_new_area_message = True
|
605 |
+
new_area_comments = get_top_k_polarized_comments_for_users(current_users_set, k=5)
|
606 |
+
st.session_state._new_area_comments = new_area_comments
|
607 |
+
# print(f"DEBUG: Cluster changed for user {user_id} in topic {topic_id}: {previous_label} -> {current_label}")
|
608 |
+
# print(f"DEBUG: Previous users count: {len(previous_users_set)}, Current users count: {len(current_users_set)}")
|
609 |
+
st.session_state._previous_cluster_label = current_label
|
610 |
+
st.session_state._previous_cluster_users_set = current_users_set
|
611 |
+
|
612 |
# Found an unvoted comment
|
613 |
return result[0], result[1]
|
614 |
else:
|
|
|
709 |
st.title("Welcome to SteamPolis")
|
710 |
st.markdown("Choose an option:")
|
711 |
|
712 |
+
if st.button("Create New Topic (Quest)"):
|
713 |
st.session_state.page = 'create_topic'
|
714 |
st.rerun()
|
715 |
|
716 |
st.markdown("---")
|
717 |
+
st.markdown("Or join an existing topic (quest):")
|
718 |
topic_input = st.text_input("Enter Topic ID or URL")
|
719 |
|
720 |
if st.button("Join Topic"):
|
|
|
797 |
current_comment_id = st.session_state.get('current_comment_id')
|
798 |
current_comment_content = st.session_state.get('current_comment_content', "Loading comments...")
|
799 |
comment_history = st.session_state.get('comment_history', "")
|
800 |
+
show_new_area_message = st.session_state.get('_show_new_area_message', True)
|
801 |
|
802 |
if not topic_id:
|
803 |
st.warning("No topic selected. Returning to home.")
|
|
|
832 |
|
833 |
|
834 |
# Include functional information
|
835 |
+
st.markdown(f"**Shareable Quest Scroll ID:** `{topic_id}`")
|
836 |
# Construct shareable link using current app URL
|
837 |
app_url = st.query_params.get('base', ['http://localhost:8501/'])[0] # Get base URL if available
|
838 |
shareable_link = f"{app_url}?topic={topic_id}" if app_url else f"?topic={topic_id}"
|
|
|
926 |
]
|
927 |
# Randomly select a phrase
|
928 |
random_phrase = random.choice(intro_phrases)
|
929 |
+
st.markdown(comment_history)
|
930 |
|
931 |
if current_comment_id: # Only show voting if there's a comment to vote on
|
932 |
# Display comment history and the current comment with the random intro
|
933 |
+
if show_new_area_message == True:
|
934 |
+
_, user_ids = get_user_cluster_label(user_id)
|
935 |
+
new_area_name, desc = name_user_group(user_ids, topic_id)
|
936 |
+
st.markdown(f"You've collected {len(comment_history.splitlines())} insights so far.")
|
937 |
+
st.markdown(f"And yet a new place you have arrived: `{new_area_name}`. {desc}")
|
938 |
+
st.session_state._show_new_area_message = False
|
939 |
+
st.markdown(f"[Collected new insight, {random_phrase}]:\n* {current_comment_content}")
|
940 |
|
941 |
# Handle vote logic
|
942 |
def handle_vote(vote_type, comment_id, topic_id, user_id):
|
943 |
+
# Add JavaScript to scroll to the bottom anchor after the page reloads
|
944 |
+
# This script will be included in the next render cycle triggered by st.rerun()
|
945 |
+
# Ensure an element with id="bottom" exists in the rendered page,
|
946 |
+
# typically placed after the content you want to scroll to (e.g., comment history).
|
947 |
local_con = None
|
948 |
try:
|
949 |
local_con = duckdb.connect(database=DB_PATH, read_only=False)
|
950 |
+
# Use INSERT OR REPLACE INTO or ON CONFLICT DO UPDATE to handle repeat votes
|
951 |
+
# The UNIQUE constraint on (user_id, comment_id) in the votes table
|
952 |
+
# allows us to update the existing vote if one already exists for this user/comment pair.
|
953 |
+
# We generate a new UUID for the 'id' column, but it will only be used
|
954 |
+
# if this is a new insert. If it's an update, the existing 'id' is kept.
|
955 |
+
vote_id = str(uuid.uuid4()) # Generate a new UUID for the potential insert
|
956 |
local_con.execute("""
|
957 |
INSERT INTO votes (id, user_id, comment_id, vote_type)
|
958 |
VALUES (?, ?, ?, ?)
|
959 |
+
ON CONFLICT (user_id, comment_id)
|
960 |
+
DO UPDATE SET
|
961 |
+
vote_type = excluded.vote_type, -- Update vote_type with the new value
|
962 |
+
created_at = current_localtimestamp(); -- Update timestamp to reflect the latest vote
|
963 |
""", [vote_id, user_id, comment_id, vote_type])
|
964 |
|
965 |
# Append voted comment to history
|
966 |
+
# Note: This appends the comment regardless of whether it was a new vote or an update.
|
967 |
+
# The history is a simple log, not a reflection of vote changes.
|
968 |
vote_text = "π" if vote_type == "agree" else "π" if vote_type == "disagree" else "π"
|
969 |
+
comment_history = st.session_state.comment_history.split("\n\n")
|
970 |
+
if len(comment_history) > 10:
|
971 |
+
comment_history = ["..."] + comment_history[-10:]
|
972 |
+
st.session_state.comment_history = "\n\n".join(comment_history)
|
973 |
st.session_state.comment_history += f"\n\n{vote_text} {current_comment_content}"
|
974 |
|
975 |
# Check vote count and trigger special event
|
|
|
977 |
if 'vote_count' not in st.session_state:
|
978 |
st.session_state.vote_count = 0
|
979 |
|
980 |
+
# Increment vote count only if it was a new vote or a change?
|
981 |
+
# The current logic increments on every button click. Let's keep that for now
|
982 |
+
# as it drives the special event trigger based on interaction frequency.
|
983 |
st.session_state.vote_count += 1
|
984 |
|
985 |
+
# Check if it's time for a potential special event (every 5 votes/interactions)
|
986 |
if st.session_state.vote_count % 5 == 0:
|
987 |
+
st.session_state.vote_count = 0 # Reset count after triggering
|
988 |
# 30% chance to trigger the special sharing event
|
989 |
if random.random() < 0.3:
|
990 |
prompts = [
|
|
|
992 |
"A letter arrives from the Emperor's office, requesting your personal insight on the matter. What counsel do you offer?",
|
993 |
"As you walk through the streets, people gather, eager to hear your thoughts on the Emperor's dilemma. What advice do you give?"
|
994 |
]
|
995 |
+
# Pass the current topic_id to share_wisdom if needed, though it's not currently used there.
|
996 |
share_wisdom(random.choice(prompts), allow_skip=True)
|
997 |
|
998 |
# Get next comment
|
999 |
+
# This should always get the next unvoted comment for the user in this topic.
|
1000 |
next_comment_id, next_comment_content = get_random_unvoted_comment(user_id, topic_id)
|
1001 |
st.session_state.current_comment_id = next_comment_id
|
1002 |
st.session_state.current_comment_content = next_comment_content
|
1003 |
|
1004 |
# Update progress
|
1005 |
+
# Update the user's progress to the next comment they should see.
|
1006 |
update_user_progress(user_id, topic_id, next_comment_id)
|
1007 |
|
1008 |
st.rerun() # Rerun to update UI
|
|
|
1053 |
st.session_state.current_comment_content = "Loading comments..."
|
1054 |
if 'comment_history' not in st.session_state:
|
1055 |
st.session_state.comment_history = ""
|
1056 |
+
if 'processed_url_params' not in st.session_state:
|
1057 |
+
st.session_state.processed_url_params = False # Add flag initialization
|
1058 |
|
1059 |
# Initialize the database on first run
|
1060 |
initialize_database()
|
1061 |
|
1062 |
# Handle initial load from URL query parameters
|
1063 |
+
# Process only once per session load using the flag
|
1064 |
query_params = st.query_params
|
1065 |
+
# Check for 'topic' param and if it hasn't been processed yet
|
1066 |
+
if 'topic' in query_params and not st.session_state.processed_url_params:
|
1067 |
+
topic_id_from_url = query_params.get('topic') # Use .get for safety
|
1068 |
+
if topic_id_from_url: # Check if topic_id is actually retrieved
|
1069 |
+
st.session_state.page = 'view_topic'
|
1070 |
+
st.session_state.current_topic_id = topic_id_from_url
|
1071 |
+
st.session_state.processed_url_params = True # Mark as processed
|
1072 |
+
# The view_topic_page will handle loading user/comment based on session_state.user_email
|
1073 |
+
st.rerun() # Rerun to apply the page change
|
1074 |
|
1075 |
|
1076 |
# Render the appropriate page based on session state
|