Spaces:

npc0
/

SteamPolis

Running

App Files Files

npc0 committed 16 days ago

Commit

2828463

verified ·

1 Parent(s): c005b55

fix bugs

Browse files

Files changed (1) hide show

src/streamlit_app.py +52 -31

src/streamlit_app.py CHANGED Viewed

@@ -320,7 +320,8 @@ def get_ttl_hash(seconds=360):
 def get_r_matrix_from_votes():
     local_con = None
     try:
-        local_con = duckdb.connect(database=DB_PATH, read_only=True) # Read-only is sufficient
         # Fetch all vote data
         # fetchdf requires pandas
@@ -504,58 +505,71 @@ def get_user_cluster_label(user_id, ttl_hash=None):
 # Helper function to get top k most polarized comments for a list of users
-def get_top_k_polarized_comments_for_users(user_ids, k=5):
     """
-    Retrieves the top k comments most agreed or disagreed upon (most polarized)
-    by a given list of users.
     Args:
         user_ids (list[str]): A list of user IDs.
         k (int): The number of top comments to retrieve.
     Returns:
         list[tuple]: A list of tuples, where each tuple contains
-                     (comment_id, comment_content, average_vote_score),
-                     ordered by the absolute value of the average score descending.
                      Returns an empty list if no votes are found for these users
-                     or on error.
     """
-    if not user_ids:
-        # print("Warning: get_top_k_polarized_comments_for_users called with empty user_ids list.") # Optional debug
-        return [] # Cannot query without user IDs
     local_con = None
     try:
         local_con = duckdb.connect(database=DB_PATH, read_only=True)
-        # Use parameterized query for the list of user IDs
         # DuckDB's Python API handles lists for IN clauses
         query = """
             SELECT
                 v.comment_id,
                 c.content,
-                AVG(CASE
                     WHEN v.vote_type = 'agree' THEN 1.0
                     WHEN v.vote_type = 'neutral' THEN 0.0
                     WHEN v.vote_type = 'disagree' THEN -1.0
                     ELSE NULL -- Should not happen with current data
-                END) as average_vote_score
             FROM votes v
             JOIN comments c ON v.comment_id = c.id
-            WHERE v.user_id IN (?)
             GROUP BY v.comment_id, c.content
-            HAVING COUNT(v.user_id) > 0 -- Ensure at least one user from the list voted on this comment
-            ORDER BY ABS(average_vote_score) DESC
             LIMIT ?
         """
-        # Pass the list of user_ids and k as parameters
-        result = local_con.execute(query, [user_ids, k]).fetchall()
-        return result
     except Exception as e:
         # st.error is not available here, just print or log
-        print(f"Error getting top k polarized comments for users {user_ids}: {e}")
         return [] # Return empty list on error
     finally:
         if local_con:
@@ -591,17 +605,20 @@ def estimate_group_voting_diversity(user_ids, topic_id):
         # Get all votes for the given topic by the specified users
         # Join with comments to filter by topic_id
-        query = """
             SELECT
                 v.comment_id,
                 v.user_id,
                 v.vote_type
             FROM votes v
             JOIN comments c ON v.comment_id = c.id
-            WHERE c.topic_id = ? AND v.user_id IN (?)
         """
-        # DuckDB's Python API handles lists/tuples for IN clauses
-        results = local_con.execute(query, [topic_id, user_ids_tuple]).fetchall()
         if not results:
             return 0.0 # No votes found for this group on this topic
@@ -643,7 +660,6 @@ def estimate_group_voting_diversity(user_ids, topic_id):
         if local_con:
             local_con.close()
 # Helper function to name a group of users based on their participation and voting diversity
 def name_user_group(user_ids, topic_id):
     """
@@ -805,8 +821,10 @@ def get_random_unvoted_comment(user_id, topic_id):
             if current_label is not None and previous_label is not None and current_label != previous_label:
                 if current_users_set != previous_users_set:
                     # Set a flag in session state to display the message later in the main rendering logic
                     st.session_state._show_new_area_message = True
-                    new_area_comments = get_top_k_polarized_comments_for_users(current_users_set, k=5)
                     st.session_state._new_area_comments = new_area_comments
                     # print(f"DEBUG: Cluster changed for user {user_id} in topic {topic_id}: {previous_label} -> {current_label}")
                     # print(f"DEBUG: Previous users count: {len(previous_users_set)}, Current users count: {len(current_users_set)}")
@@ -1157,7 +1175,7 @@ def view_topic_page():
                         st.markdown(random.choice(prompts))
                         new_comment_text = st.text_area("Your Insight that different from others above (Empty to skip)", key="tmp_new_comment_input")
                         st.session_state.handling_vote = True # lock
-                        if st.button("Share Your Wisdom"):
                             if new_comment_text and len(new_comment_text.strip()):
                                 user_email = st.session_state.get('user_email', '')
                                 user_id = find_or_create_user(user_email) # Ensure user exists
@@ -1296,11 +1314,14 @@ if 'comment_history' not in st.session_state:
 if 'processed_url_params' not in st.session_state:
     st.session_state.processed_url_params = False # Add flag initialization
-# Initialize the database on first run
-initialize_database()
-if st.session_state.get('_add_dummy', True):
     add_dummy_topic()
-    st.session_state._add_dummy = False
 # Handle initial load from URL query parameters
 # Process only once per session load using the flag

 def get_r_matrix_from_votes():
     local_con = None
     try:
+        # Use read_only=False to maintain consistent configuration across all connections
+        local_con = duckdb.connect(database=DB_PATH, read_only=False)
         # Fetch all vote data
         # fetchdf requires pandas
 # Helper function to get the top k highest-consensus (lowest vote variance) comments for a list of users within a topic
+def get_top_k_consensus_comments_for_users(user_ids, topic_id, k=5):
     """
+    Retrieves the top k comments with the highest voting consensus (lowest variance)
+    among a given list of users *for a specific topic*.
+    Consensus is measured by the population variance (VAR_POP) of numerical
+    vote scores (-1 for 'disagree', 0 for 'neutral', 1 for 'agree').
+    Lower variance indicates higher consensus.
     Args:
         user_ids (list[str]): A list of user IDs.
+        topic_id (str): The ID of the topic to filter comments by.
         k (int): The number of top comments to retrieve.
     Returns:
         list[tuple]: A list of tuples, where each tuple contains
+                     (comment_id, comment_content, vote_variance),
+                     ordered by vote_variance ascending (lowest variance first).
                      Returns an empty list if no votes are found for these users
+                     on this topic, or on error, or if the group has fewer than 2 users.
     """
+    if not user_ids or len(user_ids) < 2:
+        # Need at least 2 users from the group to calculate meaningful variance
+        # print("Warning: get_top_k_consensus_comments_for_users called with fewer than 2 user_ids.") # Optional debug
+        return [] # Cannot query without user IDs or with only one user
     local_con = None
     try:
         local_con = duckdb.connect(database=DB_PATH, read_only=True)
+        # Use parameterized query for the list of user IDs and topic ID
         # DuckDB's Python API handles lists for IN clauses
         query = """
             SELECT
                 v.comment_id,
                 c.content,
+                VAR_POP(CASE
                     WHEN v.vote_type = 'agree' THEN 1.0
                     WHEN v.vote_type = 'neutral' THEN 0.0
                     WHEN v.vote_type = 'disagree' THEN -1.0
                     ELSE NULL -- Should not happen with current data
+                END) as vote_variance,
+                COUNT(v.user_id) as num_votes_in_group -- Include count for potential tie-breaking
             FROM votes v
             JOIN comments c ON v.comment_id = c.id
+            WHERE v.user_id IN (?) AND c.topic_id = ? -- Filter by user IDs and topic ID
             GROUP BY v.comment_id, c.content
+            HAVING COUNT(v.user_id) >= 2 -- Ensure at least 2 users from the list voted on this comment
+            ORDER BY vote_variance ASC, num_votes_in_group DESC -- Order by lowest variance, then by number of votes (more votes = stronger consensus)
             LIMIT ?
         """
+        # Pass the list of user_ids, topic_id, and k as parameters
+        # DuckDB requires list parameters to be wrapped in a list/tuple for the execute method
+        result = local_con.execute(query, [user_ids, topic_id, k]).fetchall()
+        # The result includes comment_id, content, variance, and count.
+        # We only need comment_id, content, and variance for the return value as per docstring.
+        # The count was used for ordering.
+        formatted_result = [(row[0], row[1], row[2]) for row in result]
+        return formatted_result
     except Exception as e:
         # st.error is not available here, just print or log
+        print(f"Error getting top k consensus comments for users {user_ids} in topic {topic_id}: {e}")
         return [] # Return empty list on error
     finally:
         if local_con:
         # Get all votes for the given topic by the specified users
         # Join with comments to filter by topic_id
+        # Construct the IN clause dynamically to avoid the conversion error
+        placeholders = ', '.join(['?'] * len(user_ids_tuple))
+        query = f"""
             SELECT
                 v.comment_id,
                 v.user_id,
                 v.vote_type
             FROM votes v
             JOIN comments c ON v.comment_id = c.id
+            WHERE c.topic_id = ? AND v.user_id IN ({placeholders})
         """
+        # Pass topic_id and then all user_ids as separate parameters
+        params = [topic_id] + list(user_ids_tuple) # Combine topic_id and user_ids
+        results = local_con.execute(query, params).fetchall()
         if not results:
             return 0.0 # No votes found for this group on this topic
         if local_con:
             local_con.close()
 # Helper function to name a group of users based on their participation and voting diversity
 def name_user_group(user_ids, topic_id):
     """
             if current_label is not None and previous_label is not None and current_label != previous_label:
                 if current_users_set != previous_users_set:
                     # Set a flag in session state to display the message later in the main rendering logic
+                    print("st.session_state._show_new_area_message = True")
+                    print("st.session_state._show_new_area_message = True")
                     st.session_state._show_new_area_message = True
+                    new_area_comments = get_top_k_consensus_comments_for_users(current_users_set, topic_id, k=5)
                     st.session_state._new_area_comments = new_area_comments
                     # print(f"DEBUG: Cluster changed for user {user_id} in topic {topic_id}: {previous_label} -> {current_label}")
                     # print(f"DEBUG: Previous users count: {len(previous_users_set)}, Current users count: {len(current_users_set)}")
                         st.markdown(random.choice(prompts))
                         new_comment_text = st.text_area("Your Insight that different from others above (Empty to skip)", key="tmp_new_comment_input")
                         st.session_state.handling_vote = True # lock
+                        if st.button("Share Wisdom"):
                             if new_comment_text and len(new_comment_text.strip()):
                                 user_email = st.session_state.get('user_email', '')
                                 user_id = find_or_create_user(user_email) # Ensure user exists
 if 'processed_url_params' not in st.session_state:
     st.session_state.processed_url_params = False # Add flag initialization
+# Initialize the database and add dummy data only once per session
+if st.session_state.get("db_initialized", False) is False:
+    print("INFO: Initializing database and adding dummy data...") # Optional: Info message
+    initialize_database()
     add_dummy_topic()
+    st.session_state.db_initialized = True
+    print("INFO: Database initialization complete.") # Optional: Info message
 # Handle initial load from URL query parameters
 # Process only once per session load using the flag