Spaces:
Running
Running
fix bugs
Browse files- src/streamlit_app.py +52 -31
src/streamlit_app.py
CHANGED
@@ -320,7 +320,8 @@ def get_ttl_hash(seconds=360):
|
|
320 |
def get_r_matrix_from_votes():
|
321 |
local_con = None
|
322 |
try:
|
323 |
-
|
|
|
324 |
|
325 |
# Fetch all vote data
|
326 |
# fetchdf requires pandas
|
@@ -504,58 +505,71 @@ def get_user_cluster_label(user_id, ttl_hash=None):
|
|
504 |
|
505 |
|
506 |
# Helper function to get top k most polarized comments for a list of users
|
507 |
-
def
|
508 |
"""
|
509 |
-
Retrieves the top k comments
|
510 |
-
|
|
|
|
|
|
|
|
|
511 |
|
512 |
Args:
|
513 |
user_ids (list[str]): A list of user IDs.
|
|
|
514 |
k (int): The number of top comments to retrieve.
|
515 |
|
516 |
Returns:
|
517 |
list[tuple]: A list of tuples, where each tuple contains
|
518 |
-
(comment_id, comment_content,
|
519 |
-
ordered by
|
520 |
Returns an empty list if no votes are found for these users
|
521 |
-
or on error.
|
522 |
"""
|
523 |
-
if not user_ids:
|
524 |
-
#
|
525 |
-
|
|
|
526 |
|
527 |
local_con = None
|
528 |
try:
|
529 |
local_con = duckdb.connect(database=DB_PATH, read_only=True)
|
530 |
|
531 |
-
# Use parameterized query for the list of user IDs
|
532 |
# DuckDB's Python API handles lists for IN clauses
|
533 |
query = """
|
534 |
SELECT
|
535 |
v.comment_id,
|
536 |
c.content,
|
537 |
-
|
538 |
WHEN v.vote_type = 'agree' THEN 1.0
|
539 |
WHEN v.vote_type = 'neutral' THEN 0.0
|
540 |
WHEN v.vote_type = 'disagree' THEN -1.0
|
541 |
ELSE NULL -- Should not happen with current data
|
542 |
-
END) as
|
|
|
543 |
FROM votes v
|
544 |
JOIN comments c ON v.comment_id = c.id
|
545 |
-
WHERE v.user_id IN (?)
|
546 |
GROUP BY v.comment_id, c.content
|
547 |
-
HAVING COUNT(v.user_id)
|
548 |
-
ORDER BY
|
549 |
LIMIT ?
|
550 |
"""
|
551 |
-
# Pass the list of user_ids and k as parameters
|
552 |
-
|
|
|
553 |
|
554 |
-
|
|
|
|
|
|
|
|
|
|
|
555 |
|
556 |
except Exception as e:
|
557 |
# st.error is not available here, just print or log
|
558 |
-
print(f"Error getting top k
|
559 |
return [] # Return empty list on error
|
560 |
finally:
|
561 |
if local_con:
|
@@ -591,17 +605,20 @@ def estimate_group_voting_diversity(user_ids, topic_id):
|
|
591 |
|
592 |
# Get all votes for the given topic by the specified users
|
593 |
# Join with comments to filter by topic_id
|
594 |
-
|
|
|
|
|
595 |
SELECT
|
596 |
v.comment_id,
|
597 |
v.user_id,
|
598 |
v.vote_type
|
599 |
FROM votes v
|
600 |
JOIN comments c ON v.comment_id = c.id
|
601 |
-
WHERE c.topic_id = ? AND v.user_id IN (
|
602 |
"""
|
603 |
-
#
|
604 |
-
|
|
|
605 |
|
606 |
if not results:
|
607 |
return 0.0 # No votes found for this group on this topic
|
@@ -643,7 +660,6 @@ def estimate_group_voting_diversity(user_ids, topic_id):
|
|
643 |
if local_con:
|
644 |
local_con.close()
|
645 |
|
646 |
-
|
647 |
# Helper function to name a group of users based on their participation and voting diversity
|
648 |
def name_user_group(user_ids, topic_id):
|
649 |
"""
|
@@ -805,8 +821,10 @@ def get_random_unvoted_comment(user_id, topic_id):
|
|
805 |
if current_label is not None and previous_label is not None and current_label != previous_label:
|
806 |
if current_users_set != previous_users_set:
|
807 |
# Set a flag in session state to display the message later in the main rendering logic
|
|
|
|
|
808 |
st.session_state._show_new_area_message = True
|
809 |
-
new_area_comments =
|
810 |
st.session_state._new_area_comments = new_area_comments
|
811 |
# print(f"DEBUG: Cluster changed for user {user_id} in topic {topic_id}: {previous_label} -> {current_label}")
|
812 |
# print(f"DEBUG: Previous users count: {len(previous_users_set)}, Current users count: {len(current_users_set)}")
|
@@ -1157,7 +1175,7 @@ def view_topic_page():
|
|
1157 |
st.markdown(random.choice(prompts))
|
1158 |
new_comment_text = st.text_area("Your Insight that different from others above (Empty to skip)", key="tmp_new_comment_input")
|
1159 |
st.session_state.handling_vote = True # lock
|
1160 |
-
if st.button("Share
|
1161 |
if new_comment_text and len(new_comment_text.strip()):
|
1162 |
user_email = st.session_state.get('user_email', '')
|
1163 |
user_id = find_or_create_user(user_email) # Ensure user exists
|
@@ -1296,11 +1314,14 @@ if 'comment_history' not in st.session_state:
|
|
1296 |
if 'processed_url_params' not in st.session_state:
|
1297 |
st.session_state.processed_url_params = False # Add flag initialization
|
1298 |
|
1299 |
-
# Initialize the database
|
1300 |
-
|
1301 |
-
|
|
|
1302 |
add_dummy_topic()
|
1303 |
-
st.session_state.
|
|
|
|
|
1304 |
|
1305 |
# Handle initial load from URL query parameters
|
1306 |
# Process only once per session load using the flag
|
|
|
320 |
def get_r_matrix_from_votes():
|
321 |
local_con = None
|
322 |
try:
|
323 |
+
# Use read_only=False to maintain consistent configuration across all connections
|
324 |
+
local_con = duckdb.connect(database=DB_PATH, read_only=False)
|
325 |
|
326 |
# Fetch all vote data
|
327 |
# fetchdf requires pandas
|
|
|
505 |
|
506 |
|
507 |
# Helper function to get the top k highest-consensus (lowest vote variance) comments for a list of users within a topic
|
508 |
+
def get_top_k_consensus_comments_for_users(user_ids, topic_id, k=5):
|
509 |
"""
|
510 |
+
Retrieves the top k comments with the highest voting consensus (lowest variance)
|
511 |
+
among a given list of users *for a specific topic*.
|
512 |
+
|
513 |
+
Consensus is measured by the population variance (VAR_POP) of numerical
|
514 |
+
vote scores (-1 for 'disagree', 0 for 'neutral', 1 for 'agree').
|
515 |
+
Lower variance indicates higher consensus.
|
516 |
|
517 |
Args:
|
518 |
user_ids (list[str]): A list of user IDs.
|
519 |
+
topic_id (str): The ID of the topic to filter comments by.
|
520 |
k (int): The number of top comments to retrieve.
|
521 |
|
522 |
Returns:
|
523 |
list[tuple]: A list of tuples, where each tuple contains
|
524 |
+
(comment_id, comment_content, vote_variance),
|
525 |
+
ordered by vote_variance ascending (lowest variance first).
|
526 |
Returns an empty list if no votes are found for these users
|
527 |
+
on this topic, or on error, or if the group has fewer than 2 users.
|
528 |
"""
|
529 |
+
if not user_ids or len(user_ids) < 2:
|
530 |
+
# Need at least 2 users from the group to calculate meaningful variance
|
531 |
+
# print("Warning: get_top_k_consensus_comments_for_users called with fewer than 2 user_ids.") # Optional debug
|
532 |
+
return [] # Cannot query without user IDs or with only one user
|
533 |
|
534 |
local_con = None
|
535 |
try:
|
536 |
local_con = duckdb.connect(database=DB_PATH, read_only=True)
|
537 |
|
538 |
+
# Use parameterized query for the list of user IDs and topic ID
|
539 |
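# NOTE(review): a single `?` inside IN (...) binds one value; passing a Python list here may raise a conversion error -- confirm, or expand to one `?` per user ID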
|
540 |
query = """
|
541 |
SELECT
|
542 |
v.comment_id,
|
543 |
c.content,
|
544 |
+
VAR_POP(CASE
|
545 |
WHEN v.vote_type = 'agree' THEN 1.0
|
546 |
WHEN v.vote_type = 'neutral' THEN 0.0
|
547 |
WHEN v.vote_type = 'disagree' THEN -1.0
|
548 |
ELSE NULL -- Should not happen with current data
|
549 |
+
END) as vote_variance,
|
550 |
+
COUNT(v.user_id) as num_votes_in_group -- Include count for potential tie-breaking
|
551 |
FROM votes v
|
552 |
JOIN comments c ON v.comment_id = c.id
|
553 |
+
WHERE v.user_id IN (?) AND c.topic_id = ? -- Filter by user IDs and topic ID
|
554 |
GROUP BY v.comment_id, c.content
|
555 |
+
HAVING COUNT(v.user_id) >= 2 -- Ensure at least 2 users from the list voted on this comment
|
556 |
+
ORDER BY vote_variance ASC, num_votes_in_group DESC -- Order by lowest variance, then by number of votes (more votes = stronger consensus)
|
557 |
LIMIT ?
|
558 |
"""
|
559 |
+
# Pass the list of user_ids, topic_id, and k as parameters
|
560 |
+
# DuckDB requires list parameters to be wrapped in a list/tuple for the execute method
|
561 |
+
result = local_con.execute(query, [user_ids, topic_id, k]).fetchall()
|
562 |
|
563 |
+
# The result includes comment_id, content, variance, and count.
|
564 |
+
# We only need comment_id, content, and variance for the return value as per docstring.
|
565 |
+
# The count was used for ordering.
|
566 |
+
formatted_result = [(row[0], row[1], row[2]) for row in result]
|
567 |
+
|
568 |
+
return formatted_result
|
569 |
|
570 |
except Exception as e:
|
571 |
# st.error is not available here, just print or log
|
572 |
+
print(f"Error getting top k consensus comments for users {user_ids} in topic {topic_id}: {e}")
|
573 |
return [] # Return empty list on error
|
574 |
finally:
|
575 |
if local_con:
|
|
|
605 |
|
606 |
# Get all votes for the given topic by the specified users
|
607 |
# Join with comments to filter by topic_id
|
608 |
+
# Construct the IN clause dynamically to avoid the conversion error
|
609 |
+
placeholders = ', '.join(['?'] * len(user_ids_tuple))
|
610 |
+
query = f"""
|
611 |
SELECT
|
612 |
v.comment_id,
|
613 |
v.user_id,
|
614 |
v.vote_type
|
615 |
FROM votes v
|
616 |
JOIN comments c ON v.comment_id = c.id
|
617 |
+
WHERE c.topic_id = ? AND v.user_id IN ({placeholders})
|
618 |
"""
|
619 |
+
# Pass topic_id and then all user_ids as separate parameters
|
620 |
+
params = [topic_id] + list(user_ids_tuple) # Combine topic_id and user_ids
|
621 |
+
results = local_con.execute(query, params).fetchall()
|
622 |
|
623 |
if not results:
|
624 |
return 0.0 # No votes found for this group on this topic
|
|
|
660 |
if local_con:
|
661 |
local_con.close()
|
662 |
|
|
|
663 |
# Helper function to name a group of users based on their participation and voting diversity
|
664 |
def name_user_group(user_ids, topic_id):
|
665 |
"""
|
|
|
821 |
if current_label is not None and previous_label is not None and current_label != previous_label:
|
822 |
if current_users_set != previous_users_set:
|
823 |
# Set a flag in session state to display the message later in the main rendering logic
|
824 |
+
print("st.session_state._show_new_area_message = True")
|
825 |
+
print("st.session_state._show_new_area_message = True")
|
826 |
st.session_state._show_new_area_message = True
|
827 |
+
new_area_comments = get_top_k_consensus_comments_for_users(current_users_set, topic_id, k=5)
|
828 |
st.session_state._new_area_comments = new_area_comments
|
829 |
# print(f"DEBUG: Cluster changed for user {user_id} in topic {topic_id}: {previous_label} -> {current_label}")
|
830 |
# print(f"DEBUG: Previous users count: {len(previous_users_set)}, Current users count: {len(current_users_set)}")
|
|
|
1175 |
st.markdown(random.choice(prompts))
|
1176 |
new_comment_text = st.text_area("Your Insight that different from others above (Empty to skip)", key="tmp_new_comment_input")
|
1177 |
st.session_state.handling_vote = True # lock
|
1178 |
+
if st.button("Share Wisdom"):
|
1179 |
if new_comment_text and len(new_comment_text.strip()):
|
1180 |
user_email = st.session_state.get('user_email', '')
|
1181 |
user_id = find_or_create_user(user_email) # Ensure user exists
|
|
|
1314 |
if 'processed_url_params' not in st.session_state:
|
1315 |
st.session_state.processed_url_params = False # Add flag initialization
|
1316 |
|
1317 |
+
# Initialize the database and add dummy data only once per session
|
1318 |
+
if st.session_state.get("db_initialized", False) is False:
|
1319 |
+
print("INFO: Initializing database and adding dummy data...") # Optional: Info message
|
1320 |
+
initialize_database()
|
1321 |
add_dummy_topic()
|
1322 |
+
st.session_state.db_initialized = True
|
1323 |
+
print("INFO: Database initialization complete.") # Optional: Info message
|
1324 |
+
|
1325 |
|
1326 |
# Handle initial load from URL query parameters
|
1327 |
# Process only once per session load using the flag
|