Spaces:

helvia
/

sessions-observatory

Sleeping

App Files Files Community

sessions-observatory / app.py

eloukas

Add files for HF deployment

1b75011 about 1 month ago

raw

history blame

55 kB

	import base64
	import io
	import random

	import dash
	import numpy as np
	import pandas as pd
	import plotly.express as px
	import plotly.graph_objects as go
	from dash import Input, Output, State, callback, dcc, html

	# Create the Dash application object. Callback-exception suppression is
	# switched on via the constructor flag below.
	app = dash.Dash(
	    __name__,
	    suppress_callback_exceptions=True,
	)
	# Module-level handle on the app's underlying server object
	# (presumably for a WSGI runner such as gunicorn -- confirm with deployment).
	server = app.server


	# Define app layout.
	# The component tree is assembled from named sections (top to bottom, then
	# left to right) and combined into ``app.layout`` at the end.

	# Header: page title and a one-line usage hint.
	_header_section = html.Div(
	    className="header-container",
	    children=[
	        html.H1(
	            "Sessions Observatory by helvia.ai 🔭📊",
	            className="app-header",
	        ),
	        html.P(
	            "Upload a CSV/Excel file to visualize the chatbot's dialog topics.",
	            className="app-description",
	        ),
	    ],
	)

	# Inline style of the drag-and-drop box.
	_upload_box_style = {
	    "width": "100%",
	    "height": "120px",
	    "lineHeight": "60px",
	    "borderWidth": "1px",
	    "borderStyle": "dashed",
	    "borderRadius": "0.5rem",
	    "textAlign": "center",
	    "margin": "10px 0",
	    "backgroundColor": "hsl(210, 40%, 98%)",
	    "borderColor": "hsl(214.3, 31.8%, 91.4%)",
	    "cursor": "pointer",
	}

	# File upload: a single-file drop zone plus a status line that stays hidden
	# until the upload callback has something to report.
	_upload_section = html.Div(
	    className="upload-container",
	    children=[
	        dcc.Upload(
	            id="upload-data",
	            multiple=False,
	            style=_upload_box_style,
	            children=html.Div(
	                className="upload-content",
	                children=[
	                    html.Div("Drag and Drop", className="upload-text"),
	                    html.Div("or", className="upload-divider"),
	                    html.Div(
	                        html.Button("Select a File", className="upload-button")
	                    ),
	                ],
	            ),
	        ),
	        html.Div(
	            id="upload-status",
	            className="upload-status-message",
	            style={"display": "none"},  # Initially hidden
	        ),
	    ],
	)

	# Choices for the "Color by" radio group: display label -> stats column.
	_color_metric_options = [
	    {"label": label, "value": value}
	    for label, value in (
	        ("Sentiment", "negative_rate"),
	        ("Resolution", "unresolved_rate"),
	        ("Urgency", "urgent_rate"),
	    )
	]

	# Controls shown under the chart (only "Color by" is offered).
	_chart_controls = html.Div(
	    className="chart-controls",
	    children=[
	        html.Div(
	            className="control-labels-row",
	            children=[
	                html.Div(
	                    html.Label("Color by:", className="control-label"),
	                    className="control-label-container",
	                ),
	            ],
	        ),
	        html.Div(
	            className="control-options-row",
	            children=[
	                html.Div(
	                    dcc.RadioItems(
	                        id="color-metric",
	                        options=_color_metric_options,
	                        value="negative_rate",
	                        inline=True,
	                        className="radio-group",
	                        inputClassName="radio-input",
	                        labelClassName="radio-label",
	                    ),
	                    className="radio-container",
	                ),
	            ],
	        ),
	    ],
	)

	# Left side: heading, bubble chart and its controls.
	_chart_panel = html.Div(
	    className="chart-container",
	    children=[
	        html.H3(
	            id="topic-distribution-header",
	            children="Sessions Observatory",
	            className="section-header",
	        ),
	        # NOTE: per the original author's remark this calc() height "does not
	        # work for some reason"; a fixed "80vh" was the earlier setting.
	        dcc.Graph(
	            id="bubble-chart",
	            style={"height": "calc(100% - 154px)"},
	        ),
	        _chart_controls,
	    ],
	)

	# Sidebar sub-sections; their inner containers are filled by callbacks.
	_metadata_block = html.Div(
	    className="metadata-section",
	    children=[
	        html.H4("Metadata", className="subsection-header"),
	        html.Div(id="topic-metadata", className="metadata-container"),
	    ],
	)
	_metrics_block = html.Div(
	    className="metrics-section",
	    children=[
	        html.H4("Key Metrics", className="subsection-header"),
	        html.Div(id="topic-metrics", className="metrics-container"),
	    ],
	)
	_tags_block = html.Div(
	    [
	        html.H4("Tags", className="subsection-header"),
	        html.Div(id="important-tags", className="tags-container"),
	    ]
	)
	_samples_block = html.Div(
	    className="samples-section",
	    children=[
	        html.H4("Sample Dialogs (Summary)", className="subsection-header"),
	        html.Div(id="sample-dialogs", className="sample-dialogs-container"),
	    ],
	)

	# Details for the currently selected topic.
	_topic_details = html.Div(
	    className="topic-details-content",
	    children=[
	        html.H3("Topic Details", className="section-header"),
	        html.Div(id="topic-title", className="topic-title"),
	        html.Div(
	            [_metadata_block, _metrics_block, _tags_block],
	            className="details-section",
	        ),
	        _samples_block,
	    ],
	)

	# Placeholder shown in the sidebar while no bubble is selected.
	_no_topic_placeholder = html.Div(
	    id="no-topic-selected",
	    className="no-selection-container",
	    children=[
	        html.Div(
	            className="no-selection-message",
	            children=[
	                html.I(className="fas fa-info-circle info-icon"),
	                html.H3("No topic selected"),
	                html.P("Click or hover on a bubble to view topic details."),
	            ],
	        )
	    ],
	)

	# Right side: interactive sidebar with topic details.
	_sidebar_panel = html.Div(
	    [_topic_details, _no_topic_placeholder],
	    className="sidebar-container",
	)

	# Main content area (hidden until a file is uploaded).
	_main_content = html.Div(
	    id="main-content",
	    style={"display": "none"},
	    children=[
	        html.Div(
	            [_chart_panel, _sidebar_panel],
	            className="dashboard-container",
	        )
	    ],
	)

	app.layout = html.Div(
	    className="app-container",
	    children=[
	        _header_section,
	        _upload_section,
	        _main_content,
	        # Store the processed data
	        dcc.Store(id="stored-data"),
	    ],
	)

	# HTML page template for the app, with all styling inlined in a <style> block
	# (Font Awesome and the Inter font are pulled from public CDNs).
	# The {%...%} tokens are Dash template placeholders and must be kept.
	app.index_string = """
	<!DOCTYPE html>
	<html>
	<head>
	{%metas%}
	<title>Sessions Observatory by helvia.ai 🔭📊</title>
	{%favicon%}
	{%css%}
	<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
	<style>
	@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');

	:root {
	--background: hsl(210, 20%, 95%);
	--foreground: hsl(222.2, 84%, 4.9%);
	--card: hsl(0, 0%, 100%);
	--card-foreground: hsl(222.2, 84%, 4.9%);
	--popover: hsl(0, 0%, 100%);
	--popover-foreground: hsl(222.2, 84%, 4.9%);
	--primary: hsl(222.2, 47.4%, 11.2%);
	--primary-foreground: hsl(210, 40%, 98%);
	--secondary: hsl(210, 40%, 96.1%);
	--secondary-foreground: hsl(222.2, 47.4%, 11.2%);
	--muted: hsl(210, 40%, 96.1%);
	--muted-foreground: hsl(215.4, 16.3%, 46.9%);
	--accent: hsl(210, 40%, 96.1%);
	--accent-foreground: hsl(222.2, 47.4%, 11.2%);
	--destructive: hsl(0, 84.2%, 60.2%);
	--destructive-foreground: hsl(210, 40%, 98%);
	--border: hsl(214.3, 31.8%, 91.4%);
	--input: hsl(214.3, 31.8%, 91.4%);
	--ring: hsl(222.2, 84%, 4.9%);
	--radius: 0.5rem;
	}

	* {
	margin: 0;
	padding: 0;
	box-sizing: border-box;
	font-family: 'Inter', sans-serif;
	}

	body {
	background-color: var(--background);
	color: var(--foreground);
	font-feature-settings: "rlig" 1, "calt" 1;
	}

	.app-container {
	max-width: 2500px;
	margin: 0 auto;
	padding: 1.5rem;
	background-color: var(--background);
	min-height: 100vh;
	display: flex;
	flex-direction: column;
	}

	.header-container {
	margin-bottom: 2rem;
	text-align: center;
	}

	.app-header {
	color: var(--foreground);
	margin-bottom: 0.75rem;
	font-weight: 600;
	font-size: 2rem;
	line-height: 1.2;
	}

	.app-description {
	color: var(--muted-foreground);
	font-size: 1rem;
	line-height: 1.5;
	}

	.upload-container {
	margin-bottom: 2rem;
	max-width: 800px;
	margin-left: auto;
	margin-right: auto;
	}

	.upload-content {
	display: flex;
	flex-direction: column;
	align-items: center;
	justify-content: center;
	height: 80%;
	padding: 1.5rem;
	position: relative;
	}

	.upload-text {
	font-size: 1rem;
	color: var(--primary);
	font-weight: 500;
	}

	.upload-divider {
	color: var(--muted-foreground);
	margin: 0.5rem 0;
	font-size: 0.875rem;
	}

	.upload-button {
	background-color: var(--primary);
	color: var(--primary-foreground);
	border: none;
	padding: 0.5rem 1rem;
	border-radius: var(--radius);
	font-size: 0.875rem;
	cursor: pointer;
	transition: opacity 0.2s;
	font-weight: 500;
	box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05);
	height: 2.5rem;
	}

	.upload-button:hover {
	opacity: 0.9;
	}

	/* Status message styling */
	.upload-status-message {
	margin-top: 1rem;
	padding: 0.75rem;
	font-weight: 500;
	text-align: center;
	border-radius: var(--radius);
	font-size: 0.875rem;
	transition: all 0.3s ease;
	background-color: var(--secondary);
	color: var(--secondary-foreground);
	}

	/* Chart controls styling */
	.chart-controls {
	margin-top: 1rem;
	display: flex;
	flex-direction: column;
	gap: 0.75rem;
	padding: 1rem;
	background-color: var(--card);
	border-radius: var(--radius);
	border: 1px solid var(--border);
	box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05);
	}

	.control-labels-row {
	display: flex;
	width: 100%;
	}

	.control-options-row {
	display: flex;
	width: 100%;
	}

	.control-label-container {
	padding: 0 0.5rem;
	text-align: left;
	}

	.control-label {
	font-weight: 500;
	color: var(--foreground);
	font-size: 0.875rem;
	line-height: 1.25rem;
	}

	.radio-container {
	padding: 0 0.5rem;
	width: 100%;
	}

	.radio-group {
	display: flex;
	gap: 1rem;
	}

	.radio-input {
	margin-right: 0.375rem;
	cursor: pointer;
	height: 1rem;
	width: 1rem;
	border-radius: 9999px;
	border: 1px solid var(--border);
	appearance: none;
	-webkit-appearance: none;
	background-color: var(--background);
	transition: border-color 0.2s;
	}

	.radio-input:checked {
	border-color: var(--primary);
	background-color: var(--primary);
	background-image: url("data:image/svg+xml,%3csvg viewBox='0 0 16 16' fill='white' xmlns='http://www.w3.org/2000/svg'%3e%3ccircle cx='8' cy='8' r='3'/%3e%3c/svg%3e");
	background-size: 100% 100%;
	background-position: center;
	background-repeat: no-repeat;
	}

	.radio-label {
	font-weight: 400;
	color: var(--foreground);
	display: flex;
	align-items: center;
	cursor: pointer;
	font-size: 0.875rem;
	line-height: 1.25rem;
	}

	/* Dashboard container */
	.dashboard-container {
	display: flex;
	flex-wrap: wrap;
	gap: 1.5rem;
	flex: 1;
	height: 100%;
	}

	.chart-container {
	flex: 2.75;
	min-width: 400px;
	background: var(--card);
	border-radius: var(--radius);
	box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
	padding: 1rem;
	border: 0.75px solid var(--border);
	height: 100%;
	}

	.sidebar-container {
	flex: 1;
	min-width: 300px;
	background: var(--card);
	border-radius: var(--radius);
	box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
	padding: 1rem;
	position: relative;
	overflow-y: auto;
	border: 1px solid var(--border);
	height: 100%;
	}

	.section-header {
	margin-bottom: 1rem;
	color: var(--foreground);
	border-bottom: 1px solid var(--border);
	padding-bottom: 0.75rem;
	font-weight: 600;
	font-size: 1.25rem;
	}

	.subsection-header {
	margin: 1rem 0 0.75rem;
	color: var(--foreground);
	font-size: 1rem;
	font-weight: 600;
	}

	.topic-title {
	font-size: 1.25rem;
	font-weight: 600;
	color: var(--foreground);
	margin-bottom: 1rem;
	padding: 0.5rem 0.75rem;
	background-color: var(--secondary);
	border-radius: var(--radius);
	}

	.metadata-container {
	display: flex;
	flex-wrap: wrap;
	gap: 0.75rem;
	margin-bottom: 1rem;
	}

	.metadata-item {
	background-color: var(--secondary);
	padding: 0.5rem 0.75rem;
	border-radius: var(--radius);
	font-size: 0.875rem;
	display: flex;
	align-items: center;
	color: var(--secondary-foreground);
	}

	.metadata-icon {
	margin-right: 0.5rem;
	color: var(--primary);
	}

	.metrics-container {
	display: flex;
	justify-content: space-between;
	gap: 0.75rem;
	margin-bottom: 0.75rem;
	}

	.metric-box {
	background-color: var(--card);
	border-radius: var(--radius);
	padding: 0.75rem;
	text-align: center;
	flex: 1;
	border: 1px solid var(--border);
	}

	.metric-box.negative {
	border-left: 3px solid var(--destructive);
	}

	.metric-box.unresolved {
	border-left: 3px solid hsl(47.9, 95.8%, 53.1%);
	}

	.metric-box.urgent {
	border-left: 3px solid hsl(217.2, 91.2%, 59.8%);
	}

	.metric-value {
	font-size: 1.5rem;
	font-weight: 600;
	margin-bottom: 0.25rem;
	color: var(--foreground);
	line-height: 1;
	}

	.metric-label {
	font-size: 0.75rem;
	color: var(--muted-foreground);
	}

	.sample-dialogs-container {
	margin-top: 0.75rem;
	}

	.dialog-item {
	background-color: var(--secondary);
	border-radius: var(--radius);
	padding: 1rem;
	margin-bottom: 0.75rem;
	border-left: 3px solid var(--primary);
	}

	.dialog-summary {
	font-size: 0.875rem;
	line-height: 1.5;
	margin-bottom: 0.5rem;
	color: var(--foreground);
	}

	.dialog-metadata {
	display: flex;
	flex-wrap: wrap;
	gap: 0.5rem;
	margin-top: 0.5rem;
	font-size: 0.75rem;
	}

	.dialog-tag {
	padding: 0.25rem 0.5rem;
	border-radius: var(--radius);
	font-size: 0.7rem;
	font-weight: 500;
	}

	.tag-sentiment {
	background-color: var(--destructive);
	color: var(--destructive-foreground);
	}

	.tag-resolution {
	background-color: hsl(47.9, 95.8%, 53.1%);
	color: hsl(222.2, 84%, 4.9%);
	}

	.tag-urgency {
	background-color: hsl(217.2, 91.2%, 59.8%);
	color: hsl(210, 40%, 98%);
	}

	.tag-chat-id {
	background-color: hsl(215.4, 16.3%, 46.9%);
	color: hsl(210, 40%, 98%);
	font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
	font-weight: 500;
	}

	.no-selection-container {
	position: absolute;
	top: 0;
	left: 0;
	right: 0;
	bottom: 0;
	display: flex;
	align-items: center;
	justify-content: center;
	background-color: hsla(0, 0%, 100%, 0.95);
	z-index: 10;
	border-radius: var(--radius);
	}

	.no-selection-message {
	text-align: center;
	color: var(--muted-foreground);
	padding: 1.5rem;
	}

	.info-icon {
	font-size: 2rem;
	margin-bottom: 0.75rem;
	color: var(--muted);
	}

	/* Tags container */
	.tags-container {
	display: flex;
	flex-wrap: wrap;
	gap: 8px;
	margin-top: 5px;
	margin-bottom: 15px;
	padding: 6px;
	border-radius: 8px;
	background-color: #f8f9fa;
	}

	.topic-tag {
	padding: 6px 12px;
	border-radius: 15px;
	font-size: 0.8rem;
	display: inline-flex;
	align-items: center;
	box-shadow: 0 1px 3px rgba(0,0,0,0.12);
	transition: all 0.2s ease;
	font-weight: 500;
	margin-bottom: 5px;
	cursor: default;
	border: 1px solid rgba(0,0,0,0.08);
	background-color: #6c757d; /* Consistent medium gray color */
	color: white;
	}

	.topic-tag:hover {
	transform: translateY(-1px);
	box-shadow: 0 3px 5px rgba(0,0,0,0.15);
	background-color: #5a6268; /* Slightly darker on hover */
	}

	.topic-tag-icon {
	margin-right: 5px;
	font-size: 0.7rem;
	opacity: 0.8;
	color: rgba(255, 255, 255, 0.9);
	}

	.no-tags-message {
	color: var(--muted-foreground);
	font-style: italic;
	padding: 0.75rem;
	text-align: center;
	width: 100%;
	}

	/* Responsive adjustments */
	@media (max-width: 768px) {
	.dashboard-container {
	flex-direction: column;
	}
	.chart-container, .sidebar-container {
	width: 100%;
	}
	.app-header {
	font-size: 1.5rem;
	}
	}
	</style>
	</head>
	<body>
	{%app_entry%}
	<footer>
	{%config%}
	{%scripts%}
	{%renderer%}
	</footer>
	</body>
	</html>
	"""


	@callback(
	    Output("topic-distribution-header", "children"),
	    Input("stored-data", "data"),
	)
	def update_topic_distribution_header(data):
	    """Return the chart heading, with the total dialog count once data exists.

	    Args:
	        data: Contents of the ``stored-data`` store; when truthy it is loaded
	            into a DataFrame and its ``count`` column is summed.

	    Returns:
	        The plain title when ``data`` is falsy, otherwise the title followed
	        by the summed count in parentheses.
	    """
	    base_title = "Sessions Observatory"
	    if data:
	        dialog_total = pd.DataFrame(data)["count"].sum()
	        return f"{base_title} ({dialog_total} dialogs)"
	    # Default when no data is available
	    return base_title


	# Upload handling: parse the uploaded file and publish the topic statistics.
	def _upload_status_message(icon_class, text, color):
	    """Build the status line: a Font Awesome icon followed by *text*, in *color*."""
	    return html.Div(
	        [
	            html.I(
	                className=icon_class,
	                style={"color": color, "marginRight": "8px"},
	            ),
	            text,
	        ],
	        style={"color": color},
	    )


	@callback(
	    [
	        Output("stored-data", "data"),
	        Output("upload-status", "children"),
	        Output("upload-status", "style"),  # Add style output for visibility
	        Output("main-content", "style"),
	    ],
	    [Input("upload-data", "contents")],
	    [State("upload-data", "filename")],
	)
	def process_upload(contents, filename):
	    """Parse an uploaded CSV/Excel file and store its per-topic statistics.

	    Args:
	        contents: Upload payload of the form ``"<header>,<base64 data>"``, or
	            ``None`` when nothing has been uploaded.
	        filename: Name of the uploaded file; its extension selects the parser
	            (``.csv`` -> ``pd.read_csv``, ``.xls*`` -> ``pd.read_excel``).

	    Returns:
	        A 4-tuple ``(stored_data, status_children, status_style,
	        main_content_style)``. On success ``stored_data`` is the list of
	        records produced by ``analyze_topics``; on any failure it is ``None``,
	        the main content stays hidden and the status line shows the reason.
	    """
	    if contents is None:
	        return None, "", {"display": "none"}, {"display": "none"}  # Keep hidden

	    error_color = "var(--destructive)"
	    success_color = "hsl(142.1, 76.2%, 36.3%)"

	    try:
	        # Only the first comma separates the header from the payload.
	        _, content_string = contents.split(",", 1)
	        decoded = base64.b64decode(content_string)

	        # Decide by file extension rather than by substring, so that a name
	        # such as "csv_export.xlsx" is not mistaken for a CSV file.
	        _, dot, extension = (filename or "").lower().rpartition(".")
	        if dot and extension == "csv":
	            # "utf-8-sig" also strips a leading BOM, which would otherwise
	            # end up inside the first column name.
	            df = pd.read_csv(io.StringIO(decoded.decode("utf-8-sig")))
	        elif dot and extension.startswith("xls"):
	            df = pd.read_excel(io.BytesIO(decoded))
	        else:
	            return (
	                None,
	                _upload_status_message(
	                    "fas fa-exclamation-circle",
	                    "Please upload a CSV or Excel file.",
	                    error_color,
	                ),
	                {"display": "block"},  # Make visible after error
	                {"display": "none"},
	            )

	        # Process the dataframe to get topic statistics
	        topic_stats = analyze_topics(df)

	        return (
	            topic_stats.to_dict("records"),
	            _upload_status_message(
	                "fas fa-check-circle",
	                f'Successfully uploaded "{filename}"',
	                success_color,
	            ),
	            {"display": "block"},
	            {
	                "display": "block",
	                "height": "calc(100vh - 40px)",
	            },  # Make visible after successful upload
	        )

	    except Exception as e:
	        # Callback boundary: any parsing/analysis failure is reported to the
	        # user in the status line instead of surfacing as a callback error.
	        return (
	            None,
	            _upload_status_message(
	                "fas fa-exclamation-triangle",
	                f"Error processing file: {str(e)}",
	                error_color,
	            ),
	            {"display": "block"},  # Make visible after error
	            {"display": "none"},
	        )


	# Function to analyze the topics and create statistics
	def analyze_topics(df):
	    """Aggregate dialog rows into per-topic counts, rates and bubble positions.

	    Args:
	        df: DataFrame with one row per dialog. It must contain the columns
	            ``deduplicated_topic_name``, ``id``, ``Sentiment``, ``Resolution``
	            and ``Urgency``.

	    Returns:
	        The result of ``apply_binned_layout`` applied to a frame with one row
	        per topic holding ``count`` (non-null ``id`` values), the number of
	        rows equal to ``"negative"`` / ``"unresolved"`` / ``"urgent"`` in the
	        respective column, and the matching ``*_rate`` percentages rounded to
	        one decimal.

	    Raises:
	        ValueError: If any required column is missing from ``df``.
	    """
	    required_columns = (
	        "deduplicated_topic_name",
	        "id",
	        "Sentiment",
	        "Resolution",
	        "Urgency",
	    )
	    missing = [name for name in required_columns if name not in df.columns]
	    if missing:
	        # Fail early with a readable message instead of a bare KeyError.
	        raise ValueError(f"Missing required column(s): {', '.join(missing)}")

	    # Group by topic name and calculate metrics
	    topic_stats = (
	        df.groupby("deduplicated_topic_name")
	        .agg(
	            count=("id", "count"),
	            negative_count=("Sentiment", lambda x: (x == "negative").sum()),
	            unresolved_count=("Resolution", lambda x: (x == "unresolved").sum()),
	            urgent_count=("Urgency", lambda x: (x == "urgent").sum()),
	        )
	        .reset_index()
	    )

	    # Calculate rates (percentage of the topic's dialogs, one decimal)
	    for prefix in ("negative", "unresolved", "urgent"):
	        topic_stats[f"{prefix}_rate"] = (
	            topic_stats[f"{prefix}_count"] / topic_stats["count"] * 100
	        ).round(1)

	    # Apply binned layout
	    return apply_binned_layout(topic_stats)


	# Binned layout: one chart row per dialog-count bin
	def apply_binned_layout(df, padding=0, bin_config=None, max_items_per_row=6):
	    """
	    Apply a binned layout where bubbles are grouped into rows based on dialog count.
	    Bubbles in each row will be centered horizontally.

	    Rows are ordered by bin (first entry of ``bin_config`` first) and, within a
	    bin, by descending ``count``. A bin holding more than ``max_items_per_row``
	    topics is split into evenly sized sub-rows named ``"<bin>_<k>"``.

	    Args:
	        df: DataFrame containing the topic data; must have a numeric ``count``
	            column. It is not modified.
	        padding: Padding from the top and bottom edges, in chart units (0-100 scale)
	        bin_config: List of ``(lower, upper, description)`` tuples with
	            inclusive bounds; ``upper=None`` means no upper limit. The first
	            matching bin wins and rows matching no bin fall into the last one.
	            Example: [(300, None, "300+ dialogs"), (250, 299, "250-299 dialogs"), ...]
	        max_items_per_row: Maximum number of items to display in a single row

	    Returns:
	        A sorted copy of ``df`` with the columns ``bin``, ``bin_description``,
	        ``y``, ``x`` and ``size_rank`` added (or overwritten). An input
	        without rows is returned with those columns present but empty.

	    Raises:
	        ValueError: If ``bin_config`` is empty or ``max_items_per_row < 1``.
	    """
	    if max_items_per_row < 1:
	        raise ValueError("max_items_per_row must be at least 1")

	    # Default bin configuration if none is provided
	    # 8 rows x 6 bubbles is usually good
	    if bin_config is None:
	        bin_config = [
	            (100, None, "100+ dialogs"),
	            (50, 99, "50-99 dialogs"),
	            (25, 49, "25-49 dialogs"),
	            (9, 24, "9-24 dialogs"),
	            (7, 8, "7-8 dialogs"),
	            (5, 6, "5-6 dialogs"),
	            (4, 4, "4 dialogs"),
	            (0, 3, "0-3 dialogs"),
	        ]
	    if not bin_config:
	        raise ValueError("bin_config must define at least one bin")

	    # Create a copy of the dataframe to avoid modifying the original
	    df_sorted = df.copy()

	    output_columns = ("bin", "bin_description", "y", "x", "size_rank")
	    if len(df_sorted) == 0:
	        # Nothing to place; avoid dividing the height by zero rows below.
	        for column in output_columns:
	            df_sorted[column] = []
	        return df_sorted

	    counts = df_sorted["count"].to_numpy(dtype=float)

	    # Assign each topic the index of the first bin it matches. Bins are
	    # written in reverse so that earlier bins overwrite later ones; anything
	    # unmatched keeps the last bin.
	    last_bin = len(bin_config) - 1
	    bin_index = np.full(len(counts), last_bin)
	    for idx in range(last_bin, -1, -1):
	        lower, upper, _ = bin_config[idx]
	        matches = counts >= lower
	        if upper is not None:
	            matches &= counts <= upper
	        bin_index[matches] = idx

	    # Order rows by bin position, then by descending count. Sorting on the
	    # numeric bin index (not the bin name) keeps "Bin 10" after "Bin 2".
	    row_order = np.lexsort((-counts, bin_index))
	    df_sorted = df_sorted.iloc[row_order].copy()

	    # Describe every chart row as (name, description, number of bubbles),
	    # splitting bins that exceed max_items_per_row into balanced sub-rows.
	    bin_sizes = np.bincount(bin_index, minlength=len(bin_config))
	    rows = []
	    for idx, (_, _, description) in enumerate(bin_config):
	        bin_size = int(bin_sizes[idx])
	        if bin_size == 0:
	            continue
	        bin_name = f"Bin {idx + 1}"
	        if bin_size <= max_items_per_row:
	            rows.append((bin_name, description, bin_size))
	            continue

	        num_sub_bins = (bin_size + max_items_per_row - 1) // max_items_per_row
	        base_size, remainder = divmod(bin_size, num_sub_bins)
	        for sub in range(num_sub_bins):
	            # The first `remainder` sub-rows take one extra bubble.
	            sub_size = base_size + (1 if sub < remainder else 0)
	            rows.append(
	                (
	                    f"{bin_name}_{sub + 1}",
	                    f"{description} ({sub + 1}/{num_sub_bins})",
	                    sub_size,
	                )
	            )

	    # Lay the rows out: y is the vertical centre of the row's band, x spreads
	    # the bubbles symmetrically around the middle of the chart.
	    center_point = 50  # Middle of the chart (0-100 scale)
	    sparse_spacing = 17.5  # gap between bubbles in a row that is not full
	    full_spacing = 15  # gap between bubbles in a full row
	    row_height = (100 - (2 * padding)) / len(rows)

	    bin_names, descriptions, y_values, x_values = [], [], [], []
	    for row_idx, (row_name, row_description, row_size) in enumerate(rows):
	        row_y = padding + row_idx * row_height + (row_height / 2)
	        spacing = sparse_spacing if row_size < max_items_per_row else full_spacing
	        start_pos = center_point - ((row_size - 1) * spacing / 2)

	        bin_names.extend([row_name] * row_size)
	        descriptions.extend([row_description] * row_size)
	        y_values.extend([row_y] * row_size)
	        x_values.extend(start_pos + (i * spacing) for i in range(row_size))

	    df_sorted["bin"] = bin_names
	    df_sorted["bin_description"] = descriptions
	    df_sorted["y"] = y_values
	    df_sorted["x"] = x_values

	    # Add original rank for reference
	    df_sorted["size_rank"] = range(1, len(df_sorted) + 1)

	    return df_sorted


	# Position helper for the main chart, which always uses the binned layout.
	def update_bubble_positions(df: pd.DataFrame) -> pd.DataFrame:
	    """Return ``apply_binned_layout(df)``, i.e. *df* laid out with default settings."""
	    positioned = apply_binned_layout(df)
	    return positioned


	# Callback to update the bubble chart
	@callback(
	    Output("bubble-chart", "figure"),
	    [
	        Input("stored-data", "data"),
	        Input("color-metric", "value"),
	    ],
	)
	def update_bubble_chart(data, color_metric):
	    """Render the binned bubble chart for the stored topic statistics.

	    Each topic becomes one bubble: position comes from
	    ``update_bubble_positions``, area from a log-scaled dialog count and
	    colour from the selected rate column. Topic names are drawn as
	    annotations under the bubbles, and every row gets a dotted separator line
	    plus a label with its bin description.

	    Args:
	        data: Contents of the ``stored-data`` store (list of topic records).
	        color_metric: ``"negative_rate"`` or ``"unresolved_rate"``; any other
	            value colours by ``"urgent_rate"``.

	    Returns:
	        A Plotly figure; an empty ``go.Figure()`` when ``data`` is falsy.
	    """
	    if not data:
	        return go.Figure()

	    # Update positions using binned layout
	    df = update_bubble_positions(pd.DataFrame(data))

	    # Always use count for sizing
	    raw_sizes = df["count"]
	    size_title = "Dialog Count"

	    # Apply log scaling to the size values for better visualization.
	    # Raise min_size to enlarge the smallest bubble; lower size_span to
	    # shrink the largest one.
	    min_size = 1
	    size_span = 50
	    if raw_sizes.max() > raw_sizes.min():
	        log_sizes = np.log1p(raw_sizes)
	        size_values = (
	            min_size
	            + (log_sizes - log_sizes.min())
	            / (log_sizes.max() - log_sizes.min())
	            * size_span
	        )
	    else:
	        # If all values are the same, use a default size
	        size_values = np.ones(len(df)) * 12.5

	    # Plain array in row order, so per-row lookups below are positional and
	    # work whether size_values is a Series or an ndarray.
	    marker_sizes = np.asarray(size_values, dtype=float)

	    # Determine color column and colorbar title from the selected metric
	    color_titles = {
	        "negative_rate": "Negativity (%)",
	        "unresolved_rate": "Unresolved (%)",
	        "urgent_rate": "Urgency (%)",
	    }
	    color_column = color_metric if color_metric in color_titles else "urgent_rate"
	    color_values = df[color_column]
	    color_title = color_titles[color_column]
	    color_scale = "Teal"  # same scale for every metric

	    # Create enhanced hover text that includes bin information
	    hover_text = [
	        f"Topic: {topic}<br>{size_title}: {raw:.1f}<br>{color_title}: {color:.1f}<br>Group: {bin_desc}"
	        for topic, raw, color, bin_desc in zip(
	            df["deduplicated_topic_name"],
	            raw_sizes,
	            color_values,
	            df["bin_description"],
	        )
	    ]

	    # Create bubble chart
	    fig = px.scatter(
	        df,
	        x="x",
	        y="y",
	        size=size_values,
	        color=color_values,
	        hover_name="deduplicated_topic_name",
	        hover_data={
	            "x": False,
	            "y": False,
	            "bin_description": True,
	        },
	        size_max=42.5,  # Maximum size of the bubbles, change this to adjust the size
	        color_continuous_scale=color_scale,
	        custom_data=[
	            "deduplicated_topic_name",
	            "count",
	            "negative_rate",
	            "unresolved_rate",
	            "urgent_rate",
	            "bin_description",
	        ],
	    )

	    # Markers only; topic names are added as annotations below
	    fig.update_traces(
	        mode="markers",
	        marker=dict(sizemode="area", opacity=0.8, line=dict(width=1, color="white")),
	        hovertemplate="%{hovertext}<extra></extra>",
	        hovertext=hover_text,
	    )

	    # Create annotations for the bubbles
	    annotations = []
	    for position, (_, row) in enumerate(df.iterrows()):
	        # Wrap text every 4 words
	        words = row["deduplicated_topic_name"].split()
	        wrapped_text = "<br>".join(
	            " ".join(words[start : start + 4]) for start in range(0, len(words), 4)
	        )

	        # Vertical offset derived from the bubble size so the text sits below
	        # the bubble (the y axis is reversed, so adding moves down).
	        # Adjust the divisor as needed to get proper spacing.
	        label_offset = marker_sizes[position] / 20

	        annotations.append(
	            dict(
	                x=row["x"],
	                y=row["y"]
	                + 0.125  # Adding this so in a row with maximum bubbles, the left one does not overlap with the bin label
	                + label_offset,
	                text=wrapped_text,
	                showarrow=False,
	                textangle=0,
	                font=dict(
	                    size=10,
	                    color="var(--foreground)",
	                    family="Arial, sans-serif",
	                    weight="bold",
	                ),
	                xanchor="center",
	                yanchor="top",  # Anchor to top of text box so it hangs below the bubble
	                bgcolor="rgba(255,255,255,0.7)",  # Add semi-transparent background for better readability
	                bordercolor="rgba(0,0,0,0.1)",  # Add a subtle border color
	                borderwidth=1,
	                borderpad=1,
	                # TODO: Radius for rounded corners
	            )
	        )

	    # Add bin labels and separator lines
	    unique_bins = sorted(df["bin"].unique())
	    bin_y_positions = [
	        df[df["bin"] == bin_name]["y"].mean() for bin_name in unique_bins
	    ]

	    # Dynamically extract bin descriptions
	    bin_descriptions = df.set_index("bin")["bin_description"].to_dict()

	    for bin_name, bin_y in zip(unique_bins, bin_y_positions):
	        # One dotted horizontal line per row
	        fig.add_shape(
	            type="line",
	            x0=0,
	            y0=bin_y,
	            x1=100,
	            y1=bin_y,
	            line=dict(color="rgba(0,0,0,0.1)", width=1, dash="dot"),
	            layer="below",
	        )

	        # Add bin label annotation
	        annotations.append(
	            dict(
	                x=0,  # Position the label on the left side
	                y=bin_y,
	                xref="x",
	                yref="y",
	                text=bin_descriptions[bin_name],
	                showarrow=False,
	                font=dict(size=8.25, color="var(--muted-foreground)"),
	                align="left",
	                xanchor="left",
	                yanchor="middle",
	                bgcolor="rgba(255,255,255,0.7)",
	                borderpad=1,
	            )
	        )

	    fig.update_layout(
	        title=None,
	        xaxis=dict(
	            showgrid=False,
	            zeroline=False,
	            showticklabels=False,
	            title=None,
	            range=[0, 100],
	        ),
	        yaxis=dict(
	            showgrid=False,
	            zeroline=False,
	            showticklabels=False,
	            title=None,
	            range=[0, 100],
	            autorange="reversed",  # Keep largest at top
	        ),
	        hovermode="closest",
	        margin=dict(l=0, r=0, t=10, b=10),
	        coloraxis_colorbar=dict(
	            title=color_title,
	            title_font=dict(size=9),
	            tickfont=dict(size=8),
	            thickness=10,
	            len=0.6,
	            yanchor="middle",
	            y=0.5,
	            xpad=0,
	        ),
	        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
	        paper_bgcolor="rgba(0,0,0,0)",
	        plot_bgcolor="rgba(0,0,0,0)",
	        hoverlabel=dict(bgcolor="white", font_size=12, font_family="Inter"),
	        annotations=annotations,  # Topic names and bin labels
	    )

	    return fig


	# Callback that fills the sidebar with details of the hovered/clicked topic
	@callback(
	    [
	        Output("topic-title", "children"),
	        Output("topic-metadata", "children"),
	        Output("topic-metrics", "children"),
	        Output("important-tags", "children"),
	        Output("sample-dialogs", "children"),
	        Output("no-topic-selected", "style"),
	    ],
	    [Input("bubble-chart", "hoverData"), Input("bubble-chart", "clickData")],
	    [
	        State("stored-data", "data"),
	        State("upload-data", "contents"),
	        State("upload-data", "filename"),
	    ],
	)
	def update_topic_details(
	    hover_data, click_data, stored_data, file_contents, filename=None
	):
	    """Build the sidebar content for the topic under the cursor.

	    Args:
	        hover_data: ``hoverData`` of the bubble chart.
	        click_data: ``clickData`` of the bubble chart; only used when
	            ``hover_data`` is empty.
	        stored_data: Topic statistics from the ``stored-data`` store.
	        file_contents: Raw upload payload (``"<header>,<base64 data>"``); it
	            is re-parsed here to get the individual dialogs of the topic.
	        filename: Name of the uploaded file, used to tell Excel from CSV.

	    Returns:
	        A 6-tuple ``(title, metadata, metrics, tags, sample_dialogs,
	        placeholder_style)``. When there is nothing to show, the first five
	        items are empty and the "no topic selected" placeholder is displayed.
	    """
	    nothing_selected = ("", [], [], "", [], {"display": "flex"})

	    # Hover data wins when both are present; click data is the fallback
	    hover_info = hover_data or click_data

	    if not hover_info or not stored_data or not file_contents:
	        return nothing_selected

	    # Extract topic name from the hover data
	    topic_name = hover_info["points"][0]["customdata"][0]

	    # Get stored data for this topic
	    df_stored = pd.DataFrame(stored_data)
	    topic_rows = df_stored[df_stored["deduplicated_topic_name"] == topic_name]
	    if topic_rows.empty:
	        # The hovered point does not match the stored statistics.
	        return nothing_selected
	    topic_data = topic_rows.iloc[0]

	    # Get original data to sample conversations
	    content_type, content_string = file_contents.split(",", 1)
	    decoded = base64.b64decode(content_string)

	    # Excel is recognised by file extension (.xls, .xlsx, ...) or, failing
	    # that, by the xlsx MIME type in the payload header. The extension check
	    # covers Excel uploads whose header carries a different MIME type.
	    _, dot, extension = (filename or "").lower().rpartition(".")
	    is_excel = bool(dot) and extension.startswith("xls")
	    is_excel = is_excel or (
	        content_type
	        == "data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64"
	    )
	    if is_excel:
	        df_full = pd.read_excel(io.BytesIO(decoded))
	    else:  # Assume CSV; "utf-8-sig" also strips a leading BOM
	        df_full = pd.read_csv(io.StringIO(decoded.decode("utf-8-sig")))

	    # Filter to this topic
	    topic_conversations = df_full[df_full["deduplicated_topic_name"] == topic_name]

	    # Create the title
	    title = html.Div([html.Span(topic_name)])

	    # Create metadata items
	    metadata_items = [
	        html.Div(
	            [
	                html.I(className="fas fa-comments metadata-icon"),
	                html.Span(f"{int(topic_data['count'])} dialogs"),
	            ],
	            className="metadata-item",
	        ),
	    ]

	    # Create metrics boxes: (stats column, label, CSS modifier class)
	    metric_specs = (
	        ("negative_rate", "Negative Sentiment", "negative"),
	        ("unresolved_rate", "Unresolved", "unresolved"),
	        ("urgent_rate", "Urgent", "urgent"),
	    )
	    metrics_boxes = [
	        html.Div(
	            [
	                html.Div(f"{topic_data[column]}%", className="metric-value"),
	                html.Div(label, className="metric-label"),
	            ],
	            className=f"metric-box {modifier}",
	        )
	        for column, label, modifier in metric_specs
	    ]

	    # Count the comma-separated tags in `consolidated_tags` (column optional)
	    tag_counts = {}
	    if "consolidated_tags" in topic_conversations.columns:
	        for tags_cell in topic_conversations["consolidated_tags"].dropna():
	            # str() guards against cells that were parsed as non-strings.
	            for tag in str(tags_cell).split(","):
	                tag = tag.strip()
	                if tag:
	                    tag_counts[tag] = tag_counts.get(tag, 0) + 1

	    # Sort by frequency (most common first) and then alphabetically for ties
	    sorted_tags = sorted(tag_counts.items(), key=lambda x: (-x[1], x[0]))

	    # Keep only the top K tags
	    TOP_K = 15
	    sorted_tags = sorted_tags[:TOP_K]

	    if sorted_tags:
	        # One pill per tag, showing its occurrence count
	        tags_output = html.Div(
	            [
	                html.Div(
	                    [
	                        html.I(className="fas fa-tag topic-tag-icon"),
	                        html.Span(f"{tag} ({count})"),
	                    ],
	                    className="topic-tag",
	                )
	                for tag, count in sorted_tags
	            ],
	            className="tags-container",
	        )
	    else:
	        tags_output = html.Div(
	            [
	                html.I(className="fas fa-info-circle", style={"marginRight": "5px"}),
	                "No tags found for this topic",
	            ],
	            className="no-tags-message",
	        )

	    # Sample up to 5 random dialogs
	    sample_size = min(5, len(topic_conversations))
	    if sample_size > 0:
	        sample_indices = random.sample(range(len(topic_conversations)), sample_size)
	        samples = topic_conversations.iloc[sample_indices]

	        sample_dialogs = []
	        for _, row in samples.iterrows():
	            # Per-dialog tags: sentiment, resolution, urgency
	            tags = [
	                html.Span(row["Sentiment"], className="dialog-tag tag-sentiment"),
	                html.Span(row["Resolution"], className="dialog-tag tag-resolution"),
	                html.Span(row["Urgency"], className="dialog-tag tag-urgency"),
	            ]

	            # Add Chat ID tag if 'id' column exists
	            if "id" in row:
	                tags.append(
	                    html.Span(
	                        f"Chat ID: {row['id']}", className="dialog-tag tag-chat-id"
	                    )
	                )

	            sample_dialogs.append(
	                html.Div(
	                    [
	                        html.Div(row["Summary"], className="dialog-summary"),
	                        html.Div(
	                            tags,
	                            className="dialog-metadata",
	                        ),
	                    ],
	                    className="dialog-item",
	                )
	            )
	    else:
	        sample_dialogs = [
	            html.Div(
	                "No sample dialogs available for this topic.",
	                style={"color": "var(--muted-foreground)"},
	            )
	        ]

	    return (
	        title,
	        metadata_items,
	        metrics_boxes,
	        tags_output,
	        sample_dialogs,
	        {"display": "none"},
	    )


	if __name__ == "__main__":
	    # Start the development server when executed as a script. ``app.run`` is
	    # the current entry point in Dash; ``run_server`` is only used as a
	    # fallback for installs that predate it (NOTE(review): confirm the
	    # pinned Dash version).
	    run = getattr(app, "run", None) or app.run_server
	    run(debug=False)