"""Plotly charts and PDF/CSV/PNG exports for whale transaction data."""

import base64
import io
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional, Tuple, Union
from xml.sax.saxutils import escape

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from matplotlib.backends.backend_pdf import PdfPages
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.pdfgen import canvas
from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer, Table, TableStyle


class Visualizer:
    """
    Generate visualizations and reports for whale transaction data.

    All methods treat the DataFrame they receive as read-only: any derived
    column (parsed timestamps, decimal-adjusted amounts, date buckets) is
    computed on a copy or on a separate Series, never written back to the
    caller's frame.
    """

    # Address used on both sides of a transfer to mark a mint ("buy") or burn ("sell").
    _NULL_ADDRESS = "0x0000000000000000000000000000000000000000"
    # First value of the synthetic hourly timeline used when no timestamp is usable.
    _PLACEHOLDER_START = "2025-01-01"

    def __init__(self):
        self.color_map = {
            "buy": "green",
            "sell": "red",
            "transfer": "blue",
            "other": "gray",
        }

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _message_figure(
        title: str,
        message: str,
        xaxis_title: str = "",
        yaxis_title: str = "",
        font_color: Optional[str] = None,
    ) -> go.Figure:
        """Build the blank 400px figure used for "no data" and error states."""
        fig = go.Figure()
        fig.update_layout(
            title=title,
            xaxis_title=xaxis_title,
            yaxis_title=yaxis_title,
            height=400,
            template="plotly_white",
        )
        font = dict(size=14)
        if font_color is not None:
            font["color"] = font_color
        fig.add_annotation(text=message, showarrow=False, font=font)
        return fig

    @staticmethod
    def _find_timestamp_column(df: pd.DataFrame) -> Optional[str]:
        """Return the name of the timestamp column ('Timestamp' or 'timeStamp'), if any."""
        for name in ("Timestamp", "timeStamp"):
            if name in df.columns:
                return name
        return None

    @staticmethod
    def _find_address_columns(df: pd.DataFrame) -> Tuple[Optional[str], Optional[str]]:
        """Return the (sender, receiver) column names, or (None, None) if absent."""
        for pair in (("From", "To"), ("from", "to")):
            if pair[0] in df.columns and pair[1] in df.columns:
                return pair
        return None, None

    @staticmethod
    def _find_amount_column(df: pd.DataFrame) -> Optional[str]:
        """Return the first available amount column: 'Amount', 'tokenAmount' or 'value'."""
        for name in ("Amount", "tokenAmount", "value"):
            if name in df.columns:
                return name
        return None

    @staticmethod
    def _coerce_datetime(series: pd.Series) -> Optional[pd.Series]:
        """
        Convert a timestamp column to datetimes without touching the input.

        Values are first interpreted as epoch seconds (numbers or numeric
        strings); if that fails they are parsed as date strings.

        Returns:
            A datetime Series, or None when neither interpretation works.
        """
        if pd.api.types.is_datetime64_any_dtype(series):
            return series
        try:
            return pd.to_datetime(pd.to_numeric(series), unit="s")
        except (ValueError, TypeError, OverflowError):
            pass
        try:
            return pd.to_datetime(series)
        except (ValueError, TypeError, OverflowError):
            return None

    @classmethod
    def _timestamps_or_placeholder(cls, df: pd.DataFrame) -> pd.Series:
        """
        Return parsed timestamps for *df*, one per row.

        When the frame has no timestamp column, or it cannot be parsed, a
        synthetic hourly sequence is returned so a chart can still be drawn.
        """
        column = cls._find_timestamp_column(df)
        if column is not None:
            parsed = cls._coerce_datetime(df[column])
            if parsed is not None:
                return parsed
            print(f"Error converting timestamp: could not parse column '{column}'")
        # Built from timedeltas rather than a frequency alias: the 'H' alias is
        # deprecated in recent pandas releases.
        hours = pd.to_timedelta(np.arange(len(df)), unit="h")
        placeholder = pd.Timestamp(cls._PLACEHOLDER_START) + hours
        return pd.Series(placeholder.to_numpy(), index=df.index)

    @classmethod
    def _numeric_amounts(
        cls, df: pd.DataFrame, adjust_decimals: bool = True
    ) -> Optional[pd.Series]:
        """
        Return the transaction amounts as a numeric Series.

        Non-numeric columns (for example API responses that deliver numbers as
        strings) are converted; entries that cannot be converted become NaN and
        are therefore ignored by sums. When the amount comes from the raw
        'value' column and 'tokenDecimal' is present, the value is divided by
        10 ** tokenDecimal (only if *adjust_decimals* is true).

        Returns:
            The numeric Series, or None when no amount column exists.
        """
        column = cls._find_amount_column(df)
        if column is None:
            return None
        amounts = df[column]
        if not pd.api.types.is_numeric_dtype(amounts):
            amounts = pd.to_numeric(amounts, errors="coerce")
        if adjust_decimals and column == "value" and "tokenDecimal" in df.columns:
            decimals = pd.to_numeric(df["tokenDecimal"], errors="coerce").astype(float)
            # Float base: an integer power overflows int64 above 18 decimals
            # and rejects negative exponents.
            amounts = amounts.astype(float) / np.power(10.0, decimals)
        return amounts

    @staticmethod
    def _bucket_timestamps(timestamps: pd.Series, time_window: str) -> pd.Series:
        """
        Map each timestamp to the start of its aggregation window.

        'D' yields calendar dates. Any other pandas period alias (e.g. 'H',
        'W', 'M') yields the period start time. An alias that pandas rejects
        falls back to daily buckets.
        """
        alias = str(time_window) if time_window else "D"
        if alias.upper() == "D":
            return timestamps.dt.date
        # Hour/minute/second aliases changed case between pandas versions, so
        # try the spelling given and then the other case.
        for candidate in dict.fromkeys((alias, alias.lower(), alias.upper())):
            try:
                return timestamps.dt.to_period(candidate).dt.start_time
            except (ValueError, TypeError, KeyError):
                continue
        return timestamps.dt.date

    @staticmethod
    def _format_decimal(value: Any) -> str:
        """Format *value* with two decimals, or as escaped text if it is not numeric."""
        try:
            return f"{float(value):.2f}"
        except (TypeError, ValueError):
            return escape(str(value))

    # ------------------------------------------------------------------
    # Charts
    # ------------------------------------------------------------------
    def create_transaction_timeline(self, transactions_df: pd.DataFrame) -> go.Figure:
        """
        Create a timeline visualization of transactions

        Each transaction is classified as "buy" (sender missing or the null
        address), "sell" (receiver missing or the null address) or "transfer",
        and plotted as a marker at its timestamp. One trace is emitted per
        transaction type so the legend has a single entry per type.

        Args:
            transactions_df: DataFrame of transactions (not modified)

        Returns:
            Plotly figure object; a placeholder figure when the frame is empty
            or an error occurs
        """
        if transactions_df.empty:
            return self._message_figure(
                "No Transaction Data Available",
                "No transaction data available for timeline",
                xaxis_title="Date",
                yaxis_title="Action",
            )

        try:
            timestamps = self._timestamps_or_placeholder(transactions_df)

            from_col, to_col = self._find_address_columns(transactions_df)
            if from_col is None:
                raise ValueError("From/To columns not found in transactions data")

            columns = transactions_df.columns
            has_amount = "Amount" in columns
            has_value = "value" in columns
            has_token = "tokenSymbol" in columns

            # tx_type -> (x values, hover texts), in order of first appearance
            points: Dict[str, Tuple[list, list]] = {}
            for (_, row), moment in zip(transactions_df.iterrows(), timestamps):
                sender, receiver = row[from_col], row[to_col]

                if pd.isna(sender) or sender == self._NULL_ADDRESS:
                    tx_type = "buy"
                    hover_text = f"Buy: {receiver}"
                elif pd.isna(receiver) or receiver == self._NULL_ADDRESS:
                    tx_type = "sell"
                    hover_text = f"Sell: {sender}"
                else:
                    tx_type = "transfer"
                    hover_text = f"Transfer: {sender} → {receiver}"

                # Plotly hover labels use <br> for line breaks.
                if has_amount:
                    hover_text += f"<br>Amount: {row['Amount']}"
                elif has_value:
                    hover_text += f"<br>Value: {row['value']}"
                if has_token:
                    hover_text += f"<br>Token: {row['tokenSymbol']}"

                xs, texts = points.setdefault(tx_type, ([], []))
                xs.append(moment)
                texts.append(hover_text)

            fig = go.Figure()
            for tx_type, (xs, texts) in points.items():
                fig.add_trace(go.Scatter(
                    x=xs,
                    y=[tx_type] * len(xs),
                    mode='markers',
                    marker=dict(
                        size=12,
                        color=self.color_map.get(tx_type, "gray"),
                        line=dict(width=1, color='black'),
                    ),
                    name=tx_type,
                    text=texts,
                    hoverinfo='text',
                ))

            fig.update_layout(
                title='Whale Transaction Timeline',
                xaxis_title='Time',
                yaxis_title='Transaction Type',
                height=400,
                template='plotly_white',
                showlegend=True,
                hovermode='closest',
            )
            return fig

        except Exception as e:
            # Chart boundary: report the failure inside the figure instead of raising.
            print(f"Error creating transaction timeline: {str(e)}")
            return self._message_figure(
                "Error in Transaction Timeline",
                f"Error generating timeline: {str(e)}",
                font_color="red",
            )

    def create_volume_chart(self, transactions_df: pd.DataFrame, time_window: str = 'D') -> go.Figure:
        """
        Create a volume chart aggregated by time window

        Bars show the summed amount per window; a line on a secondary axis
        shows the number of transactions per window. When the frame has no
        amount column every transaction counts as 1.0.

        Args:
            transactions_df: DataFrame of transactions (not modified)
            time_window: Time window for aggregation (e.g., 'D' for day, 'H' for hour)

        Returns:
            Plotly figure object; a placeholder figure when the frame is empty
            or an error occurs
        """
        if transactions_df.empty:
            return self._message_figure(
                "No Transaction Data Available",
                "No transactions found for volume analysis",
                xaxis_title="Date",
                yaxis_title="Volume",
            )

        try:
            timestamps = self._timestamps_or_placeholder(transactions_df)

            amounts = self._numeric_amounts(transactions_df)
            if amounts is None:
                amounts = pd.Series(1.0, index=transactions_df.index)

            # Aggregate on a small scratch frame so the caller's data is untouched.
            scratch = pd.DataFrame({
                "Date": self._bucket_timestamps(timestamps, time_window).to_numpy(),
                "Volume": amounts.to_numpy(),
                "Count": timestamps.to_numpy(),
            })
            volume_data = (
                scratch.groupby("Date")
                .agg(Volume=("Volume", "sum"), Count=("Count", "count"))
                .reset_index()
            )

            fig = go.Figure()
            fig.add_trace(go.Bar(
                x=volume_data['Date'],
                y=volume_data['Volume'],
                name='Volume',
                marker_color='blue',
                opacity=0.7,
            ))
            fig.add_trace(go.Scatter(
                x=volume_data['Date'],
                y=volume_data['Count'],
                name='Transaction Count',
                mode='lines+markers',
                marker=dict(color='red'),
                yaxis='y2',
            ))
            fig.update_layout(
                title="Transaction Volume Over Time",
                xaxis_title="Date",
                yaxis_title="Volume",
                yaxis2=dict(
                    title="Transaction Count",
                    overlaying="y",
                    side="right",
                ),
                height=500,
                template="plotly_white",
                hovermode="x unified",
                legend=dict(
                    orientation="h",
                    yanchor="bottom",
                    y=1.02,
                    xanchor="right",
                    x=1,
                ),
            )
            return fig

        except Exception as e:
            # Chart boundary: report the failure inside the figure instead of raising.
            print(f"Error in create_volume_chart: {str(e)}")
            return self._message_figure(
                "Error in Volume Chart",
                f"Error generating volume chart: {str(e)}",
                font_color="red",
            )

    def plot_volume_by_day(self, transactions_df: pd.DataFrame) -> go.Figure:
        """
        Create a volume chart aggregated by day

        Thin wrapper around create_volume_chart with the window fixed to 'D'.

        Args:
            transactions_df: DataFrame of transactions

        Returns:
            Plotly figure object
        """
        return self.create_volume_chart(transactions_df, time_window='D')

    def plot_transaction_flow(self, transactions_df: pd.DataFrame) -> go.Figure:
        """
        Create a network flow visualization of transactions between wallets

        Amounts are summed per (sender, receiver) pair and the 20 largest
        flows are drawn as a Sankey diagram.

        Args:
            transactions_df: DataFrame of transactions (not modified)

        Returns:
            Plotly figure object; a placeholder figure when the frame is empty,
            required columns are missing, or an error occurs
        """
        if transactions_df.empty:
            return self._message_figure(
                "No Transaction Flow Data Available",
                "No transactions found for flow analysis",
            )

        try:
            from_col, to_col = self._find_address_columns(transactions_df)
            if from_col is None:
                return self._message_figure(
                    "Transaction Flow Error",
                    "From/To columns not found in transactions data",
                    font_color="red",
                )

            amounts = self._numeric_amounts(transactions_df)
            if amounts is None:
                return self._message_figure(
                    "Transaction Flow Error",
                    "Amount column not found in transactions data",
                    font_color="red",
                )

            # Aggregate flows between wallets on a scratch frame.
            scratch = pd.DataFrame({
                "source": transactions_df[from_col].to_numpy(),
                "target": transactions_df[to_col].to_numpy(),
                "amount": amounts.to_numpy(),
            })
            flow_df = (
                scratch.groupby(["source", "target"])["amount"]
                .agg(Value="sum", Count="count")
                .reset_index()
            )

            # Limit to top 20 flows to keep visualization readable
            top_flows = flow_df.sort_values('Value', ascending=False).head(20)

            senders = top_flows["source"].tolist()
            receivers = top_flows["target"].tolist()
            totals = top_flows["Value"].tolist()
            counts = top_flows["Count"].tolist()

            # Sankey nodes are referenced by position; dict.fromkeys keeps
            # first-seen order while removing duplicates.
            all_addresses = list(dict.fromkeys(senders + receivers))
            address_to_idx = {addr: i for i, addr in enumerate(all_addresses)}

            sources = [address_to_idx[addr] for addr in senders]
            targets = [address_to_idx[addr] for addr in receivers]

            hover_text = [
                f"From: {src}<br>To: {tgt}<br>Value: {val:.2f}<br>Count: {cnt}"
                for src, tgt, val, cnt in zip(senders, receivers, totals, counts)
            ]

            # Shorten long addresses for node labels (str() tolerates non-string keys).
            node_labels = []
            for addr in all_addresses:
                label = str(addr)
                if len(label) > 12:
                    label = f"{label[:6]}...{label[-4:]}"
                node_labels.append(label)

            fig = go.Figure(data=[go.Sankey(
                node=dict(
                    pad=15,
                    thickness=20,
                    line=dict(color="black", width=0.5),
                    label=node_labels,
                    color="blue",
                ),
                link=dict(
                    source=sources,
                    target=targets,
                    value=totals,
                    label=hover_text,
                    hovertemplate='%{label}',
                ),
            )])
            fig.update_layout(
                title="Whale Transaction Flow",
                font_size=12,
                height=600,
                template="plotly_white",
            )
            return fig

        except Exception as e:
            # Chart boundary: report the failure inside the figure instead of raising.
            print(f"Error in plot_transaction_flow: {str(e)}")
            return self._message_figure(
                "Error in Transaction Flow",
                f"Error generating transaction flow: {str(e)}",
                font_color="red",
            )

    # ------------------------------------------------------------------
    # Reports
    # ------------------------------------------------------------------
    def generate_pdf_report(self, transactions_df: pd.DataFrame,
                            patterns: Optional[List[Dict[str, Any]]] = None,
                            price_impact: Optional[Dict[str, Any]] = None,
                            alerts: Optional[List[Dict[str, Any]]] = None,
                            title: str = "Whale Analysis Report",
                            start_date: Optional[datetime] = None,
                            end_date: Optional[datetime] = None) -> bytes:
        """
        Generate a PDF report of whale activity

        Data-derived text (pattern names, insights, alert messages) is
        XML-escaped before being handed to reportlab, and multi-line entries
        are joined with <br/> because reportlab paragraphs collapse newlines.
        The title is passed through unchanged.

        Args:
            transactions_df: DataFrame of transactions
            patterns: List of pattern dictionaries
            price_impact: Dictionary of price impact analysis
            alerts: List of alert dictionaries
            title: Report title
            start_date: Start date for report period
            end_date: End date for report period

        Returns:
            PDF report as bytes
        """
        buffer = io.BytesIO()
        doc = SimpleDocTemplate(buffer, pagesize=letter)
        styles = getSampleStyleSheet()

        elements = [Paragraph(title, styles['Title'])]

        if start_date and end_date:
            date_range = f"Period: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}"
            elements.append(Paragraph(date_range, styles['Heading2']))
        elements.append(Spacer(1, 12))

        # Transaction summary
        if transactions_df is not None and not transactions_df.empty:
            elements.append(Paragraph("Transaction Summary", styles['Heading2']))

            summary_data = [["Total Transactions", str(len(transactions_df))]]

            from_col, to_col = self._find_address_columns(transactions_df)
            if from_col is not None:
                addresses = pd.concat(
                    [transactions_df[from_col], transactions_df[to_col]],
                    ignore_index=True,
                )
                summary_data.append(["Unique Addresses", str(addresses.nunique())])

            if 'tokenSymbol' in transactions_df.columns:
                token_counts = transactions_df['tokenSymbol'].value_counts()
                if not token_counts.empty:
                    summary_data.append([
                        "Most Common Token",
                        f"{token_counts.index[0]} ({token_counts.iloc[0]} txns)",
                    ])

            summary_table = Table(summary_data)
            summary_table.setStyle(TableStyle([
                ('BACKGROUND', (0, 0), (0, -1), colors.lightgrey),
                ('GRID', (0, 0), (-1, -1), 1, colors.black),
                # Padding is set per side with the documented commands.
                ('LEFTPADDING', (0, 0), (-1, -1), 6),
                ('RIGHTPADDING', (0, 0), (-1, -1), 6),
                ('TOPPADDING', (0, 0), (-1, -1), 6),
                ('BOTTOMPADDING', (0, 0), (-1, -1), 6),
            ]))
            elements.append(summary_table)
            elements.append(Spacer(1, 12))

        # Pattern analysis
        if patterns:
            elements.append(Paragraph("Trading Patterns Detected", styles['Heading2']))
            for number, pattern in enumerate(patterns, start=1):
                lines = [
                    f"Pattern {number}: {escape(str(pattern.get('name', 'Unnamed')))}",
                    f"Description: {escape(str(pattern.get('description', 'No description')))}",
                ]
                if 'risk_profile' in pattern:
                    lines.append(f"Risk Profile: {escape(str(pattern['risk_profile']))}")
                if 'confidence' in pattern:
                    lines.append(f"Confidence: {self._format_decimal(pattern['confidence'])}")
                elements.append(Paragraph("<br/>".join(lines), styles['Normal']))
                elements.append(Spacer(1, 6))
            elements.append(Spacer(1, 12))

        # Price impact analysis
        if price_impact:
            elements.append(Paragraph("Price Impact Analysis", styles['Heading2']))
            lines = []
            if 'avg_impact' in price_impact:
                lines.append(f"Average Impact: {self._format_decimal(price_impact['avg_impact'])}%")
            if 'max_impact' in price_impact:
                lines.append(f"Maximum Impact: {self._format_decimal(price_impact['max_impact'])}%")
            if 'insights' in price_impact:
                lines.append(f"Insights: {escape(str(price_impact['insights']))}")
            elements.append(Paragraph("<br/>".join(lines), styles['Normal']))
            elements.append(Spacer(1, 12))

        # Alerts
        if alerts:
            elements.append(Paragraph("Alerts", styles['Heading2']))
            for alert in alerts:
                level = escape(str(alert.get('level', 'Info')))
                message = escape(str(alert.get('message', 'No details')))
                elements.append(Paragraph(f"{level}: {message}", styles['Normal']))
                elements.append(Spacer(1, 6))

        doc.build(elements)
        return buffer.getvalue()

    def generate_csv_report(self, transactions_df: pd.DataFrame, report_type: str = "Transaction Summary") -> str:
        """
        Generate a CSV report of transaction data

        "Transaction Summary" dumps the frame as-is. "Daily Volume" sums the
        amount column per calendar day (raw values, no token-decimal
        adjustment) and counts transactions per day, using the 'hash' column
        when present and the timestamp otherwise.

        Args:
            transactions_df: DataFrame of transactions (not modified)
            report_type: Type of report to generate

        Returns:
            CSV data as string, or a short explanatory message when the
            report cannot be produced
        """
        if transactions_df.empty:
            return "No data available for report"

        if report_type == "Transaction Summary":
            return transactions_df.to_csv(index=False)

        if report_type != "Daily Volume":
            return "Unknown report type"

        timestamp_col = self._find_timestamp_column(transactions_df)
        if timestamp_col is None:
            return "Timestamp column not found"

        timestamps = self._coerce_datetime(transactions_df[timestamp_col])
        if timestamps is None:
            return "Error processing timestamp data"

        amounts = self._numeric_amounts(transactions_df, adjust_decimals=False)
        if amounts is None:
            return "Amount column not found"

        if 'hash' in transactions_df.columns:
            counted = transactions_df['hash']
        else:
            counted = timestamps

        scratch = pd.DataFrame({
            "Date": timestamps.dt.date.to_numpy(),
            "Volume": amounts.to_numpy(),
            "Transactions": counted.to_numpy(),
        })
        daily_volume = (
            scratch.groupby("Date")
            .agg(Volume=("Volume", "sum"), Transactions=("Transactions", "count"))
            .reset_index()
        )
        return daily_volume.to_csv(index=False)

    def generate_png_chart(self, fig: go.Figure, width: int = 1200, height: int = 800) -> bytes:
        """
        Convert a Plotly figure to PNG image data

        Args:
            fig: Plotly figure object
            width: Image width in pixels
            height: Image height in pixels

        Returns:
            PNG image as bytes
        """
        return fig.to_image(format="png", width=width, height=height)