# Spaces:
# Build error
# Build error
import pandas as pd | |
import numpy as np | |
import plotly.graph_objects as go | |
import plotly.express as px | |
from datetime import datetime, timedelta | |
from typing import Dict, List, Optional, Union, Any, Tuple | |
import io | |
import base64 | |
import matplotlib.pyplot as plt | |
from matplotlib.backends.backend_pdf import PdfPages | |
from reportlab.lib.pagesizes import letter | |
from reportlab.pdfgen import canvas | |
from reportlab.lib import colors | |
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer | |
from reportlab.lib.styles import getSampleStyleSheet | |
class Visualizer: | |
""" | |
Generate visualizations and reports for whale transaction data | |
""" | |
def __init__(self): | |
self.color_map = { | |
"buy": "green", | |
"sell": "red", | |
"transfer": "blue", | |
"other": "gray" | |
} | |
def create_transaction_timeline(self, transactions_df: pd.DataFrame) -> go.Figure:
    """
    Create a timeline visualization of transactions

    Every transaction becomes a marker placed at its timestamp on a
    categorical y axis. A row whose sender is missing or the zero address
    is labelled "buy", one whose receiver is missing or the zero address is
    labelled "sell", and everything else is a "transfer".

    Args:
        transactions_df: DataFrame of transactions. Columns looked up are
            'Timestamp' or 'timeStamp' (datetime or epoch seconds),
            'From'/'To' or 'from'/'to', and optionally 'Amount' or 'value'
            and 'tokenSymbol'. The frame is copied and never modified.

    Returns:
        Plotly figure object. When the frame is empty, or any error occurs
        while building the chart, an annotated placeholder figure is
        returned instead of raising.
    """
    if transactions_df.empty:
        fig = go.Figure()
        fig.update_layout(
            title="No Transaction Data Available",
            xaxis_title="Date",
            yaxis_title="Action",
            height=400,
            template="plotly_white"
        )
        fig.add_annotation(
            text="No transaction data available for timeline",
            showarrow=False,
            font=dict(size=14)
        )
        return fig

    try:
        # Work on a copy so the caller's DataFrame is left untouched.
        df = transactions_df.copy()

        # Hourly placeholder timestamps, used when real ones are missing or
        # unparseable. A timedelta is passed as the frequency instead of
        # the deprecated 'H' string alias.
        placeholder_times = pd.date_range(
            start='2025-01-01', periods=len(df), freq=timedelta(hours=1)
        )

        if 'Timestamp' in df.columns:
            timestamp_col = 'Timestamp'
        elif 'timeStamp' in df.columns:
            timestamp_col = 'timeStamp'
        else:
            # Create a dummy timestamp if none exists
            df['dummy_timestamp'] = placeholder_times
            timestamp_col = 'dummy_timestamp'

        # Convert epoch seconds to datetime for whichever column was found.
        if not pd.api.types.is_datetime64_any_dtype(df[timestamp_col]):
            try:
                df[timestamp_col] = pd.to_datetime(df[timestamp_col].astype(float), unit='s')
            except Exception as e:
                print(f"Error converting timestamp: {str(e)}")
                df[timestamp_col] = placeholder_times

        # Column names are the same for every row, so resolve them once.
        if 'From' in df.columns and 'To' in df.columns:
            from_col, to_col = 'From', 'To'
        else:
            # If these are absent too, the lookup below raises KeyError and
            # the outer handler turns it into an error figure.
            from_col, to_col = 'from', 'to'
        has_amount = 'Amount' in df.columns
        has_value = 'value' in df.columns
        has_token = 'tokenSymbol' in df.columns
        zero_address = '0x0000000000000000000000000000000000000000'

        # Collect points per transaction type so that each type is drawn as
        # a single trace (one legend entry) instead of one trace per row.
        points_by_type = {}
        for _, row in df.iterrows():
            sender = row[from_col]
            receiver = row[to_col]
            if pd.isna(sender) or sender == zero_address:
                tx_type = "buy"
                hover_text = f"Buy: {receiver}"
            elif pd.isna(receiver) or receiver == zero_address:
                tx_type = "sell"
                hover_text = f"Sell: {sender}"
            else:
                tx_type = "transfer"
                hover_text = f"Transfer: {sender} → {receiver}"

            # Add amount to hover text if available
            if has_amount:
                hover_text += f"<br>Amount: {row['Amount']}"
            elif has_value:
                hover_text += f"<br>Value: {row['value']}"

            # Add token info if available
            if has_token:
                hover_text += f"<br>Token: {row['tokenSymbol']}"

            times, texts = points_by_type.setdefault(tx_type, ([], []))
            times.append(row[timestamp_col])
            texts.append(hover_text)

        fig = go.Figure()
        for tx_type, (times, texts) in points_by_type.items():
            fig.add_trace(go.Scatter(
                x=times,
                y=[tx_type] * len(times),
                mode='markers',
                marker=dict(
                    size=12,
                    color=self.color_map.get(tx_type, "gray"),
                    line=dict(width=1, color='black')
                ),
                name=tx_type,
                text=texts,
                hoverinfo='text'
            ))

        fig.update_layout(
            title='Whale Transaction Timeline',
            xaxis_title='Time',
            yaxis_title='Transaction Type',
            height=400,
            template='plotly_white',
            showlegend=True,
            hovermode='closest'
        )
        return fig

    except Exception as e:
        # If any error occurs, return a figure with error information
        print(f"Error creating transaction timeline: {str(e)}")
        fig = go.Figure()
        fig.update_layout(
            title="Error in Transaction Timeline",
            xaxis_title="",
            yaxis_title="",
            height=400,
            template="plotly_white"
        )
        fig.add_annotation(
            text=f"Error generating timeline: {str(e)}",
            showarrow=False,
            font=dict(size=14, color="red")
        )
        return fig
def create_volume_chart(self, transactions_df: pd.DataFrame, time_window: str = 'D') -> go.Figure:
    """
    Create a volume chart aggregated by time window

    Bars show the summed amount per bucket; a line on a secondary y axis
    shows the number of transactions in the same bucket.

    Args:
        transactions_df: DataFrame of transactions. The timestamp is read
            from 'Timestamp' or 'timeStamp' (datetime or epoch seconds) and
            the amount from 'Amount', 'tokenAmount' or 'value' ('value' is
            divided by 10 ** 'tokenDecimal' when that column exists). The
            frame is copied and never modified.
        time_window: Time window for aggregation (e.g., 'D' for day, 'H' for
            hour). 'D' buckets by calendar date. Any other value is passed
            to pandas as a period frequency and each bucket is labelled
            with its period start; if pandas rejects the value, daily
            buckets are used instead.

    Returns:
        Plotly figure object. When the frame is empty, or any error occurs
        while building the chart, an annotated placeholder figure is
        returned instead of raising.
    """
    # Create an empty figure with appropriate message if no data
    if transactions_df.empty:
        fig = go.Figure()
        fig.update_layout(
            title="No Transaction Data Available",
            xaxis_title="Date",
            yaxis_title="Volume",
            height=400,
            template="plotly_white"
        )
        fig.add_annotation(
            text="No transactions found for volume analysis",
            showarrow=False,
            font=dict(size=14)
        )
        return fig

    try:
        # Copy to avoid modifying the original
        df = transactions_df.copy()

        # Hourly placeholder timestamps, used when real ones are missing or
        # unparseable. A timedelta is passed as the frequency instead of
        # the deprecated 'H' string alias.
        placeholder_times = pd.date_range(
            start='2025-01-01', periods=len(df), freq=timedelta(hours=1)
        )

        if 'Timestamp' in df.columns:
            timestamp_col = 'Timestamp'
        elif 'timeStamp' in df.columns:
            timestamp_col = 'timeStamp'
        else:
            # Create a dummy timestamp if none exists
            df['dummy_timestamp'] = placeholder_times
            timestamp_col = 'dummy_timestamp'

        # Convert timestamp to datetime safely
        if not pd.api.types.is_datetime64_any_dtype(df[timestamp_col]):
            try:
                df[timestamp_col] = pd.to_datetime(df[timestamp_col].astype(float), unit='s')
            except Exception as e:
                print(f"Error converting timestamp: {str(e)}")
                df[timestamp_col] = placeholder_times

        # Ensure amount column exists
        if 'Amount' in df.columns:
            amount_col = 'Amount'
        elif 'tokenAmount' in df.columns:
            amount_col = 'tokenAmount'
        elif 'value' in df.columns:
            if 'tokenDecimal' in df.columns:
                # Float base: an integer power silently wraps around once
                # the exponent exceeds what int64 can hold (10 ** 19).
                df['adjustedValue'] = df['value'].astype(float) / (10.0 ** df['tokenDecimal'].astype(int))
                amount_col = 'adjustedValue'
            else:
                amount_col = 'value'
        else:
            # Create a dummy amount column if none exists
            df['dummy_amount'] = 1.0
            amount_col = 'dummy_amount'

        # Amounts that arrive as strings would be concatenated by sum();
        # coerce to numbers first (unparseable entries become NaN and are
        # skipped by the aggregation).
        df[amount_col] = pd.to_numeric(df[amount_col], errors='coerce')

        # Assign each row to its aggregation bucket.
        daily_buckets = df[timestamp_col].dt.date
        if time_window == 'D':
            df['date'] = daily_buckets
        else:
            try:
                df['date'] = df[timestamp_col].dt.to_period(time_window).dt.start_time
            except Exception as e:
                print(f"Unsupported time window {time_window!r}, using daily buckets: {str(e)}")
                df['date'] = daily_buckets

        volume_data = df.groupby('date').agg({
            amount_col: 'sum',
            timestamp_col: 'count'
        }).reset_index()
        volume_data.columns = ['Date', 'Volume', 'Count']

        fig = go.Figure()

        # Add volume bars
        fig.add_trace(go.Bar(
            x=volume_data['Date'],
            y=volume_data['Volume'],
            name='Volume',
            marker_color='blue',
            opacity=0.7
        ))

        # Add transaction count line (drawn against the secondary y axis)
        fig.add_trace(go.Scatter(
            x=volume_data['Date'],
            y=volume_data['Count'],
            name='Transaction Count',
            mode='lines+markers',
            marker=dict(color='red'),
            yaxis='y2'
        ))

        fig.update_layout(
            title="Transaction Volume Over Time",
            xaxis_title="Date",
            yaxis_title="Volume",
            yaxis2=dict(
                title="Transaction Count",
                overlaying="y",
                side="right"
            ),
            height=500,
            template="plotly_white",
            hovermode="x unified",
            legend=dict(
                orientation="h",
                yanchor="bottom",
                y=1.02,
                xanchor="right",
                x=1
            )
        )
        return fig

    except Exception as e:
        # If any error occurs, return a figure with error information
        print(f"Error in create_volume_chart: {str(e)}")
        fig = go.Figure()
        fig.update_layout(
            title="Error in Volume Chart",
            xaxis_title="",
            yaxis_title="",
            height=400,
            template="plotly_white"
        )
        fig.add_annotation(
            text=f"Error generating volume chart: {str(e)}",
            showarrow=False,
            font=dict(size=14, color="red")
        )
        return fig
def plot_volume_by_day(self, transactions_df: pd.DataFrame) -> go.Figure:
    """
    Build the volume chart with the aggregation window fixed to one day

    Args:
        transactions_df: DataFrame of transactions
    Returns:
        Plotly figure object produced by create_volume_chart
    """
    # Pure delegation: the only thing this method decides is the window.
    daily_window = 'D'
    return self.create_volume_chart(transactions_df, time_window=daily_window)
def plot_transaction_flow(self, transactions_df: pd.DataFrame) -> go.Figure:
    """
    Create a network flow visualization of transactions between wallets

    Transactions are summed per (sender, receiver) pair and the 20 largest
    pairs are drawn as a Sankey diagram.

    Args:
        transactions_df: DataFrame of transactions. Addresses are read from
            'From'/'To' or 'from'/'to' and the amount from 'Amount',
            'tokenAmount' or 'value' ('value' is divided by
            10 ** 'tokenDecimal' when that column exists). The frame is
            copied and never modified.

    Returns:
        Plotly figure object. When the frame is empty, a required column is
        missing, or any error occurs, an annotated placeholder figure is
        returned instead of raising.
    """
    def _message_figure(title, message, color=None):
        # Blank 400px figure carrying a single annotation; used for the
        # empty-data and error outcomes.
        font = dict(size=14)
        if color is not None:
            font["color"] = color
        placeholder = go.Figure()
        placeholder.update_layout(
            title=title,
            xaxis_title="",
            yaxis_title="",
            height=400,
            template="plotly_white"
        )
        placeholder.add_annotation(text=message, showarrow=False, font=font)
        return placeholder

    if transactions_df.empty:
        return _message_figure(
            "No Transaction Flow Data Available",
            "No transactions found for flow analysis",
        )

    try:
        # Work on a copy so the helper column added below never leaks into
        # the caller's DataFrame.
        df = transactions_df.copy()

        # Ensure from/to columns exist
        if 'From' in df.columns and 'To' in df.columns:
            from_col, to_col = 'From', 'To'
        elif 'from' in df.columns and 'to' in df.columns:
            from_col, to_col = 'from', 'to'
        else:
            return _message_figure(
                "Transaction Flow Error",
                "From/To columns not found in transactions data",
                color="red",
            )

        # Ensure amount column exists
        if 'Amount' in df.columns:
            amount_col = 'Amount'
        elif 'tokenAmount' in df.columns:
            amount_col = 'tokenAmount'
        elif 'value' in df.columns:
            if 'tokenDecimal' in df.columns:
                # Float base: an integer power silently wraps around once
                # the exponent exceeds what int64 can hold (10 ** 19).
                df['adjustedValue'] = df['value'].astype(float) / (10.0 ** df['tokenDecimal'].astype(int))
                amount_col = 'adjustedValue'
            else:
                amount_col = 'value'
        else:
            return _message_figure(
                "Transaction Flow Error",
                "Amount column not found in transactions data",
                color="red",
            )

        # Amounts that arrive as strings would be concatenated by sum() and
        # then break the numeric formatting below; coerce them first.
        df[amount_col] = pd.to_numeric(df[amount_col], errors='coerce')

        # Aggregate flows between wallets
        flow_df = df.groupby([from_col, to_col]).agg({
            amount_col: ['sum', 'count']
        }).reset_index()
        flow_df.columns = [from_col, to_col, 'Value', 'Count']

        # Limit to top 20 flows to keep visualization readable
        top_flows = flow_df.sort_values('Value', ascending=False).head(20)

        # Sankey links refer to nodes by position, so give every address
        # that appears in the top flows an index.
        all_addresses = pd.unique(top_flows[[from_col, to_col]].values.ravel('K'))
        address_to_idx = {addr: i for i, addr in enumerate(all_addresses)}

        sources = [address_to_idx[addr] for addr in top_flows[from_col]]
        targets = [address_to_idx[addr] for addr in top_flows[to_col]]
        values = top_flows['Value'].tolist()

        hover_text = [
            f"From: {src}<br>To: {tgt}<br>Value: {val:.2f}<br>Count: {cnt}"
            for src, tgt, val, cnt in zip(top_flows[from_col], top_flows[to_col],
                                          top_flows['Value'], top_flows['Count'])
        ]

        # Shorten addresses for node labels; str() keeps non-string
        # identifiers from failing on len()/slicing.
        node_labels = []
        for addr in all_addresses:
            label = str(addr)
            if len(label) > 12:
                label = f"{label[:6]}...{label[-4:]}"
            node_labels.append(label)

        fig = go.Figure(data=[go.Sankey(
            node=dict(
                pad=15,
                thickness=20,
                line=dict(color="black", width=0.5),
                label=node_labels,
                color="blue"
            ),
            link=dict(
                source=sources,
                target=targets,
                value=values,
                label=hover_text,
                hovertemplate='%{label}<extra></extra>'
            )
        )])
        fig.update_layout(
            title="Whale Transaction Flow",
            font_size=12,
            height=600,
            template="plotly_white"
        )
        return fig

    except Exception as e:
        # If any error occurs, return a figure with error information
        print(f"Error in plot_transaction_flow: {str(e)}")
        return _message_figure(
            "Error in Transaction Flow",
            f"Error generating transaction flow: {str(e)}",
            color="red",
        )
def generate_pdf_report(self,
                        transactions_df: pd.DataFrame,
                        patterns: Optional[List[Dict[str, Any]]] = None,
                        price_impact: Optional[Dict[str, Any]] = None,
                        alerts: Optional[List[Dict[str, Any]]] = None,
                        title: str = "Whale Analysis Report",
                        start_date: Optional[datetime] = None,
                        end_date: Optional[datetime] = None) -> bytes:
    """
    Generate a PDF report of whale activity

    Sections are emitted only when their input is present: a transaction
    summary table, detected patterns, price impact figures and alerts.

    Args:
        transactions_df: DataFrame of transactions. Addresses are read from
            'From'/'To' or 'from'/'to' when available; 'tokenSymbol' is
            optional.
        patterns: List of pattern dictionaries (keys read: 'name',
            'description', 'risk_profile', 'confidence')
        price_impact: Dictionary of price impact analysis (keys read:
            'avg_impact', 'max_impact', 'insights')
        alerts: List of alert dictionaries (keys read: 'level', 'message')
        title: Report title
        start_date: Start date for report period
        end_date: End date for report period

    Returns:
        PDF report as bytes
    """
    # Local import: only this method needs it.
    from xml.sax.saxutils import escape

    def _safe(value):
        # Paragraph text is parsed as markup, so data containing '<' or '&'
        # must be escaped before it is embedded.
        return escape(str(value))

    buffer = io.BytesIO()
    doc = SimpleDocTemplate(buffer, pagesize=letter)
    styles = getSampleStyleSheet()

    # Add title
    elements = [Paragraph(title, styles['Title'])]

    # Add date range
    if start_date and end_date:
        date_range = f"Period: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}"
        elements.append(Paragraph(date_range, styles['Heading2']))
    elements.append(Spacer(1, 12))

    # Add transaction summary
    if not transactions_df.empty:
        elements.append(Paragraph("Transaction Summary", styles['Heading2']))
        summary_data = [["Total Transactions", str(len(transactions_df))]]

        # Accept either column-name casing; skip the row when the frame has
        # no address columns rather than failing the whole report.
        columns = transactions_df.columns
        if 'From' in columns and 'To' in columns:
            address_cols = ('From', 'To')
        elif 'from' in columns and 'to' in columns:
            address_cols = ('from', 'to')
        else:
            address_cols = None
        if address_cols is not None:
            addresses = pd.concat(
                [transactions_df[address_cols[0]], transactions_df[address_cols[1]]],
                ignore_index=True,
            )
            # nunique() ignores missing values, so a null sender/receiver is
            # not counted as an address.
            summary_data.append(["Unique Addresses", str(addresses.nunique())])

        # Add token breakdown if available
        if 'tokenSymbol' in columns:
            token_counts = transactions_df['tokenSymbol'].value_counts()
            # value_counts() is empty when every symbol is null.
            if not token_counts.empty:
                summary_data.append(
                    ["Most Common Token", f"{token_counts.index[0]} ({token_counts.iloc[0]} txns)"]
                )

        summary_table = Table(summary_data)
        summary_table.setStyle(TableStyle([
            ('BACKGROUND', (0, 0), (0, -1), colors.lightgrey),
            ('GRID', (0, 0), (-1, -1), 1, colors.black),
            # Padding spelled out per side using the documented commands.
            ('LEFTPADDING', (0, 0), (-1, -1), 6),
            ('RIGHTPADDING', (0, 0), (-1, -1), 6),
            ('TOPPADDING', (0, 0), (-1, -1), 6),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 6),
        ]))
        elements.append(summary_table)
        elements.append(Spacer(1, 12))

    # Add pattern analysis
    if patterns:
        elements.append(Paragraph("Trading Patterns Detected", styles['Heading2']))
        for number, pattern in enumerate(patterns, start=1):
            lines = [
                f"Pattern {number}: {_safe(pattern.get('name', 'Unnamed'))}",
                f"Description: {_safe(pattern.get('description', 'No description'))}",
            ]
            if 'risk_profile' in pattern:
                lines.append(f"Risk Profile: {_safe(pattern['risk_profile'])}")
            if 'confidence' in pattern:
                lines.append(f"Confidence: {pattern['confidence']:.2f}")
            # A newline inside Paragraph text is plain whitespace; an
            # explicit <br/> tag is what starts a new line.
            elements.append(Paragraph("<br/>".join(lines), styles['Normal']))
            elements.append(Spacer(1, 6))
        elements.append(Spacer(1, 12))

    # Add price impact analysis
    if price_impact:
        elements.append(Paragraph("Price Impact Analysis", styles['Heading2']))
        lines = []
        if 'avg_impact' in price_impact:
            lines.append(f"Average Impact: {price_impact['avg_impact']:.2f}%")
        if 'max_impact' in price_impact:
            lines.append(f"Maximum Impact: {price_impact['max_impact']:.2f}%")
        if 'insights' in price_impact:
            lines.append(f"Insights: {_safe(price_impact['insights'])}")
        elements.append(Paragraph("<br/>".join(lines), styles['Normal']))
        elements.append(Spacer(1, 12))

    # Add alerts
    if alerts:
        elements.append(Paragraph("Alerts", styles['Heading2']))
        for alert in alerts:
            alert_text = f"{_safe(alert.get('level', 'Info'))}: {_safe(alert.get('message', 'No details'))}"
            elements.append(Paragraph(alert_text, styles['Normal']))
            elements.append(Spacer(1, 6))

    # Build the PDF
    doc.build(elements)
    buffer.seek(0)
    return buffer.getvalue()
def generate_csv_report(self,
                        transactions_df: pd.DataFrame,
                        report_type: str = "Transaction Summary") -> str:
    """
    Generate a CSV report of transaction data

    Args:
        transactions_df: DataFrame of transactions. Never modified; the
            daily aggregation runs on a copy.
        report_type: Type of report to generate. "Transaction Summary"
            dumps the frame as-is; "Daily Volume" sums the amount column
            ('Amount', 'tokenAmount' or 'value') and counts transactions
            per calendar day of 'Timestamp' / 'timeStamp'.

    Returns:
        CSV data as string. Problems (empty frame, missing column,
        unparseable timestamps, unknown report type) are reported as a
        plain message string rather than raised.
    """
    if transactions_df.empty:
        return "No data available for report"

    if report_type == "Transaction Summary":
        # Return basic transaction summary
        return transactions_df.to_csv(index=False)

    if report_type != "Daily Volume":
        return "Unknown report type"

    # Work on a copy: the conversion and the helper 'date' column below
    # must not leak into the caller's DataFrame.
    df = transactions_df.copy()

    # Get timestamp column
    if 'Timestamp' in df.columns:
        timestamp_col = 'Timestamp'
    elif 'timeStamp' in df.columns:
        timestamp_col = 'timeStamp'
    else:
        return "Timestamp column not found"

    # Convert epoch seconds to datetime for whichever column was found;
    # the .dt accessor used below requires a datetime column.
    if not pd.api.types.is_datetime64_any_dtype(df[timestamp_col]):
        try:
            df[timestamp_col] = pd.to_datetime(df[timestamp_col].astype(float), unit='s')
        except Exception:
            return "Error processing timestamp data"

    # Get amount column
    if 'Amount' in df.columns:
        amount_col = 'Amount'
    elif 'tokenAmount' in df.columns:
        amount_col = 'tokenAmount'
    elif 'value' in df.columns:
        amount_col = 'value'
    else:
        return "Amount column not found"

    # Amounts that arrive as strings would be concatenated by sum();
    # coerce to numbers first (unparseable entries become NaN and are
    # skipped by the aggregation).
    df[amount_col] = pd.to_numeric(df[amount_col], errors='coerce')

    # Count transactions via 'hash' when present; otherwise fall back to
    # the timestamp column so frames without hashes still work.
    count_col = 'hash' if 'hash' in df.columns else timestamp_col

    # Aggregate by day
    df['date'] = df[timestamp_col].dt.date
    daily_volume = df.groupby('date').agg({
        amount_col: 'sum',
        count_col: 'count'
    }).reset_index()
    daily_volume.columns = ['Date', 'Volume', 'Transactions']
    return daily_volume.to_csv(index=False)
def generate_png_chart(self,
                       fig: go.Figure,
                       width: int = 1200,
                       height: int = 800) -> bytes:
    """
    Render a Plotly figure as PNG and hand back the encoded image

    Args:
        fig: Plotly figure object
        width: Image width in pixels
        height: Image height in pixels
    Returns:
        Whatever fig.to_image yields for the PNG format (image bytes)
    """
    # Gather the export settings first, then make a single rendering call.
    export_options = {"format": "png", "width": width, "height": height}
    return fig.to_image(**export_options)