import numpy as np
import pandas as pd


class BiasDetector:
    """Quantify group-level prediction bias of a binary classifier.

    Groups are the distinct values of a "sensitive feature" column
    (e.g. gender, age bracket).  Labels and predictions are assumed to
    be binary, encoded as 0 (negative) / 1 (positive / "accepted").
    """

    def __init__(self, model, sensitive_features=None):
        """
        Args:
            model: Fitted estimator exposing ``predict(X)`` (or wrapping
                one as ``model.model``).
            sensitive_features: Optional list of column names to track;
                defaults to an empty list.
        """
        self.model = model
        self.sensitive_features = sensitive_features or []
        # Per-feature metrics cache: {feature: {group_value: {metric: value}}}
        self.metrics = {}

    @staticmethod
    def _binary_confusion(y_true, y_pred):
        """Return (tn, fp, fn, tp) counts for binary 0/1 labels.

        Unlike ``sklearn.metrics.confusion_matrix`` without an explicit
        ``labels=`` argument, this never collapses to a 1x1 matrix when a
        group contains only one class, so unpacking is always safe.
        """
        y_true = np.asarray(y_true)
        y_pred = np.asarray(y_pred)
        tn = int(np.sum((y_true == 0) & (y_pred == 0)))
        fp = int(np.sum((y_true == 0) & (y_pred == 1)))
        fn = int(np.sum((y_true == 1) & (y_pred == 0)))
        tp = int(np.sum((y_true == 1) & (y_pred == 1)))
        return tn, fp, fn, tp

    def analyze_bias(self, X, y, sensitive_feature):
        """Compute per-group classification metrics for one sensitive feature.

        Args:
            X: Feature DataFrame containing ``sensitive_feature`` as a column.
            y: Binary (0/1) ground-truth labels aligned with ``X``.
            sensitive_feature: Column name to group by.

        Returns:
            dict mapping each group value to ``count``, ``acceptance_rate``,
            ``true_positive_rate`` and ``false_positive_rate``.  Also cached
            in ``self.metrics[sensitive_feature]``.

        Raises:
            ValueError: If ``sensitive_feature`` is not a column of ``X``.
        """
        if sensitive_feature not in X.columns:
            raise ValueError(f"Sensitive feature {sensitive_feature} not found in data")

        results = {}
        for value in X[sensitive_feature].unique():
            mask = X[sensitive_feature] == value
            X_group = X[mask]
            y_group = y[mask]

            # Skip empty groups (defensive; unique() values normally occur).
            if len(X_group) == 0:
                continue

            # Support both a bare estimator and a wrapper exposing .model.
            if hasattr(self.model, "predict"):
                y_pred = self.model.predict(X_group)
            else:
                y_pred = self.model.model.predict(X_group)

            tn, fp, fn, tp = self._binary_confusion(y_group, y_pred)

            # Rate denominators guarded: a group may lack positives/negatives.
            acceptance_rate = (tp + fp) / len(y_group)
            true_positive_rate = tp / (tp + fn) if (tp + fn) > 0 else 0
            false_positive_rate = fp / (fp + tn) if (fp + tn) > 0 else 0

            results[value] = {
                "count": len(X_group),
                "acceptance_rate": acceptance_rate,
                "true_positive_rate": true_positive_rate,
                "false_positive_rate": false_positive_rate,
            }

        self.metrics[sensitive_feature] = results
        return results

    def calculate_disparate_impact(self, sensitive_feature, privileged_value):
        """Ratio of each unprivileged group's acceptance rate to the privileged one's.

        A ratio below 0.8 is commonly flagged as problematic (the
        "four-fifths rule").

        Args:
            sensitive_feature: Feature previously analyzed via ``analyze_bias``.
            privileged_value: Group value used as the reference (denominator).

        Returns:
            dict mapping each non-privileged group value to its disparate
            impact ratio (0 when the privileged acceptance rate is 0).

        Raises:
            ValueError: If the feature was never analyzed, or the privileged
                value is absent from its metrics.
        """
        if sensitive_feature not in self.metrics:
            raise ValueError(
                f"No metrics available for {sensitive_feature}. Run analyze_bias first."
            )
        metrics = self.metrics[sensitive_feature]
        if privileged_value not in metrics:
            raise ValueError(f"Privileged value {privileged_value} not found in metrics")

        privileged_rate = metrics[privileged_value]["acceptance_rate"]
        disparate_impact = {}
        for value, data in metrics.items():
            if value == privileged_value:
                continue
            unprivileged_rate = data["acceptance_rate"]
            # Guard against division by zero when nobody in the privileged
            # group was accepted.
            di = unprivileged_rate / privileged_rate if privileged_rate > 0 else 0
            disparate_impact[value] = di
        return disparate_impact

    def generate_bias_report(self):
        """Build a report for every feature analyzed so far.

        For each feature, the privileged group is taken to be the one with
        the highest acceptance rate, and disparate impact is computed
        relative to it.

        Returns:
            dict mapping feature name to ``{"metrics": ..., "disparate_impact": ...}``.
        """
        report = {}
        for feature in self.metrics:
            acceptance_rates = {
                value: data["acceptance_rate"]
                for value, data in self.metrics[feature].items()
            }
            # Privileged group = highest acceptance rate.
            privileged_value = max(acceptance_rates, key=acceptance_rates.get)
            report[feature] = {
                "metrics": self.metrics[feature],
                "disparate_impact": self.calculate_disparate_impact(
                    feature, privileged_value
                ),
            }
        return report