import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix


class BiasDetector:
    def __init__(self, model, sensitive_features=None):
        self.model = model
        self.sensitive_features = sensitive_features or []
        self.metrics = {}  # per-feature results cached by analyze_bias

    def analyze_bias(self, X, y, sensitive_feature):
        """Analyze bias metrics for each group of a sensitive feature."""
        if sensitive_feature not in X.columns:
            raise ValueError(f"Sensitive feature {sensitive_feature} not found in data")

        unique_values = X[sensitive_feature].unique()
        labels = np.unique(y)  # fix the label set once; assumes binary labels

        results = {}
        for value in unique_values:
            mask = X[sensitive_feature] == value
            X_group = X[mask]
            y_group = y[mask]

            if len(X_group) == 0:
                continue

            # Support both bare estimators and simple wrappers that expose
            # the underlying estimator as a .model attribute.
            if hasattr(self.model, 'predict'):
                y_pred = self.model.predict(X_group)
            else:
                y_pred = self.model.model.predict(X_group)

            # Passing labels keeps the matrix 2x2 even when a group contains
            # only one class, so ravel() always yields exactly four values.
            tn, fp, fn, tp = confusion_matrix(y_group, y_pred, labels=labels).ravel()

            # Acceptance rate = share of the group predicted positive
            acceptance_rate = (tp + fp) / len(y_group)
            true_positive_rate = tp / (tp + fn) if (tp + fn) > 0 else 0
            false_positive_rate = fp / (fp + tn) if (fp + tn) > 0 else 0

            results[value] = {
                "count": len(X_group),
                "acceptance_rate": acceptance_rate,
                "true_positive_rate": true_positive_rate,
                "false_positive_rate": false_positive_rate,
            }

        self.metrics[sensitive_feature] = results
        return results

    def calculate_disparate_impact(self, sensitive_feature, privileged_value):
        """Calculate disparate impact ratios relative to a privileged group."""
        if sensitive_feature not in self.metrics:
            raise ValueError(f"No metrics available for {sensitive_feature}. Run analyze_bias first.")

        metrics = self.metrics[sensitive_feature]
        if privileged_value not in metrics:
            raise ValueError(f"Privileged value {privileged_value} not found in metrics")

        privileged_rate = metrics[privileged_value]["acceptance_rate"]

        disparate_impact = {}
        for value, data in metrics.items():
            if value != privileged_value:
                unprivileged_rate = data["acceptance_rate"]
                # Ratios below 0.8 are commonly flagged under the four-fifths rule
                di = unprivileged_rate / privileged_rate if privileged_rate > 0 else 0
                disparate_impact[value] = di

        return disparate_impact

    def generate_bias_report(self):
        """Generate a comprehensive bias report for every analyzed feature."""
        report = {}

        for feature in self.metrics:
            feature_report = {
                "metrics": self.metrics[feature],
                "disparate_impact": {},
            }

            # Treat the group with the highest acceptance rate as privileged
            acceptance_rates = {value: data["acceptance_rate"]
                                for value, data in self.metrics[feature].items()}
            privileged_value = max(acceptance_rates, key=acceptance_rates.get)

            feature_report["disparate_impact"] = self.calculate_disparate_impact(
                feature, privileged_value
            )

            report[feature] = feature_report

        return report
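

# Minimal usage sketch, illustrative only: it assumes a fitted scikit-learn
# classifier, a pandas DataFrame whose columns are all numeric (including the
# encoded sensitive column, so .predict works on raw group slices), and binary
# 0/1 labels. The synthetic data, column names, and 0.8 threshold below are
# hypothetical, not from the source.
if __name__ == "__main__":
    from sklearn.linear_model import LogisticRegression

    rng = np.random.default_rng(0)
    n = 200
    X = pd.DataFrame({
        "income": rng.normal(50, 10, n).round(1),
        "gender": rng.integers(0, 2, n),  # sensitive attribute, already 0/1
    })
    # Labels correlate with gender so the report has something to flag
    y = pd.Series((X["income"] + 5 * X["gender"]
                   + rng.normal(0, 5, n) > 55).astype(int))

    clf = LogisticRegression().fit(X, y)

    detector = BiasDetector(clf, sensitive_features=["gender"])
    detector.analyze_bias(X, y, "gender")
    report = detector.generate_bias_report()

    # Flag groups that fail the four-fifths rule
    for group, ratio in report["gender"]["disparate_impact"].items():
        status = "below 0.8 threshold" if ratio < 0.8 else "ok"
        print(f"gender={group}: disparate impact {ratio:.2f} ({status})")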