import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix
class BiasDetector:
    """Detect group-level bias in a binary classifier's predictions."""

    def __init__(self, model, sensitive_features=None):
        self.model = model
        self.sensitive_features = sensitive_features or []
        self.metrics = {}
    def analyze_bias(self, X, y, sensitive_feature):
        """Analyze bias metrics for each group of a sensitive feature"""
        if sensitive_feature not in X.columns:
            raise ValueError(f"Sensitive feature {sensitive_feature} not found in data")
        # Get unique values for the sensitive feature
        unique_values = X[sensitive_feature].unique()
        results = {}
        for value in unique_values:
            # Filter data for this group
            mask = X[sensitive_feature] == value
            X_group = X[mask]
            y_group = y[mask]
            # Skip empty groups
            if len(X_group) == 0:
                continue
            # Make predictions, supporting both bare estimators and wrappers
            # that expose the estimator as a `.model` attribute
            if hasattr(self.model, 'predict'):
                y_pred = self.model.predict(X_group)
            else:
                y_pred = self.model.model.predict(X_group)
            # Pin the label order so the confusion matrix is always 2x2, even
            # when a group contains only one class (assumes binary 0/1 labels)
            tn, fp, fn, tp = confusion_matrix(y_group, y_pred, labels=[0, 1]).ravel()
            # Calculate per-group rates
            acceptance_rate = (tp + fp) / len(y_group)
            true_positive_rate = tp / (tp + fn) if (tp + fn) > 0 else 0
            false_positive_rate = fp / (fp + tn) if (fp + tn) > 0 else 0
            results[value] = {
                "count": len(X_group),
                "acceptance_rate": acceptance_rate,
                "true_positive_rate": true_positive_rate,
                "false_positive_rate": false_positive_rate
            }
        self.metrics[sensitive_feature] = results
        return results
    def calculate_disparate_impact(self, sensitive_feature, privileged_value):
        """Calculate disparate impact relative to the privileged group"""
        if sensitive_feature not in self.metrics:
            raise ValueError(f"No metrics available for {sensitive_feature}. Run analyze_bias first.")
        metrics = self.metrics[sensitive_feature]
        if privileged_value not in metrics:
            raise ValueError(f"Privileged value {privileged_value} not found in metrics")
        privileged_rate = metrics[privileged_value]["acceptance_rate"]
        disparate_impact = {}
        for value, data in metrics.items():
            if value != privileged_value:
                unprivileged_rate = data["acceptance_rate"]
                # Disparate impact is the ratio of unprivileged to privileged
                # acceptance rates; a value below 0.8 is often considered
                # problematic (the "four-fifths rule")
                di = unprivileged_rate / privileged_rate if privileged_rate > 0 else 0
                disparate_impact[value] = di
        return disparate_impact
    def generate_bias_report(self):
        """Generate a comprehensive bias report"""
        report = {}
        for feature in self.metrics:
            feature_report = {
                "metrics": self.metrics[feature],
                "disparate_impact": {}
            }
            # Treat the group with the highest acceptance rate as privileged
            acceptance_rates = {value: data["acceptance_rate"]
                                for value, data in self.metrics[feature].items()}
            privileged_value = max(acceptance_rates, key=acceptance_rates.get)
            # Calculate disparate impact against that group
            feature_report["disparate_impact"] = self.calculate_disparate_impact(
                feature, privileged_value
            )
            # Add to report
            report[feature] = feature_report
        return report
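

# --- Usage sketch ------------------------------------------------------------
# A minimal, hedged example of how this class might be driven end to end:
# build a toy dataset with a synthetic 0/1 "group" column, fit a scikit-learn
# classifier, then run analyze_bias and generate_bias_report. The column and
# variable names here are illustrative assumptions, not part of the class API.
if __name__ == "__main__":
    from sklearn.linear_model import LogisticRegression

    rng = np.random.default_rng(0)
    n = 500
    X = pd.DataFrame({
        "feature_1": rng.normal(size=n),
        "feature_2": rng.normal(size=n),
        "group": rng.integers(0, 2, size=n),  # synthetic sensitive feature
    })
    # Binary 0/1 labels, correlated with feature_1 (analyze_bias assumes 0/1)
    y = pd.Series(((X["feature_1"] + rng.normal(scale=0.5, size=n)) > 0).astype(int))

    # Fit on the full frame, sensitive column included, so that predict()
    # accepts the per-group slices taken inside analyze_bias
    model = LogisticRegression().fit(X, y)

    detector = BiasDetector(model, sensitive_features=["group"])
    detector.analyze_bias(X, y, "group")
    print(detector.generate_bias_report())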