# WickedFaith's picture
# Upload 77 files
# 3efedb0 verified
# raw
# history blame contribute delete
# 3.28 kB
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import shap
def plot_feature_importance(model, feature_names=None):
    """Plot feature importances of a tree-based model as a sorted bar chart.

    Importances are read from ``model.model.feature_importances_`` when
    ``model`` has a ``model`` attribute (i.e. it is a wrapper object),
    otherwise from ``model.feature_importances_``. Bars are ordered from the
    largest importance to the smallest.

    Args:
        model: An estimator, or a wrapper exposing the estimator as ``.model``.
        feature_names: Optional labels, indexed in the same order as the
            importances. When omitted, the labels are taken from
            ``model.feature_names``, then from the estimator's
            ``feature_names_in_``; if neither exists, generic
            ``"Feature <i>"`` labels are generated so the plot can still be
            drawn.

    Returns:
        The ``matplotlib.pyplot`` module, with the newly created figure as
        the current figure.

    Raises:
        AttributeError: If no ``feature_importances_`` attribute is found.
    """
    estimator = model.model if hasattr(model, 'model') else model
    # np.asarray lets list-valued importances support the index-array lookup
    # used when drawing the bars.
    importances = np.asarray(estimator.feature_importances_)

    if feature_names is None:
        feature_names = getattr(model, 'feature_names', None)
    if feature_names is None:
        feature_names = getattr(estimator, 'feature_names_in_', None)
    if feature_names is None:
        feature_names = [f'Feature {i}' for i in range(len(importances))]

    indices = np.argsort(importances)[::-1]
    # Resolve the labels before opening a figure so that an unusable
    # ``feature_names`` fails without leaving a half-drawn figure behind.
    labels = [feature_names[i] for i in indices]
    positions = range(len(indices))

    plt.figure(figsize=(10, 6))
    plt.title('Feature Importance')
    plt.bar(positions, importances[indices], align='center')
    plt.xticks(positions, labels, rotation=90)
    plt.tight_layout()
    return plt
def plot_shap_summary(explainer, X):
    """Draw a SHAP summary plot for the samples in ``X``.

    SHAP values are obtained from ``explainer.explainer.shap_values(X)``.
    When that call returns per-class output, the values for index 1 are
    plotted (the positive class in binary classification); if only a single
    output is present, that one is used.

    Args:
        explainer: Wrapper object whose ``explainer`` attribute provides a
            ``shap_values(X)`` method.
        X: Samples to explain; passed unchanged to both ``shap_values`` and
            ``shap.summary_plot``.

    Returns:
        Whatever ``shap.summary_plot`` returns.
    """
    shap_values = explainer.explainer.shap_values(X)

    if isinstance(shap_values, list):
        # List form: one array per class. Fall back to the only entry
        # instead of raising IndexError on a single-output list.
        shap_values = shap_values[1] if len(shap_values) > 1 else shap_values[0]
    elif getattr(shap_values, 'ndim', None) == 3:
        # Array form with outputs stacked on the last axis
        # (n_samples, n_features, n_outputs).
        # NOTE(review): this layout is what newer SHAP releases are reported
        # to return for classifiers — confirm against the installed version.
        class_idx = 1 if shap_values.shape[-1] > 1 else 0
        shap_values = shap_values[:, :, class_idx]

    return shap.summary_plot(shap_values, X)
def plot_what_if(model, instance, feature_to_vary, range_values, feature_names=None):
    """Plot how the predicted probability changes as one feature is varied.

    A copy of ``instance`` is made, ``feature_to_vary`` is overwritten with
    each value from ``range_values`` in turn, and the probability at
    ``predict_proba(...)[0][1]`` (first row, class index 1) is recorded and
    plotted against the tried values. The caller's ``instance`` is not
    modified.

    Args:
        model: Object exposing ``predict_proba``, or a wrapper exposing such
            an object as ``.model``.
        instance: The sample to perturb. Must support ``.copy()`` and item
            assignment by feature name (presumably a single-row DataFrame —
            confirm against callers).
        feature_to_vary: Name of the feature to overwrite.
        range_values: Iterable of values to try; it is materialised once so
            one-shot iterables (generators) also work.
        feature_names: Unused; kept for interface compatibility.

    Returns:
        The ``matplotlib.pyplot`` module, with the newly created figure as
        the current figure.

    Raises:
        KeyError: If ``instance`` has columns and ``feature_to_vary`` is not
            one of them.
    """
    # Materialise once: the values are consumed by the loop AND by plt.plot.
    values = list(range_values)
    scenario = instance.copy()

    # Item assignment with an unknown name would silently append a new column
    # rather than vary an existing feature, so reject it up front.
    if hasattr(scenario, 'columns') and feature_to_vary not in scenario.columns:
        raise KeyError(
            f'Feature {feature_to_vary!r} is not a column of the instance'
        )

    # Pick the object that actually implements predict_proba once, not per value.
    predictor = model if hasattr(model, 'predict_proba') else model.model

    predictions = []
    for value in values:
        scenario[feature_to_vary] = value
        predictions.append(predictor.predict_proba(scenario)[0][1])

    plt.figure(figsize=(10, 6))
    plt.plot(values, predictions)
    plt.xlabel(feature_to_vary)
    plt.ylabel('Probability of Approval')
    plt.title(f'What-If Analysis: Impact of {feature_to_vary} on Loan Approval')
    plt.grid(True)
    return plt
def plot_comparative_analysis(model, instance_a, instance_b, explainer, feature_names=None):
    """Compare two instances and plot the features whose SHAP values differ most.

    For each instance the class-1 probability (``predict_proba(...)[0][1]``)
    and the explanation from ``explainer.explain_instance`` are obtained.
    The per-feature difference ``SHAP B - SHAP A`` is computed, and the ten
    features with the largest absolute difference are drawn as a horizontal
    bar plot. Both probabilities are shown on a second line of the title.

    Args:
        model: Object exposing ``predict_proba``, or a wrapper exposing such
            an object as ``.model``.
        instance_a: First sample; its first row (``.values[0]``) is used.
        instance_b: Second sample; its first row (``.values[0]``) is used.
        explainer: Object providing ``explain_instance(instance)``; element
            ``[0]`` of its result is used as the per-feature SHAP values
            (presumably one row per instance — confirm against the explainer).
        feature_names: Optional feature labels. Defaults to
            ``instance_a.columns`` when omitted.

    Returns:
        The ``matplotlib.pyplot`` module, with the newly created figure as
        the current figure.
    """
    # Get predictions
    predictor = model if hasattr(model, 'predict_proba') else model.model
    pred_a = predictor.predict_proba(instance_a)[0][1]
    pred_b = predictor.predict_proba(instance_b)[0][1]

    # Get explanations
    shap_a = explainer.explain_instance(instance_a)
    shap_b = explainer.explain_instance(instance_b)

    # Without labels the 'Feature' column would be all-None and the bar plot
    # would have nothing to put on its y axis.
    if feature_names is None:
        feature_names = list(instance_a.columns)

    # Create comparison dataframe
    comparison = pd.DataFrame({
        'Feature': feature_names,
        'Instance A': instance_a.values[0],
        'Instance B': instance_b.values[0],
        'SHAP A': shap_a[0],
        'SHAP B': shap_b[0],
    })

    # Calculate difference in SHAP values
    comparison['SHAP Diff'] = comparison['SHAP B'] - comparison['SHAP A']

    # Sort by absolute difference, largest first; NaN differences sort last.
    ranked = comparison['SHAP Diff'].abs().sort_values(ascending=False).index
    comparison = comparison.loc[ranked]

    # Plot top differences
    plt.figure(figsize=(12, 8))
    sns.barplot(x='SHAP Diff', y='Feature', data=comparison.head(10))
    plt.title(
        'Top Features Explaining the Difference in Predictions\n'
        f'(Probability of Approval: A = {pred_a:.3f}, B = {pred_b:.3f})'
    )
    plt.tight_layout()
    return plt