|
import matplotlib.pyplot as plt
|
|
import seaborn as sns
|
|
import pandas as pd
|
|
import numpy as np
|
|
import shap
|
|
|
|
def plot_feature_importance(model, feature_names=None):
    """Plot feature importance for tree-based models.

    Draws a bar chart of the model's ``feature_importances_`` ordered from
    the most to the least important feature.

    Args:
        model: Either an estimator exposing ``feature_importances_`` or a
            wrapper object that holds such an estimator in its ``model``
            attribute.
        feature_names: Optional sequence of labels, indexable by feature
            position. When omitted, the labels are looked up on the model
            (``feature_names``, then ``feature_names_in_`` on the wrapped
            estimator); if none are found, generic ``"Feature <i>"`` labels
            are used so the plot can still be drawn.

    Returns:
        The ``matplotlib.pyplot`` module, with the newly created figure as
        the current figure.
    """
    # Unwrap project-style wrappers that keep the fitted estimator in `.model`.
    estimator = model.model if hasattr(model, 'model') else model

    # np.asarray lets the fancy indexing below work even when the estimator
    # reports importances as a plain list.
    importances = np.asarray(estimator.feature_importances_)

    if feature_names is None:
        feature_names = getattr(model, 'feature_names', None)
    if feature_names is None:
        # Presumably a scikit-learn style estimator fitted on a DataFrame;
        # the attribute is simply absent otherwise.
        feature_names = getattr(estimator, 'feature_names_in_', None)
    if feature_names is None:
        # Last resort: positional labels instead of failing on None[i].
        feature_names = [f'Feature {i}' for i in range(len(importances))]

    # Descending order of importance.
    order = np.argsort(importances)[::-1]
    positions = range(len(order))

    plt.figure(figsize=(10, 6))
    plt.title('Feature Importance')
    plt.bar(positions, importances[order], align='center')
    plt.xticks(positions, [feature_names[i] for i in order], rotation=90)
    plt.tight_layout()
    return plt
|
|
|
|
def plot_shap_summary(explainer, X):
    """Plot SHAP summary plot.

    Args:
        explainer: Wrapper object whose ``explainer`` attribute exposes a
            ``shap_values(X)`` method.
        X: The samples to explain; passed unchanged to ``shap_values`` and
            to ``shap.summary_plot``.

    Returns:
        Whatever ``shap.summary_plot`` returns.
    """
    shap_values = explainer.explainer.shap_values(X)

    if isinstance(shap_values, list):
        # One array per output. Use index 1 (presumably the positive class
        # of a binary classifier) when it exists; a single-output list
        # would otherwise raise IndexError.
        shap_values = shap_values[1] if len(shap_values) > 1 else shap_values[0]
    elif np.ndim(shap_values) == 3:
        # NOTE(review): recent SHAP releases return multi-output values as a
        # single (samples, features, outputs) array instead of a list --
        # confirm against the installed SHAP version. Select the same output
        # as the list branch so the summary plot receives a 2-D matrix.
        output_index = 1 if np.shape(shap_values)[-1] > 1 else 0
        shap_values = np.asarray(shap_values)[..., output_index]

    return shap.summary_plot(shap_values, X)
|
|
|
|
def plot_what_if(model, instance, feature_to_vary, range_values, feature_names=None):
    """Plot what-if analysis for a single feature.

    Sweeps ``feature_to_vary`` over ``range_values`` on a copy of
    ``instance`` and plots the resulting class-1 probability of the first
    row for every value.

    Args:
        model: Object exposing ``predict_proba`` directly, or a wrapper
            holding such an estimator in its ``model`` attribute.
        instance: The sample to perturb. It must support ``.copy()`` and
            item assignment by ``feature_to_vary`` (presumably a one-row
            DataFrame -- confirm with callers). The caller's object is not
            modified.
        feature_to_vary: Key of the feature to overwrite; also used as the
            x-axis label and in the title.
        range_values: Iterable of values to assign to the feature. It may
            be a one-shot iterator; it is materialised once internally.
        feature_names: Unused; kept for interface compatibility.

    Returns:
        The ``matplotlib.pyplot`` module, with the newly created figure as
        the current figure.
    """
    # The values are needed twice (prediction sweep and x-axis), so a
    # generator must be materialised before the loop consumes it.
    values = list(range_values)

    # Loop-invariant: resolve the scoring callable once.
    if hasattr(model, 'predict_proba'):
        predict_proba = model.predict_proba
    else:
        predict_proba = model.model.predict_proba

    instance_df = instance.copy()
    predictions = []
    for value in values:
        instance_df[feature_to_vary] = value
        # First row, second column: probability of class index 1.
        predictions.append(predict_proba(instance_df)[0][1])

    plt.figure(figsize=(10, 6))
    plt.plot(values, predictions)
    plt.xlabel(feature_to_vary)
    plt.ylabel('Probability of Approval')
    plt.title(f'What-If Analysis: Impact of {feature_to_vary} on Loan Approval')
    plt.grid(True)
    return plt
|
|
|
|
def plot_comparative_analysis(model, instance_a, instance_b, explainer, feature_names=None):
    """Compare two instances and their explanations.

    Builds a per-feature table of both instances' values and SHAP
    contributions, ranks the features by the absolute difference in
    contribution (B minus A), and draws a horizontal bar chart of the ten
    largest differences.

    Args:
        model: Unused by the plot; kept for interface compatibility.
        instance_a: First sample. Its ``.values[0]`` is used as the feature
            row (presumably a one-row DataFrame -- confirm with callers).
        instance_b: Second sample, same layout as ``instance_a``.
        explainer: Object exposing ``explain_instance(instance)``. Element
            ``[0]`` of its result is used as the per-feature contribution
            vector (assumed to align with the feature order -- TODO
            confirm).
        feature_names: Optional sequence of feature labels. When omitted,
            the column labels of ``instance_a`` are used if it has any.

    Returns:
        The ``matplotlib.pyplot`` module, with the newly created figure as
        the current figure.
    """
    if feature_names is None:
        # Without labels the 'Feature' column would be all None and the
        # bar plot would have nothing to put on its y-axis.
        columns = getattr(instance_a, 'columns', None)
        if columns is not None:
            feature_names = list(columns)

    shap_a = explainer.explain_instance(instance_a)
    shap_b = explainer.explain_instance(instance_b)

    comparison = pd.DataFrame({
        'Feature': feature_names,
        'Instance A': instance_a.values[0],
        'Instance B': instance_b.values[0],
        'SHAP A': shap_a[0],
        'SHAP B': shap_b[0],
    })
    comparison['SHAP Diff'] = comparison['SHAP B'] - comparison['SHAP A']

    # Rank on the raw ndarray so the positional order does not depend on
    # how a given pandas version wraps np.argsort applied to a Series.
    order = np.argsort(np.abs(comparison['SHAP Diff'].to_numpy()))[::-1]
    comparison = comparison.iloc[order]

    plt.figure(figsize=(12, 8))
    sns.barplot(x='SHAP Diff', y='Feature', data=comparison.head(10))
    plt.title('Top Features Explaining the Difference in Predictions')
    plt.tight_layout()
    return plt