# Source: file-viewer export of a hosted Space (status at capture: Sleeping).
# File size: 3,283 bytes. Revision: 3efedb0.
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import shap
def plot_feature_importance(model, feature_names=None):
    """Plot feature importances as a bar chart, most important first.

    Args:
        model: Either an estimator exposing ``feature_importances_`` or a
            wrapper object that stores such an estimator in its ``model``
            attribute.
        feature_names: Optional labels, one per feature, in the same order
            as the importances. When omitted, ``model.feature_names`` is
            used if it exists; if no names can be found, generic
            ``Feature <i>`` labels are generated.

    Returns:
        The ``plt`` module object, with the newly created figure current.
    """
    if feature_names is None:
        feature_names = getattr(model, 'feature_names', None)

    # Wrapper objects keep the fitted estimator on ``.model``.
    estimator = model.model if hasattr(model, 'model') else model
    # asarray so the fancy indexing below also works for plain sequences.
    importances = np.asarray(estimator.feature_importances_)

    if feature_names is None:
        # Without this fallback the tick-label lookup would fail with
        # "'NoneType' object is not subscriptable".
        feature_names = [f'Feature {i}' for i in range(len(importances))]

    # Descending order of importance.
    order = np.argsort(importances)[::-1]
    positions = range(len(order))

    plt.figure(figsize=(10, 6))
    plt.title('Feature Importance')
    plt.bar(positions, importances[order], align='center')
    plt.xticks(positions, [feature_names[i] for i in order], rotation=90)
    plt.tight_layout()
    return plt
def plot_shap_summary(explainer, X):
    """Draw a SHAP summary plot for the samples in ``X``.

    Args:
        explainer: Wrapper object whose ``explainer`` attribute provides a
            ``shap_values(X)`` method.
        X: Samples to explain; passed unchanged to ``shap_values`` and to
            ``shap.summary_plot``.

    Returns:
        Whatever ``shap.summary_plot`` returns for the selected values.
    """
    shap_values = explainer.explainer.shap_values(X)

    if isinstance(shap_values, list):
        # One array per class: use index 1 (the second class in a binary
        # problem), or the only entry if a single-output list came back.
        shap_values = shap_values[1] if len(shap_values) > 1 else shap_values[0]
    elif isinstance(shap_values, np.ndarray) and shap_values.ndim == 3:
        # Some SHAP releases stack the per-class values on a trailing axis
        # (samples, features, classes) instead of returning a list; pick
        # the same class as the list branch above.
        class_index = 1 if shap_values.shape[-1] > 1 else 0
        shap_values = shap_values[..., class_index]

    return shap.summary_plot(shap_values, X)
def plot_what_if(model, instance, feature_to_vary, range_values, feature_names=None):
    """Plot how the predicted probability changes as one feature is varied.

    Args:
        model: Object with ``predict_proba``, or a wrapper whose ``model``
            attribute has ``predict_proba``.
        instance: The sample to perturb. It must support ``.copy()`` and
            item assignment by ``feature_to_vary``; the caller's object is
            not modified because a copy is used.
        feature_to_vary: Key of the feature to overwrite in the copy.
        range_values: Iterable of values to assign to that feature. It is
            materialised once, so one-shot iterables are supported.
        feature_names: Unused; kept for interface compatibility.

    Returns:
        The ``plt`` module object, with the newly created figure current.
    """
    # The values are needed twice (prediction loop and x-axis); a generator
    # would otherwise be exhausted by the time it reaches ``plt.plot``.
    values = list(range_values)

    # Resolve the predicting object once rather than on every iteration.
    predictor = model if hasattr(model, 'predict_proba') else model.model

    scenario = instance.copy()
    predictions = []
    for value in values:
        scenario[feature_to_vary] = value
        # First row, second probability column of the model output.
        predictions.append(predictor.predict_proba(scenario)[0][1])

    plt.figure(figsize=(10, 6))
    plt.plot(values, predictions)
    plt.xlabel(feature_to_vary)
    plt.ylabel('Probability of Approval')
    plt.title(f'What-If Analysis: Impact of {feature_to_vary} on Loan Approval')
    plt.grid(True)
    return plt
def plot_comparative_analysis(model, instance_a, instance_b, explainer,
                              feature_names=None, top_n=10):
    """Plot the features whose SHAP values differ most between two instances.

    Args:
        model: Unused. The original implementation computed predictions
            from it but never used them; the parameter is kept so existing
            callers continue to work.
        instance_a: First instance; ``instance_a.values[0]`` supplies its
            feature values.
        instance_b: Second instance; ``instance_b.values[0]`` supplies its
            feature values.
        explainer: Object with ``explain_instance(instance)``; element
            ``[0]`` of its result is used as the per-feature SHAP values.
        feature_names: Labels for the features. When omitted, the column
            labels of ``instance_a`` are used if it has a ``columns``
            attribute.
        top_n: Number of largest absolute differences to plot (default 10,
            the previously hard-coded value).

    Returns:
        The ``plt`` module object, with the newly created figure current.
    """
    if feature_names is None:
        # A bare None would be broadcast into every row of the 'Feature'
        # column and leave the bar plot without usable labels.
        columns = getattr(instance_a, 'columns', None)
        if columns is not None:
            feature_names = list(columns)

    # Per-feature explanations for each instance.
    shap_a = explainer.explain_instance(instance_a)
    shap_b = explainer.explain_instance(instance_b)

    comparison = pd.DataFrame({
        'Feature': feature_names,
        'Instance A': instance_a.values[0],
        'Instance B': instance_b.values[0],
        'SHAP A': shap_a[0],
        'SHAP B': shap_b[0],
    })
    comparison['SHAP Diff'] = comparison['SHAP B'] - comparison['SHAP A']

    # Largest absolute difference first. Sorting on a plain array keeps the
    # positional indexing explicit instead of relying on Series handling
    # inside np.argsort / .iloc.
    order = np.argsort(np.abs(comparison['SHAP Diff'].to_numpy()))[::-1]
    comparison = comparison.iloc[order]

    plt.figure(figsize=(12, 8))
    sns.barplot(x='SHAP Diff', y='Feature', data=comparison.head(top_n))
    plt.title('Top Features Explaining the Difference in Predictions')
    plt.tight_layout()
    return plt