Spaces:
Sleeping
Sleeping
import matplotlib.pyplot as plt | |
import seaborn as sns | |
import pandas as pd | |
import numpy as np | |
import shap | |
def plot_feature_importance(model, feature_names=None):
    """Plot feature importances of a tree-based model as a descending bar chart.

    Args:
        model: Either an estimator exposing ``feature_importances_`` or a
            wrapper object that holds such an estimator in its ``model``
            attribute.
        feature_names: Optional sequence of labels, one per feature. When
            omitted, the labels are taken from ``model.feature_names``, then
            from the estimator's ``feature_names_in_``; if neither is
            available, generic ``Feature <index>`` labels are generated.

    Returns:
        The ``matplotlib.pyplot`` module, with the newly drawn figure current.
    """
    estimator = model.model if hasattr(model, 'model') else model
    # Coerce to an ndarray so the fancy indexing below also works when the
    # estimator hands back a plain list.
    importances = np.asarray(estimator.feature_importances_)

    if feature_names is None:
        feature_names = getattr(model, 'feature_names', None)
    if feature_names is None:
        feature_names = getattr(estimator, 'feature_names_in_', None)
    if feature_names is None:
        # Without this fallback the tick-label lookup would fail with a
        # TypeError when no names can be found anywhere.
        feature_names = [f'Feature {i}' for i in range(len(importances))]

    # Most important feature first.
    order = np.argsort(importances)[::-1]
    positions = range(len(order))

    plt.figure(figsize=(10, 6))
    plt.title('Feature Importance')
    plt.bar(positions, importances[order], align='center')
    plt.xticks(positions, [feature_names[i] for i in order], rotation=90)
    plt.tight_layout()
    return plt
def plot_shap_summary(explainer, X):
    """Draw a SHAP summary plot for ``X``.

    Args:
        explainer: Wrapper object whose ``explainer`` attribute exposes a
            ``shap_values(X)`` method.
        X: Feature matrix to explain; passed to both ``shap_values`` and
            ``shap.summary_plot``.

    Returns:
        Whatever ``shap.summary_plot`` returns for the selected SHAP values.
    """
    shap_values = explainer.explainer.shap_values(X)

    if isinstance(shap_values, list):
        # Legacy SHAP output: one array per class. Prefer the positive class
        # (index 1) for binary classification, but do not crash when only a
        # single output is present.
        shap_values = shap_values[1] if len(shap_values) > 1 else shap_values[0]
    elif getattr(shap_values, 'ndim', 2) == 3:
        # NOTE(review): newer SHAP releases return a single array for
        # multi-output models, presumably laid out as
        # (samples, features, outputs) -- confirm against the installed
        # SHAP version. Select the same positive-class slice as above.
        class_index = 1 if shap_values.shape[-1] > 1 else 0
        shap_values = shap_values[..., class_index]

    return shap.summary_plot(shap_values, X)
def plot_what_if(model, instance, feature_to_vary, range_values, feature_names=None):
    """Plot how the predicted probability changes as one feature is varied.

    Args:
        model: Object exposing ``predict_proba`` directly, or a wrapper that
            holds such an object in its ``model`` attribute.
        instance: Single-row input supporting ``.copy()`` and column
            assignment (presumably a one-row ``pandas.DataFrame`` -- confirm
            against callers). The caller's object is not modified.
        feature_to_vary: Name of the column to overwrite with each candidate
            value.
        range_values: Iterable of candidate values for ``feature_to_vary``.
            One-shot iterables (e.g. generators) are supported.
        feature_names: Unused; retained for interface compatibility.

    Returns:
        The ``matplotlib.pyplot`` module, with the newly drawn figure current.
    """
    # Materialise once: the values are consumed by the prediction loop and
    # then again as the x axis, which would leave a generator exhausted.
    range_values = list(range_values)

    # The dispatch does not depend on the loop variable, so resolve it once.
    scorer = model if hasattr(model, 'predict_proba') else model.model

    instance_df = instance.copy()
    predictions = []
    for value in range_values:
        instance_df[feature_to_vary] = value
        # First row, second column of the predict_proba output.
        predictions.append(scorer.predict_proba(instance_df)[0][1])

    plt.figure(figsize=(10, 6))
    plt.plot(range_values, predictions)
    plt.xlabel(feature_to_vary)
    plt.ylabel('Probability of Approval')
    plt.title(f'What-If Analysis: Impact of {feature_to_vary} on Loan Approval')
    plt.grid(True)
    return plt
def plot_comparative_analysis(model, instance_a, instance_b, explainer, feature_names=None):
    """Plot the features whose SHAP values differ most between two instances.

    Args:
        model: Unused by the plot; retained for interface compatibility. The
            predictions previously computed from it were never consumed.
        instance_a: First single-row input; its first row is read via
            ``.values[0]``.
        instance_b: Second single-row input, read the same way.
        explainer: Object exposing ``explain_instance(instance)``; element
            ``[0]`` of its result is used as the per-feature SHAP values
            (assumed to align with the feature order -- confirm against the
            explainer implementation).
        feature_names: Optional labels, one per feature. When omitted, the
            column names of ``instance_a`` are used if available; otherwise
            generic ``Feature <index>`` labels are generated.

    Returns:
        The ``matplotlib.pyplot`` module, with the newly drawn figure current.
    """
    values_a = instance_a.values[0]
    values_b = instance_b.values[0]

    if feature_names is None:
        # A None label column cannot be plotted, so derive usable labels.
        columns = getattr(instance_a, 'columns', None)
        if columns is not None:
            feature_names = list(columns)
        else:
            feature_names = [f'Feature {i}' for i in range(len(values_a))]

    # Get explanations
    shap_a = explainer.explain_instance(instance_a)
    shap_b = explainer.explain_instance(instance_b)

    # Create comparison dataframe
    comparison = pd.DataFrame({
        'Feature': feature_names,
        'Instance A': values_a,
        'Instance B': values_b,
        'SHAP A': shap_a[0],
        'SHAP B': shap_b[0],
    })

    # Calculate difference in SHAP values
    comparison['SHAP Diff'] = comparison['SHAP B'] - comparison['SHAP A']

    # Sort by absolute difference, largest first. Sorting on a plain ndarray
    # keeps the result purely positional for ``iloc``.
    order = np.argsort(np.abs(comparison['SHAP Diff'].to_numpy()))[::-1]
    comparison = comparison.iloc[order]

    # Plot top differences
    plt.figure(figsize=(12, 8))
    sns.barplot(x='SHAP Diff', y='Feature', data=comparison.head(10))
    plt.title('Top Features Explaining the Difference in Predictions')
    plt.tight_layout()
    return plt