#!/usr/bin/env python
# coding: utf-8
"""Feature-interpretation walkthrough for causalml meta-learners.

Exported from a Jupyter notebook.  On synthetic uplift data it fits the
S-, T-, X- and R-learner meta-algorithms and, for each, ranks features of
the estimated treatment effect (tau) three ways:

  * the base learner's built-in importance (method='auto'),
  * causalml's own permutation importance (method='permutation'),
  * ``sklearn.inspection.permutation_importance`` on a held-out split,

and finishes each section with SHAP value summary/dependence plots.
"""

import os
import time
import warnings

# Must be set BEFORE the OpenMP runtime is loaded (it is pulled in when
# lightgbm is imported below); otherwise duplicate libomp copies can abort
# the process.  Setting it after the import is too late.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'  # for lightgbm to work

# The notebook lives one level below the repo root; chdir so the local
# causalml package resolves on import.
base_path = os.path.abspath("../")
os.chdir(base_path)
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import shap
from causalml.dataset.regression import synthetic_data
from causalml.inference.meta import (
    BaseRRegressor,
    BaseSRegressor,
    BaseTRegressor,
    BaseXRegressor,
)
from lightgbm import LGBMRegressor
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.inspection import permutation_importance
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor

# Notebook conveniences; skipped gracefully when run as a plain script
# (get_ipython is only defined inside an IPython kernel).
try:
    _ipython = get_ipython()
except NameError:
    _ipython = None
if _ipython is not None:
    _ipython.run_line_magic('reload_ext', 'autoreload')
    _ipython.run_line_magic('autoreload', '2')
    _ipython.run_line_magic('matplotlib', 'inline')

plt.style.use('fivethirtyeight')

# ---------------------------------------------------------------------------
# Synthetic data: mode=1 generates a single binary treatment with known tau.
# ---------------------------------------------------------------------------
n_features = 25
n_samples = 10000
y, X, w, tau, b, e = synthetic_data(mode=1, n=n_samples, p=n_features, sigma=0.5)

# causalml's multi-treatment API expects string group labels and a dict of
# propensity scores keyed by treatment name.
w_multi = np.array(['treatment_A' if x == 1 else 'control' for x in w])
e_multi = {'treatment_A': e}

# specify feature names
feature_names = ['stars', 'tiger', 'merciful', 'quixotic', 'fireman',
                 'dependent', 'shelf', 'touch', 'barbarous', 'clammy',
                 'playground', 'rain', 'offer', 'cute', 'future', 'damp',
                 'nonchalant', 'change', 'rigid', 'sweltering', 'eight',
                 'wrap', 'lethal', 'adhesive', 'lip']

model_tau = LGBMRegressor(importance_type='gain')  # specify model for model_tau


def timed_permutation_importance(learner, tau_hat):
    """causalml permutation importance for *learner*, with wall-clock timing.

    Returns the importance ranking (whatever ``get_importance`` yields).
    """
    start_time = time.time()
    imp = learner.get_importance(X=X, tau=tau_hat, method='permutation',
                                 features=feature_names, random_state=42)
    print("Elapsed time: %s seconds" % (time.time() - start_time))
    return imp


def sklearn_permutation_importance(tau_hat, title):
    """Evaluate ``model_tau`` -> tau_hat with sklearn's permutation importance.

    Fits ``model_tau`` on a 70/30 train/test split of (X, tau_hat), prints the
    held-out-set importance ranking and elapsed time, and draws a horizontal
    bar chart titled *title*.

    Returns (fitted model, X_train, X_test, y_train, y_test, importances) so
    callers can additionally inspect training-set importances.
    """
    start_time = time.time()
    X_train, X_test, y_train, y_test = train_test_split(
        X, tau_hat, test_size=0.3, random_state=42)
    model_tau_fit = model_tau.fit(X_train, y_train)
    perm_imp_test = permutation_importance(
        estimator=model_tau_fit, X=X_test, y=y_test,
        random_state=42).importances_mean
    print(pd.Series(perm_imp_test, feature_names).sort_values(ascending=False))
    print("Elapsed time: %s seconds" % (time.time() - start_time))
    pd.Series(perm_imp_test, feature_names).sort_values().plot(
        kind='barh', figsize=(12, 8))
    plt.title(title)
    return model_tau_fit, X_train, X_test, y_train, y_test, perm_imp_test


# ---------------------------------------------------------------------------
# S Learner
# ---------------------------------------------------------------------------
base_algo = LGBMRegressor()
# base_algo = XGBRegressor()
# base_algo = RandomForestRegressor()
# base_algo = LinearRegression()

slearner = BaseSRegressor(base_algo, control_name='control')
slearner.estimate_ate(X, w_multi, y)
slearner_tau = slearner.fit_predict(X, w_multi, y)

# Feature importance (method='auto')
slearner.get_importance(X=X, tau=slearner_tau, normalize=True, method='auto',
                        features=feature_names)
slearner.plot_importance(X=X, tau=slearner_tau, normalize=True, method='auto',
                         features=feature_names)

# Feature importance (method='permutation')
timed_permutation_importance(slearner, slearner_tau)
slearner.plot_importance(X=X, tau=slearner_tau, method='permutation',
                         features=feature_names, random_state=42)

# Feature importance (sklearn.inspection.permutation_importance), test split
(model_tau_fit, X_train, X_test, y_train, y_test,
 perm_imp_test) = sklearn_permutation_importance(
    slearner_tau, 'Test Set Permutation Importances')

# Training-set importances, for contrast with the held-out ranking above.
perm_imp_train = permutation_importance(
    estimator=model_tau_fit, X=X_train, y=y_train,
    random_state=42).importances_mean
print(pd.Series(perm_imp_train, feature_names).sort_values(ascending=False))
pd.Series(perm_imp_train, feature_names).sort_values().plot(
    kind='barh', figsize=(12, 8))
plt.title('Training Set Permutation Importances')

# Shapley values
shap_slearner = slearner.get_shap_values(X=X, tau=slearner_tau)
print(np.mean(np.abs(shap_slearner['treatment_A']), axis=0))  # mean |SHAP|

# Plot shap values without specifying shap_dict
slearner.plot_shap_values(X=X, tau=slearner_tau, features=feature_names)
# Plot shap values WITH specifying shap_dict
slearner.plot_shap_values(shap_dict=shap_slearner)

# interaction_idx set to None (no color coding for interaction effects)
slearner.plot_shap_dependence(treatment_group='treatment_A', feature_idx=1,
                              X=X, tau=slearner_tau, interaction_idx=None,
                              shap_dict=shap_slearner)
# interaction_idx set to 'auto' (searches for feature with greatest
# approximate interaction); feature referenced by name
slearner.plot_shap_dependence(treatment_group='treatment_A',
                              feature_idx='tiger', X=X, tau=slearner_tau,
                              interaction_idx='auto', shap_dict=shap_slearner,
                              features=feature_names)
# interaction_idx set to specific index
slearner.plot_shap_dependence(treatment_group='treatment_A', feature_idx=1,
                              X=X, tau=slearner_tau, interaction_idx=10,
                              shap_dict=shap_slearner, features=feature_names)

# ---------------------------------------------------------------------------
# T Learner
# ---------------------------------------------------------------------------
tlearner = BaseTRegressor(LGBMRegressor(), control_name='control')
tlearner.estimate_ate(X, w_multi, y)
tlearner_tau = tlearner.fit_predict(X, w_multi, y)

# Feature importance (method='auto')
tlearner.get_importance(X=X, tau=tlearner_tau, normalize=True, method='auto',
                        features=feature_names)
tlearner.plot_importance(X=X, tau=tlearner_tau, normalize=True, method='auto',
                         features=feature_names)

# Feature importance (method='permutation')
timed_permutation_importance(tlearner, tlearner_tau)
tlearner.plot_importance(X=X, tau=tlearner_tau, method='permutation',
                         features=feature_names, random_state=42)

# Feature importance (sklearn.inspection.permutation_importance)
sklearn_permutation_importance(tlearner_tau, 'Test Set Permutation Importances')

# Shapley values
shap_tlearner = tlearner.get_shap_values(X=X, tau=tlearner_tau)
# Plot shap values without specifying shap_dict
tlearner.plot_shap_values(X=X, tau=tlearner_tau, features=feature_names)
# Plot shap values WITH specifying shap_dict
tlearner.plot_shap_values(shap_dict=shap_tlearner)

# ---------------------------------------------------------------------------
# X Learner (needs propensity scores e_multi in addition to treatment labels)
# ---------------------------------------------------------------------------
xlearner = BaseXRegressor(LGBMRegressor(), control_name='control')
xlearner.estimate_ate(X, e_multi, w_multi, y)
xlearner_tau = xlearner.predict(X, e_multi, w_multi, y)

# Feature importance (method='auto')
xlearner.get_importance(X=X, tau=xlearner_tau, normalize=True, method='auto',
                        features=feature_names)
xlearner.plot_importance(X=X, tau=xlearner_tau, normalize=True, method='auto',
                         features=feature_names)

# Feature importance (method='permutation')
timed_permutation_importance(xlearner, xlearner_tau)
xlearner.plot_importance(X=X, tau=xlearner_tau, method='permutation',
                         features=feature_names, random_state=42)

# Feature importance (sklearn.inspection.permutation_importance)
sklearn_permutation_importance(xlearner_tau, 'Test Set Permutation Importances')

# Shapley values
shap_xlearner = xlearner.get_shap_values(X=X, tau=xlearner_tau)
# shap_dict not specified
xlearner.plot_shap_values(X=X, tau=xlearner_tau, features=feature_names)
# shap_dict specified
xlearner.plot_shap_values(shap_dict=shap_xlearner)

# ---------------------------------------------------------------------------
# R Learner
# ---------------------------------------------------------------------------
rlearner = BaseRRegressor(LGBMRegressor(), control_name='control')
rlearner_tau = rlearner.fit_predict(X, e_multi, w_multi, y)

# Feature importance (method='auto')
rlearner.get_importance(X=X, tau=rlearner_tau, normalize=True, method='auto',
                        features=feature_names)
rlearner.plot_importance(X=X, tau=rlearner_tau, method='auto',
                         features=feature_names)

# Feature importance (method='permutation')
timed_permutation_importance(rlearner, rlearner_tau)
rlearner.plot_importance(X=X, tau=rlearner_tau, method='permutation',
                         features=feature_names, random_state=42)

# Feature importance (sklearn.inspection.permutation_importance)
sklearn_permutation_importance(rlearner_tau, 'Test Set Permutation Importances')

# Shapley values
shap_rlearner = rlearner.get_shap_values(X=X, tau=rlearner_tau)
# without providing shap_dict
rlearner.plot_shap_values(X=X, tau=rlearner_tau, features=feature_names)
# with providing shap_dict
rlearner.plot_shap_values(shap_dict=shap_rlearner)