#!/usr/bin/env python
# coding: utf-8

# Notebook export: draws interventional ("do") samples for a binary treatment
# 'v' with dowhy's pandas API using the MCMC method, then compares the
# resulting effect estimate against a plain OLS fit.

# In[1]:

import dowhy.datasets
from dowhy.do_samplers.kernel_density_sampler import KernelDensitySampler
from dowhy.do_why import CausalModel
from dowhy.api.causal_data_frame import CausalDataFrame

import numpy as np
import pandas as pd

from statsmodels.api import OLS


def _do_sample(frame, treatment_value, **options):
    """Call ``frame.causal.do`` with treatment 'v' set to *treatment_value*.

    Every call in this script uses the same variable types, outcome, method
    ('mcmc') and ``proceed_when_unidentifiable=True``; *options* carries the
    keywords that differ between calls (``common_causes`` / ``dot_graph``,
    ``keep_original_treatment``, ``use_previous_sampler``).  New dict
    literals are built on every call so no argument object is shared
    between calls.
    """
    return frame.causal.do(
        x={'v': treatment_value},
        variable_types={'v': 'b', 'y': 'c', 'X0': 'c'},
        outcome='y',
        method='mcmc',
        proceed_when_unidentifiable=True,
        **options
    )


# In[2]:

# Synthetic linear data: one common cause, no instruments, binary treatment.
data = dowhy.datasets.linear_dataset(
    beta=5,
    num_common_causes=1,
    num_instruments=0,
    num_samples=1000,
    treatment_is_binary=True,
)


# In[3]:

data['dot_graph'] = 'digraph { v ->y;X0-> v;X0-> y;}'

df = data['df']
# Add noise to the outcome: without it, the variance in Y|X, Z is zero and
# mcmc fails.
outcome_noise = np.random.normal(size=len(df))
df['y'] = df['y'] + outcome_noise


# In[4]:

cdf = CausalDataFrame(df)
# keep_original_treatment=True is passed here; presumably the observed 'v'
# values are retained so the result can be grouped by them -- confirm against
# the dowhy version in use.
kept_treatment_sample = _do_sample(
    cdf, 1, common_causes=['X0'], keep_original_treatment=True
)
mean_by_treatment = kept_treatment_sample.groupby('v').mean()
mean_by_treatment.plot(y='y', kind='bar')


# In[6]:

# Start again from a new CausalDataFrame built from the same data.
cdf = CausalDataFrame(df)


# In[9]:

# Causal structure given as a dot graph.  The second call passes
# use_previous_sampler=True (by its name, it reuses the sampler of the
# preceding call -- verify).
cdf_1 = _do_sample(cdf, 1, dot_graph=data['dot_graph'])
cdf_0 = _do_sample(
    cdf, 0, dot_graph=data['dot_graph'], use_previous_sampler=True
)


# In[10]:

cdf_0


# In[11]:

cdf_1


# In[13]:

# Same pair of interventions, with the structure given as a list of common
# causes instead of a graph.
cdf_1 = _do_sample(
    cdf, 1, common_causes=['X0'], use_previous_sampler=False
)
cdf_0 = _do_sample(
    cdf, 0, common_causes=['X0'], use_previous_sampler=True
)


# In[ ]:

# Mean difference in outcome between the v=1 and v=0 samples.
(cdf_1['y'] - cdf_0['y']).mean()


# In[ ]:

# 1.96 standard errors of that difference (normal-approximation half-width).
1.96 * (cdf_1['y'] - cdf_0['y']).std() / np.sqrt(len(cdf))


# In[ ]:

# Regression of y on X0 and v for comparison.  No constant column is added,
# so statsmodels fits this model without an intercept.
regressors = df[['X0', 'v']]
model = OLS(df['y'], regressors)
result = model.fit()
result.summary()


# In[ ]:

cdf_1


# In[ ]:

cdf_0


# In[ ]:

cdf_do = _do_sample(
    cdf, 0, common_causes=['X0'], keep_original_treatment=True
)


# In[ ]:

cdf_do


# In[ ]: