# Iterating over multiple refutation tests
The objective of this notebook is to compare the ability of refuters to detect the problems in a given set of estimators.
Note:
This notebook makes use of the optional dependencies:
- pygraphviz
- causalml

## Import Dependencies

In [1]:
from dowhy.datasets import linear_dataset
from dowhy import CausalModel
import causalml

# Logging configuration: schema version 1, existing loggers stay enabled,
# and the root logger (the '' entry) is set to the 'WARN' level.
import logging.config
DEFAULT_LOGGING = dict(
    version=1,
    disable_existing_loggers=False,
    loggers={
        '': dict(level='WARN'),
    },
)

# Apply the configuration to the logging system.
logging.config.dictConfig(DEFAULT_LOGGING)
# Ignore warnings of category DataConversionWarning (from sklearn.exceptions);
# this is the only warning category filtered here.
import warnings
from sklearn.exceptions import DataConversionWarning
warnings.filterwarnings(action='ignore', category=DataConversionWarning)

## Inspection Parameters
These flags control whether the intermediate results of each stage (datasets, models, identified estimands, estimates and refutations) are printed, so that every step can be sanity-checked.

In [2]:
# Each flag enables the printout of one stage of the pipeline; all are on.
(
    inspect_datasets,              # head of every generated dataset
    inspect_models,                # variables held by every CausalModel
    inspect_identified_estimands,  # identified estimands
    inspect_estimates,             # causal estimates
    inspect_refutations,           # refutation results
) = (True, True, True, True, True)

## Estimator List
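We pass a list of strings corresponding to the estimators of interest, together with a parallel list, `method_params`, that holds the parameters for each estimator (`None` when no extra parameters are passed).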

In [3]:
# Estimator method names; position k here lines up with position k of
# `method_params` below.
estimator_list = [
    "backdoor.propensity_score_matching",
    "backdoor.propensity_score_weighting",
    "backdoor.causalml.inference.meta.LRSRegressor",
]

# Per-estimator parameters, in the same order as `estimator_list`.
method_params = [
    None,
    None,
    {"init_params": {}},
]

## Refuter List
A list of strings, corresponding to each refuter we wish to run

In [4]:
# Refutation method names; each one is run against every estimate.
refuter_list = [
    "bootstrap_refuter",
    "data_subset_refuter",
]

## Create the Datasets

In [5]:
# Parameters for creating the Dataset
TREATMENT_IS_BINARY = True
BETA = 10
NUM_SAMPLES = 5000
NUM_CONFOUNDERS = 5
NUM_INSTRUMENTS = 3
NUM_EFFECT_MODIFIERS = 2

# Creating a Linear Dataset with the given parameters.
# Every argument is taken from the constants above, so editing a constant
# (including TREATMENT_IS_BINARY) actually changes the generated data.
linear_data = linear_dataset(
    beta=BETA,
    num_common_causes=NUM_CONFOUNDERS,
    num_instruments=NUM_INSTRUMENTS,
    num_effect_modifiers=NUM_EFFECT_MODIFIERS,
    num_samples=NUM_SAMPLES,
    treatment_is_binary=TREATMENT_IS_BINARY,
)
# Other datasets come here


# Append them together in an array
datasets = [linear_data]


## Inspect Data

In [6]:
# Print the first rows of every dataset, numbered from 1, when inspection is on.
if inspect_datasets is True:
    for dataset_num, data in enumerate(datasets, start=1):
        header = "####### Dataset {}###########################################################################################".format(dataset_num)
        print(header)
        print(data['df'].head())
        print("#############################################################################################################")

####### Dataset 1###########################################################################################
 X0 X1 Z0 Z1 Z2 W0 W1 W2 \
0 0.815561 1.382192 0.0 0.032699 0.0 -0.349572 -1.011814 -1.409937 
1 0.565588 -0.121230 0.0 0.191581 0.0 1.500906 -1.133247 -1.990940 
2 0.641187 -0.785662 1.0 0.959832 0.0 0.172375 0.482692 -2.712835 
3 0.076662 1.159605 0.0 0.428127 0.0 2.273463 0.327248 1.290102 
4 0.861989 0.494334 0.0 0.553362 0.0 0.904391 0.469244 0.403471 

 W3 W4 v0 y 
0 -0.294557 -1.073794 False -7.431833 
1 0.572827 0.111985 True 10.881939 
2 0.575473 0.166511 True 14.268640 
3 0.401685 -0.335078 True 18.930332 
4 1.122946 1.625132 True 21.673668 
#############################################################################################################


## Create the CausalModels

In [7]:
# Build one CausalModel per dataset from the dataframe, treatment name,
# outcome name and GML graph stored in the dataset dictionary.
models = []
for dataset in datasets:
    model_kwargs = {
        'data': dataset['df'],
        'treatment': dataset['treatment_name'],
        'outcome': dataset['outcome_name'],
        'graph': dataset['gml_graph'],
    }
    model = CausalModel(**model_kwargs)
    models.append(model)

## Inspect Models

In [8]:
# Print the variables held by every model, numbered from 1, when inspection is on.
# NOTE(review): these are underscore-prefixed attributes of CausalModel and may
# not be part of its public API — confirm before relying on them.
if inspect_models is True:
    for model_num, model in enumerate(models, start=1):
        print("####### Model {}#############################################################################################".format(model_num))
        labelled_attributes = (
            ("Common Causes:", model._common_causes),
            ("Effect Modifiers:", model._effect_modifiers),
            ("Instruments:", model._instruments),
            ("Outcome:", model._outcome),
            ("Treatment:", model._treatment),
        )
        for label, value in labelled_attributes:
            print(label, value)
        print("#############################################################################################################")

####### Model 1#############################################################################################
Common Causes: ['Unobserved Confounders', 'W2', 'W4', 'W3', 'W1', 'W0']
Effect Modifiers: ['X1', 'X0']
Instruments: ['Z1', 'Z2', 'Z0']
Outcome: ['y']
Treatment: ['v0']
#############################################################################################################


## Identify Effect

In [9]:
# One identified estimand per model, in the same order as `models`.
identified_estimands = [
    causal_model.identify_effect(proceed_when_unidentifiable=True)
    for causal_model in models
]

## Identified Estimands

In [10]:
# Print every identified estimand, numbered from 1.
# The printout is gated on `inspect_identified_estimands`, like the other
# inspection cells; previously the flag was defined but never consulted.
if inspect_identified_estimands is True:
    for estimand_count, estimand in enumerate(identified_estimands, start=1):
        print("####### Identified Estimand {}#####################################################################################".format(estimand_count))
        print(estimand)
        print("###################################################################################################################")

####### Identified Estimand 1#####################################################################################
Estimand type: nonparametric-ate

### Estimand : 1
Estimand name: backdoor
Estimand expression:
 d 
─────(Expectation(y|W2,W4,W3,W1,X1,W0,X0))
d[v₀] 
Estimand assumption 1, Unconfoundedness: If U→{v0} and U→y then P(y|v0,W2,W4,W3,W1,X1,W0,X0,U) = P(y|v0,W2,W4,W3,W1,X1,W0,X0)

### Estimand : 2
Estimand name: iv
Estimand expression:
Expectation(Derivative(y, [Z1, Z2, Z0])*Derivative([v0], [Z1, Z2, Z0])**(-1))
Estimand assumption 1, As-if-random: If U→→y then ¬(U →→{Z1,Z2,Z0})
Estimand assumption 2, Exclusion: If we remove {Z1,Z2,Z0}→{v0}, then ¬({Z1,Z2,Z0}→y)

### Estimand : 3
Estimand name: frontdoor
No such variable found!

###################################################################################################################


## Estimate Effect

In [11]:
# Estimate the effect with every estimator, for every (model, estimand) pair.
#
# `models` and `identified_estimands` are parallel lists, so they are walked
# together: each estimand is estimated on the model it was identified from.
# (Previously the call used whatever `model` was left over from an earlier
# loop, i.e. always the last model.)
#
# Resulting layout of `estimate_list`: model-major, estimator-minor, i.e. the
# estimate for model i and estimator j sits at index i * len(estimator_list) + j.
estimate_list = []
for model, identified_estimand in zip(models, identified_estimands):
    for j, estimator_name in enumerate(estimator_list):
        estimate = model.estimate_effect(
            identified_estimand,
            method_name=estimator_name,
            method_params=method_params[j],  # parallel to estimator_list
        )
        estimate_list.append(estimate)

 return f(**kwargs)
 return f(**kwargs)


{'X': W2 W4 W3 W1 X1 W0 X0 \
0 -1.409937 -1.073794 -0.294557 -1.011814 1.382192 -0.349572 0.815561 
1 -1.990940 0.111985 0.572827 -1.133247 -0.121230 1.500906 0.565588 
2 -2.712835 0.166511 0.575473 0.482692 -0.785662 0.172375 0.641187 
3 1.290102 -0.335078 0.401685 0.327248 1.159605 2.273463 0.076662 
4 0.403471 1.625132 1.122946 0.469244 0.494334 0.904391 0.861989 
... ... ... ... ... ... ... ... 
4995 -1.523082 2.356658 -1.326664 0.655229 1.413582 2.273291 0.934879 
4996 -1.116186 -0.792025 -0.800518 0.969778 1.004574 1.004584 -1.038316 
4997 -1.052103 0.112904 0.015627 -1.293327 0.701666 1.729135 0.214009 
4998 0.671167 1.218955 0.577825 0.073284 -1.145990 0.997583 0.831404 
4999 -1.667117 -0.272001 0.464098 0.107621 -1.471820 1.069207 0.122401 

 X1 X0 
0 1.382192 0.815561 
1 -0.121230 0.565588 
2 -0.785662 0.641187 
3 1.159605 0.076662 
4 0.494334 0.861989 
... ... ... 
4995 1.413582 0.934879 
4996 1.004574 -1.038316 
4997 0.701666 0.214009 
4998 -1.145990 0.831404 
4999 -1.47182

## Estimate Values

In [12]:
# Print every estimate, numbered from 1, when inspection is on: first the
# value stored under params['estimator_class'], then the estimate itself.
if inspect_estimates is True:
    for estimand_count, estimate in enumerate(estimate_list, start=1):
        print("####### Estimand {}#######################################################################################".format(estimand_count))
        print("*** Class Name ***")
        print()
        print(estimate.params['estimator_class'])
        print()
        print(estimate)
        print("########################################################################################################")
        print()
 

####### Estimand 1#######################################################################################
*** Class Name ***



*** Causal Estimate ***

## Identified estimand
Estimand type: nonparametric-ate

### Estimand : 1
Estimand name: backdoor
Estimand expression:
 d 
─────(Expectation(y|W2,W4,W3,W1,X1,W0,X0))
d[v₀] 
Estimand assumption 1, Unconfoundedness: If U→{v0} and U→y then P(y|v0,W2,W4,W3,W1,X1,W0,X0,U) = P(y|v0,W2,W4,W3,W1,X1,W0,X0)

## Realized estimand
b: y~v0+W2+W4+W3+W1+X1+W0+X0
Target units: ate

## Estimate
Mean value: 13.879166147783451

########################################################################################################

####### Estimand 2#######################################################################################
*** Class Name ***



*** Causal Estimate ***

## Identified estimand
Estimand type: nonparametric-ate

### Estimand : 1
Estimand name: backdoor
Estimand expression:
 d 
─────(Expectation(y|W2,W4,W3,W1,X1,W0,X0))
d[v₀] 
Es

## Refute Estimate

In [13]:
# Run every refuter against every estimate, using the model and estimand that
# the estimate was actually computed from.
#
# `estimate_list` is laid out model-major, estimator-minor (len(estimator_list)
# consecutive estimates per model), so integer division of an estimate's
# position by len(estimator_list) gives the index of its model and estimand.
# Previously the loop crossed every estimand with every estimate and always
# called the `model` left over from an earlier loop, which mismatched the
# three objects when more than one dataset was present.
refutation_list = []
estimates_per_model = len(estimator_list)
for position, estimate in enumerate(estimate_list):
    owner_index = position // estimates_per_model
    model = models[owner_index]
    estimand = identified_estimands[owner_index]
    for refuter in refuter_list:
        ref = model.refute_estimate(estimand, estimate, method_name=refuter)
        refutation_list.append(ref)

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
A column-vector y was passed when a 1d array was expected. Please cha

{'X': W2 W4 W3 W1 X1 W0 X0 \
93 -1.128470 0.803796 -0.849364 0.266145 -0.898931 1.769310 0.072095 
1088 0.002826 1.175457 -0.165811 -1.036364 1.492922 1.770450 1.134532 
2172 -0.340529 1.923116 2.123854 -1.499390 -1.321158 0.995319 -0.686868 
3285 -0.486722 1.118134 1.452272 -0.987275 0.016285 0.150814 0.190272 
1057 -0.426275 -0.112688 0.936973 0.997815 -1.062028 1.102444 2.198371 
... ... ... ... ... ... ... ... 
4823 -0.720302 0.888529 0.723755 -1.820770 -0.491517 1.266704 -0.325428 
3292 -1.883412 1.381702 0.630555 0.523012 0.600994 1.470406 1.104361 
1444 -1.077868 -0.810679 0.525418 -0.806209 -1.114339 0.198871 0.831400 
236 -0.951846 -0.671687 0.649086 -0.822443 1.232260 0.638697 0.966714 
239 -1.521737 -0.147820 -1.469830 -1.244154 1.264690 0.450694 -0.536106 

 X1 X0 
93 -0.898931 0.072095 
1088 1.492922 1.134532 
2172 -1.321158 -0.686868 
3285 0.016285 0.190272 
1057 -1.062028 2.198371 
... ... ... 
4823 -0.491517 -0.325428 
3292 0.600994 1.104361 
1444 -1.114339 0.831400 
23

## Refutation Values

In [14]:
# Print every refutation result, numbered from 1, when inspection is on:
# first its `refutation_type`, then the refutation itself.
if inspect_refutations is True:
    for refuter_count, refutation in enumerate(refutation_list, start=1):
        print("####### Refutation {}#######################################################################################".format(refuter_count))
        print("*** Class Name ***")
        print()
        print(refutation.refutation_type)
        print()
        print(refutation)
        print("########################################################################################################")
        print()

####### Refutation 1#######################################################################################
*** Class Name ***

Refute: Bootstrap Sample Dataset

Refute: Bootstrap Sample Dataset
Estimated effect:13.879166147783451
New effect:13.953445971526296
p value:0.38

########################################################################################################

####### Refutation 2#######################################################################################
*** Class Name ***

Refute: Use a subset of data

Refute: Use a subset of data
Estimated effect:13.879166147783451
New effect:13.840281043290043
p value:0.4

########################################################################################################

####### Refutation 3#######################################################################################
*** Class Name ***

Refute: Bootstrap Sample Dataset

Refute: Bootstrap Sample Dataset
Estimated effect:14.961425818165385
New effect:14.97