import numpy as np
import pandas as pd
from dowhy.causal_estimator import CausalEstimator
class PropensityScoreEstimator(CausalEstimator):
    """
    Base class for estimators that estimate effects based on propensity of
    treatment assignment.

    For a list of standard args and kwargs, see documentation for
    :class:`~dowhy.causal_estimator.CausalEstimator`.

    Supports additional parameters as listed below.

    Subclasses are expected to override :meth:`construct_symbolic_estimator`
    and :meth:`_estimate_effect`; both raise ``NotImplementedError`` here.
    """

    def __init__(self, *args, propensity_score_model=None,
                 recalculate_propensity_score=True,
                 propensity_score_column="propensity_score", **kwargs):
        """
        Validate the treatment and prepare the common-cause covariates.

        :param propensity_score_model: Model used to compute propensity score.
            Can be any classification model that supports fit() and
            predict_proba() methods. If None, LogisticRegression is used.
        :param recalculate_propensity_score: Whether the propensity score
            should be estimated. To use pre-computed propensity scores,
            set this value to False. Default=True.
        :param propensity_score_column: Column name that stores the
            propensity score. Default='propensity_score'
        :raises ValueError: if more than one treatment variable is given,
            if the treatment (after casting to int) takes a value other
            than 0 or 1, or if there are no back-door variables.
        """
        # Required to ensure that self.method_params contains all the
        # parameters to create an object of this class.
        # NOTE: keep this as the first statement of __init__. locals() is
        # snapshotted here, so any local variable assigned above this line
        # would leak into the arguments forwarded to the parent class.
        args_dict = {k: v for k, v in locals().items()
                     if k not in type(self)._STD_INIT_ARGS}
        args_dict.update(kwargs)
        super().__init__(*args, **args_dict)

        # Enable the user to pass params for a custom propensity model
        self.propensity_score_model = propensity_score_model
        self.recalculate_propensity_score = recalculate_propensity_score
        self.propensity_score_column = propensity_score_column

        # The attributes read below (_treatment_name, _data, logger,
        # _target_estimand) are not assigned in this class; presumably the
        # parent constructor sets them -- verify against CausalEstimator.

        # Check if the treatment is one-dimensional
        if len(self._treatment_name) > 1:
            error_msg = (str(self.__class__) +
                         " cannot handle more than one treatment variable")
            self.logger.error(error_msg)
            raise ValueError(error_msg)

        # Checking if the treatment is binary.
        # NOTE(review): the cast to int happens before the check, so
        # booleans are accepted, but fractional values (e.g. 0.5) are
        # truncated and may also pass -- confirm that this is intended.
        treatment_values = (
            self._data[self._treatment_name[0]].astype(int).unique()
        )
        if any(v not in (0, 1) for v in treatment_values):
            error_msg = "Propensity score methods are applicable only for binary treatments"
            self.logger.error(error_msg)
            raise ValueError(error_msg)

        # Fetch the back-door variables once and reuse the result for both
        # the debug message and the covariate selection. The `or []` keeps
        # a None/empty result from failing inside join(), so the explicit
        # "no common causes" error below is the one the user sees.
        backdoor_variables = self._target_estimand.get_backdoor_variables()
        self.logger.debug("Back-door variables used:" +
                          ",".join(backdoor_variables or []))

        self._observed_common_causes_names = backdoor_variables
        if self._observed_common_causes_names:
            self._observed_common_causes = self._data[self._observed_common_causes_names]
            # Convert the categorical variables into dummy/indicator variables
            # Basically, this gives a one hot encoding for each category
            # The first category is taken to be the base line.
            self._observed_common_causes = pd.get_dummies(
                self._observed_common_causes, drop_first=True)
        else:
            self._observed_common_causes = None
            error_msg = "No common causes/confounders present. Propensity score based methods are not applicable"
            self.logger.error(error_msg)
            raise ValueError(error_msg)

    def construct_symbolic_estimator(self, estimand):
        """
        A symbolic string that conveys what each estimator does.

        For instance, linear regression is expressed as
        y ~ bx + e

        :param estimand: the estimand to describe; unused in this base
            implementation.
        :raises NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError

    def _estimate_effect(self):
        """
        A custom estimator based on the way the propensity score estimates
        are to be used.

        :raises NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError