Source code for dowhy.causal_estimators.generalized_linear_model_estimator
import itertools
import statsmodels.api as sm
from dowhy.causal_estimators.regression_estimator import RegressionEstimator
class GeneralizedLinearModelEstimator(RegressionEstimator):
    """Compute effect of treatment using a generalized linear model such as logistic regression.

    Implementation uses statsmodels.api.GLM.

    Needs an additional parameter, "glm_family", to be specified in
    method_params. The value of this parameter can be any valid
    statsmodels.api families object. For example, to use logistic regression,
    specify "glm_family" as statsmodels.api.families.Binomial().
    """

    def __init__(self, *args, glm_family=None, predict_score=True, **kwargs):
        """For a list of args and kwargs, see documentation for
        :class:`~dowhy.causal_estimator.CausalEstimator`.

        :param glm_family: statsmodels family for the generalized linear model.
            For example, use statsmodels.api.families.Binomial() for logistic
            regression or statsmodels.api.families.Poisson() for count data.
        :param predict_score: For models that have a binary output, whether
            to output the model's score or the binary output based on the score.
        :raises ValueError: if ``glm_family`` is not provided.
        """
        # Required to ensure that self.method_params contains all the
        # parameters needed to create an object of this class.
        # Keep this as the first statement: locals() is snapshotted before any
        # other local name is bound, and names listed in _STD_INIT_ARGS are
        # filtered out of the snapshot.
        args_dict = {k: v for k, v in locals().items() if k not in type(self)._STD_INIT_ARGS}
        args_dict.update(kwargs)
        super().__init__(*args, **args_dict)
        self.logger.info("INFO: Using Generalized Linear Model Estimator")

        if glm_family is None:
            raise ValueError(
                "Need to specify the family for the generalized linear model. Provide a 'glm_family' parameter in method_params, such as statsmodels.api.families.Binomial() for logistic regression."
            )
        self.family = glm_family
        self.predict_score = predict_score

        # Checking if Y is binary.
        # The raw values are compared against {0, 1} without an integer cast:
        # casting first would truncate e.g. 0.3 -> 0 and 1.7 -> 1 and wrongly
        # flag a continuous outcome as binary. Booleans and 0.0/1.0 floats
        # still compare equal to 0/1 and therefore count as binary.
        outcome_values = self._data[self._outcome_name].unique()
        self.outcome_is_binary = all(value in (0, 1) for value in outcome_values)

    def _build_model(self):
        """Fit a statsmodels GLM of the outcome on the built features.

        :returns: tuple ``(features, fitted_model)``.
        """
        features = self._build_features()
        glm = sm.GLM(self._outcome, features, family=self.family)
        return (features, glm.fit())

    def predict_fn(self, model, features):
        """Predict outcomes for ``features`` using the fitted ``model``.

        :param model: fitted model exposing ``predict``.
        :param features: feature matrix passed to ``model.predict``.
        :returns: the model's predictions as-is, unless the outcome is binary
            and ``predict_score`` is False, in which case the predictions are
            thresholded at 0.5 and returned as 0/1 integers.
        """
        predictions = model.predict(features)
        if self.outcome_is_binary and not self.predict_score:
            return (predictions > 0.5).astype(int)
        return predictions

    def construct_symbolic_estimator(self, estimand):
        """Build a human-readable expression describing the fitted model.

        The right-hand side lists the treatment and backdoor variables, plus
        one ``treatment*modifier`` term for every (treatment, effect modifier)
        pair when effect modifiers are present.

        :param estimand: object providing ``outcome_variable``,
            ``treatment_variable`` and ``get_backdoor_variables()``.
        :returns: string of the form ``"b: <outcome>~Sigmoid(<terms>)"``.
        """
        # NOTE(review): the "Sigmoid" label is hard-coded and is emitted
        # whatever glm_family was supplied; confirm whether that is intended
        # for non-logistic families.
        outcome_part = ",".join(estimand.outcome_variable)
        var_list = estimand.treatment_variable + estimand.get_backdoor_variables()
        rhs = "+".join(var_list)
        if self._effect_modifier_names:
            interaction_terms = [
                "{0}*{1}".format(treatment, modifier)
                for treatment, modifier in itertools.product(
                    estimand.treatment_variable, self._effect_modifier_names
                )
            ]
            rhs += "+" + "+".join(interaction_terms)
        return "b: " + outcome_part + "~" + "Sigmoid(" + rhs + ")"