Source code for dowhy.causal_estimators.linear_regression_estimator

import numpy as np
from sklearn import linear_model
import pandas as pd
from dowhy.causal_estimator import CausalEstimate
from dowhy.causal_estimator import CausalEstimator


class LinearRegressionEstimator(CausalEstimator):
    """Compute effect of treatment using linear regression.

    The coefficient of the treatment variable in the regression model is
    computed as the causal effect. Common method but the assumptions required
    are too strong. Avoid.

    The regression features are the treatment column(s) followed by the
    dummy-encoded back-door variables of the target estimand (when there are
    any), so the treatment coefficient is always the first one.
    """

    def __init__(self, *args, **kwargs):
        """Set up the estimator and pre-process the back-door variables.

        All arguments are forwarded unchanged to ``CausalEstimator``.
        NOTE(review): ``self.logger``, ``self._data``, ``self._treatment``,
        ``self._outcome`` and ``self._target_estimand`` are not assigned in
        this class; presumably the base-class constructor provides them.
        """
        super().__init__(*args, **kwargs)
        self.logger.debug("Back-door variables used:" +
                          ",".join(self._target_estimand.backdoor_variables))
        self._observed_common_causes_names = self._target_estimand.backdoor_variables
        if len(self._observed_common_causes_names) > 0:
            common_causes = self._data[self._observed_common_causes_names]
            # Categorical columns become dummy columns; drop_first=True drops
            # one level per column.
            self._observed_common_causes = pd.get_dummies(common_causes, drop_first=True)
        else:
            # No back-door variables: the regression uses the treatment only.
            self._observed_common_causes = None
        self.logger.info("INFO: Using Linear Regression Estimator")
        self.symbolic_estimator = self.construct_symbolic_estimator(self._target_estimand)
        self.logger.info(self.symbolic_estimator)
        # Fitted lazily by _build_linear_model().
        self._linear_model = None

    def _build_features(self, treatment_2d):
        """Return the regression feature matrix for the given treatment values.

        :param treatment_2d: 2-D array with one row per data point holding the
            (observed or interventional) treatment values.
        :returns: ``treatment_2d`` itself when there are no observed common
            causes, otherwise ``treatment_2d`` with the encoded common causes
            appended as extra columns.
        """
        if self._observed_common_causes is None:
            return treatment_2d
        return np.concatenate((treatment_2d, self._observed_common_causes),
                              axis=1)

    def _estimate_effect(self):
        """Fit the regression and return the treatment coefficient as the estimate.

        The model is re-fitted on every call. The returned ``CausalEstimate``
        carries the first coefficient (the treatment column comes first in the
        feature matrix) together with the fitted intercept.
        """
        self._build_linear_model()
        coefficients = self._linear_model.coef_
        self.logger.debug("Coefficients of the fitted linear model: " +
                          ",".join(map(str, coefficients)))
        estimate = CausalEstimate(estimate=coefficients[0],
                                  target_estimand=self._target_estimand,
                                  realized_estimand_expr=self.symbolic_estimator,
                                  intercept=self._linear_model.intercept_)
        return estimate

    def construct_symbolic_estimator(self, estimand):
        """Return a formula-like string describing the fitted regression.

        The result has the form ``"b: <outcomes>~<var>+<var>+..."`` where the
        outcomes are joined with commas and the variables are the estimand's
        treatment variables followed by its back-door variables.
        """
        expr = "b: " + ",".join(estimand.outcome_variable) + "~"
        var_list = estimand.treatment_variable + estimand.backdoor_variables
        expr += "+".join(var_list)
        return expr

    def _build_linear_model(self):
        """Fit a linear regression of the outcome on treatment and common causes.

        The fitted model is stored in ``self._linear_model`` only after
        ``fit`` has succeeded.
        """
        treatment_2d = self._treatment.values.reshape(len(self._treatment), -1)
        # _build_features handles the "no common causes" case, which used to
        # crash here because np.concatenate was handed None.
        features = self._build_features(treatment_2d)
        model = linear_model.LinearRegression()
        model.fit(features, self._outcome)
        self._linear_model = model

    def _do(self, x):
        """Return the mean predicted outcome when every unit's treatment is set to ``x``.

        The model is fitted first if that has not happened yet. The observed
        common causes are kept as they are; only the treatment column(s) are
        replaced by the constant ``x``.
        """
        if self._linear_model is None:
            self._build_linear_model()
        interventional_treatment_2d = np.full(
            self._treatment.shape, x).reshape(len(self._treatment), -1)
        features = self._build_features(interventional_treatment_2d)
        interventional_outcomes = self._linear_model.predict(features)
        return interventional_outcomes.mean()