class RegressionEstimator(CausalEstimator):
    """Compute effect of treatment using some regression function.

    Fits a regression model for estimating the outcome using treatment(s) and
    confounders.

    The fitted model is cached on ``self.model`` and reused by later calls.
    ``_build_model`` is not defined in this class; it is expected to return a
    ``(features, model)`` pair and is presumably provided by concrete
    subclasses (TODO confirm).
    """

    def __init__(self, *args, **kwargs):
        """Initialise the estimator and pre-encode the back-door variables.

        All arguments are forwarded unchanged to the parent constructor.
        """
        super().__init__(*args, **kwargs)
        # Fetch the back-door variable names once and reuse them for both the
        # log message and the stored attribute.
        self._observed_common_causes_names = self._target_estimand.get_backdoor_variables()
        self.logger.debug("Back-door variables used:" +
                          ",".join(self._observed_common_causes_names))
        if len(self._observed_common_causes_names) > 0:
            # One-hot encode the common causes; drop_first drops the first
            # level of each encoded variable.
            self._observed_common_causes = pd.get_dummies(
                self._data[self._observed_common_causes_names],
                drop_first=True)
        else:
            self._observed_common_causes = None
        self.symbolic_estimator = self.construct_symbolic_estimator(self._target_estimand)
        self.logger.info(self.symbolic_estimator)
        # Fitted lazily on first use by _estimate_effect / _do.
        self.model = None

    def _estimate_effect(self, data_df=None, need_conditional_estimates=None):
        """Estimate the effect as the difference of two interventional means.

        :param data_df: data on which the interventions are evaluated;
            defaults to ``self._data``.
        :param need_conditional_estimates: whether to also compute conditional
            effects; defaults to ``self.need_conditional_estimates``.
        :returns: a ``CausalEstimate`` carrying the effect estimate, the
            optional conditional estimates and the model intercept.
        """
        # TODO make treatment_value and control value also as local parameters
        if data_df is None:
            data_df = self._data
        if need_conditional_estimates is None:
            need_conditional_estimates = self.need_conditional_estimates
        # Checking if the model is already trained. Compare against None
        # explicitly: truthiness of a fitted-model object is not a reliable
        # "is fitted" signal.
        if self.model is None:
            # The model is always built on the entire data
            _features, self.model = self._build_model()
            coefficients = self.model.params[1:]  # first coefficient is the intercept
            self.logger.debug("Coefficients of the fitted model: " +
                              ",".join(map(str, coefficients)))
            self.logger.debug(self.model.summary())
        # All treatments are set to the same constant value
        effect_estimate = (self._do(self._treatment_value, data_df)
                           - self._do(self._control_value, data_df))
        conditional_effect_estimates = None
        if need_conditional_estimates:
            conditional_effect_estimates = self._estimate_conditional_effects(
                self._estimate_effect_fn,
                effect_modifier_names=self._effect_modifier_names)
        intercept_parameter = self.model.params[0]
        return CausalEstimate(estimate=effect_estimate,
                              conditional_estimates=conditional_effect_estimates,
                              target_estimand=self._target_estimand,
                              realized_estimand_expr=self.symbolic_estimator,
                              intercept=intercept_parameter)

    def _estimate_effect_fn(self, data_df):
        """Return only the ``value`` of the estimate computed on ``data_df``.

        Passed as the callback to ``_estimate_conditional_effects``;
        conditional estimation is disabled for the inner call.
        """
        est = self._estimate_effect(data_df, need_conditional_estimates=False)
        return est.value

    def _build_features(self, treatment_values=None, data_df=None):
        """Assemble the regression design matrix.

        Columns, in order: a constant, the treatment column(s), the encoded
        common causes (if any), then for each treatment column its product
        with every encoded effect modifier (if any).

        :param treatment_values: optional values that replace the observed
            treatment; must have as many rows as the data used.
        :param data_df: data to build features from; defaults to the stored
            data and its pre-encoded attributes.
        :returns: float array with a leading constant column.
        :raises ValueError: if the treatment values and the data differ in
            number of rows.
        """
        # Using all data by default
        if data_df is None:
            data_df = self._data
            treatment_vals = self._treatment
            observed_common_causes_vals = self._observed_common_causes
            effect_modifiers_vals = self._effect_modifiers
        else:
            treatment_vals = data_df[self._treatment_name]
            # NOTE(review): dummies are recomputed from data_df, so the set of
            # encoded columns may differ from the matrix the model was fitted
            # on if data_df lacks some category levels -- confirm with callers.
            if len(self._observed_common_causes_names) > 0:
                observed_common_causes_vals = pd.get_dummies(
                    data_df[self._observed_common_causes_names],
                    drop_first=True)
            if self._effect_modifier_names:
                effect_modifiers_vals = pd.get_dummies(
                    data_df[self._effect_modifier_names],
                    drop_first=True)
        # Fixing treatment value to the specified value, if provided
        if treatment_values is not None:
            treatment_vals = treatment_values
        # isinstance (rather than an exact type comparison) so that ndarray
        # subclasses are not sent down the pandas ``to_numpy`` path.
        if not isinstance(treatment_vals, np.ndarray):
            treatment_vals = treatment_vals.to_numpy()
        # treatment_vals and data_df should have same number of rows
        if treatment_vals.shape[0] != data_df.shape[0]:
            raise ValueError("Provided treatment values and dataframe should have the same length.")
        # Building the feature matrix
        n_samples = treatment_vals.shape[0]
        treatment_2d = treatment_vals.reshape((n_samples, len(self._treatment_name)))
        if len(self._observed_common_causes_names) > 0:
            features = np.concatenate((treatment_2d, observed_common_causes_vals),
                                      axis=1)
        else:
            features = treatment_2d
        if self._effect_modifier_names:
            # Convert once, then build one interaction block per treatment
            # column and append them all in a single concatenate.
            modifier_matrix = effect_modifiers_vals.to_numpy()
            interaction_blocks = [
                treatment_2d[:, i][:, np.newaxis] * modifier_matrix
                for i in range(treatment_2d.shape[1])
            ]
            features = np.concatenate([features, *interaction_blocks], axis=1)
        # converting to float in case of binary treatment and no other variables
        features = features.astype(float, copy=False)
        # to add an intercept term
        features = sm.add_constant(features, has_constant='add')
        return features

    def _do(self, treatment_val, data_df=None):
        """Return the mean predicted outcome with treatment set to a constant.

        :param treatment_val: value assigned to every treatment column for
            every row.
        :param data_df: data to evaluate on; defaults to ``self._data``.
        :returns: mean of the model predictions on the intervened features.
        """
        if data_df is None:
            data_df = self._data
        if self.model is None:
            # The model is always built on the entire data
            _, self.model = self._build_model()
        # Replacing treatment values by given x
        interventional_treatment_2d = np.full(
            (data_df.shape[0], len(self._treatment_name)), treatment_val)
        new_features = self._build_features(
            treatment_values=interventional_treatment_2d,
            data_df=data_df)
        interventional_outcomes = self.model.predict(new_features)
        return interventional_outcomes.mean()