class DataGeneratingProcess:
    """Base class for implementation of data generating process.

    Subclasses implement functions that create various data generating
    processes. All data generating processes are in the package
    "dowhy.utils.dgps".
    """

    # Fraction used for binarising the treatment when the caller asks for a
    # binary treatment but does not supply ``percentile``.
    DEFAULT_PERCENTILE = 0.9

    def __init__(self, **kwargs):
        """Store the configuration of the data generating process.

        Required keyword arguments (a missing one raises ``KeyError``):
            treatment, outcome, confounder, effect_modifier: stored as given
                on the attributes of the same name.

        Optional keyword arguments:
            weights: stored on ``self.weights``; defaults to ``{}``.
            bias: stored on ``self.bias``; defaults to ``{}``.
            seed: when not ``None``, seeds NumPy's *global* random state via
                ``np.random.seed``; defaults to ``None``.
            treatment_is_binary: whether the treatment is binary; defaults to
                ``False``.
            percentile: fraction in [0, 1] used by ``convert_to_binary``.
                Only accepted when ``treatment_is_binary`` is true, in which
                case it defaults to ``DEFAULT_PERCENTILE``.

        Raises:
            ValueError: if ``percentile`` is supplied (and is not ``None``)
                while ``treatment_is_binary`` is false.
        """
        self.treatment = kwargs["treatment"]
        self.outcome = kwargs["outcome"]
        self.confounder = kwargs["confounder"]
        self.effect_modifier = kwargs["effect_modifier"]
        self.weights = kwargs.pop("weights", {})
        self.bias = kwargs.pop("bias", {})
        self.seed = kwargs.pop("seed", None)
        self.treatment_is_binary = kwargs.pop("treatment_is_binary", False)

        if self.treatment_is_binary:
            self.percentile = kwargs.pop(
                "percentile", DataGeneratingProcess.DEFAULT_PERCENTILE
            )
        elif kwargs.pop("percentile", None) is not None:
            raise ValueError("Cannot use percentile, if the input is non-binary")
        else:
            # Placeholder marking that no percentile applies.
            self.percentile = "NA"

        self.true_value = None

        if self.seed is not None:
            np.random.seed(self.seed)

    def convert_to_binary(self, data, deterministic=False):
        """Convert a single-column continuous array into 0.0/1.0 values.

        Meant for instances created with ``treatment_is_binary=True``;
        otherwise ``self.percentile`` is the string ``"NA"`` and the numeric
        operations below fail.

        Args:
            data: array-like of shape ``(n, 1)``. The stochastic branch
                indexes column 0 explicitly, so a 2-D single-column input is
                required there.
            deterministic: if true, threshold ``data`` at its
                ``self.percentile`` quantile. If false (default), draw the
                assignment at random with probability increasing in the rank
                of each row, so that about ``1 - self.percentile`` of the
                rows end up as 1.

        Returns:
            A float ``numpy.ndarray`` of zeros and ones; shape ``(n, 1)`` for
            ``(n, 1)`` input.
        """
        data = np.asarray(data)

        if deterministic:
            # ``self.percentile`` is a fraction in [0, 1], so the quantile
            # API (q in [0, 1]) is the matching one; ``np.percentile`` would
            # interpret 0.9 as "0.9 %".
            threshold = np.quantile(data, self.percentile, axis=0)
            # NOTE(review): this marks the *lower* ``percentile`` share of
            # rows as 1, while the stochastic branch yields roughly
            # ``1 - percentile`` ones, favouring high values. Confirm which
            # direction is intended.
            binary_treat_value = data <= threshold
        else:
            n_rows = data.shape[0]

            # Rank of every row within the column (0 = smallest value).
            order = data.argsort(axis=0)
            ranks = np.empty_like(order)
            ranks[order[:, 0], 0] = np.arange(n_rows)
            prob_t = ranks / n_rows

            # Generating data with equal 0 and 1 (since ranks are uniformly
            # distributed).
            binary_treat_value = np.random.binomial(1, prob_t[:, 0], n_rows)

            # Flipping some values so the share of ones moves from about 0.5
            # to about ``1 - self.percentile``.
            if self.percentile >= 0.5:
                # Keep each 1 with probability 2 * (1 - percentile).
                is_one = binary_treat_value == 1
                keep = np.random.binomial(
                    1, (1 - self.percentile) * 2, int(is_one.sum())
                )
                binary_treat_value[is_one] = keep * binary_treat_value[is_one]
            else:
                # Turn each 0 into a 1 with probability 1 - 2 * percentile.
                is_zero = binary_treat_value == 0
                flip = np.random.binomial(
                    1, 1 - self.percentile * 2, int(is_zero.sum())
                )
                binary_treat_value[is_zero] = flip + binary_treat_value[is_zero]

            binary_treat_value = binary_treat_value[:, np.newaxis]

        return binary_treat_value.astype(float)