Source code for dowhy.utils.dgp

import numpy as np
import pandas as pd


class DataGeneratingProcess:
    """
    Base class for implementation of data generating process.

    Subclasses implement functions that create various data generating processes.
    All data generating processes are in the package "dowhy.utils.dgps".
    """

    # Default ``percentile`` for binary treatments. It is a fraction in [0, 1]:
    # ``convert_to_binary`` compares it with 0.5 and turns it into a binomial
    # probability.
    DEFAULT_PERCENTILE = 0.9

    def __init__(self, **kwargs):
        """
        Store the configuration of the data generating process.

        :param treatment: required; stored as given.
        :param outcome: required; stored as given.
        :param confounder: required; stored as given.
        :param effect_modifier: required; stored as given.
        :param weights: optional, defaults to ``{}``.
        :param bias: optional, defaults to ``{}``.
        :param seed: optional, defaults to ``None``. When given, it seeds NumPy's
            *global* random state via ``np.random.seed``.
        :param treatment_is_binary: optional, defaults to ``False``.
        :param percentile: only allowed when ``treatment_is_binary`` is true, where it
            defaults to ``DEFAULT_PERCENTILE``. For non-binary treatments the attribute
            is set to the placeholder string ``"NA"``.

        :raises KeyError: if one of the four required keyword arguments is missing.
        :raises ValueError: if ``percentile`` is passed for a non-binary treatment.
        """
        self.treatment = kwargs["treatment"]
        self.outcome = kwargs["outcome"]
        self.confounder = kwargs["confounder"]
        self.effect_modifier = kwargs["effect_modifier"]
        self.weights = kwargs.pop("weights", {})
        self.bias = kwargs.pop("bias", {})
        self.seed = kwargs.pop("seed", None)
        self.treatment_is_binary = kwargs.pop("treatment_is_binary", False)

        if self.treatment_is_binary:
            self.percentile = kwargs.pop("percentile", DataGeneratingProcess.DEFAULT_PERCENTILE)
        elif kwargs.pop("percentile", None) is not None:
            raise ValueError("Cannot use percentile, if the input is non-binary")
        else:
            self.percentile = "NA"

        self.true_value = None

        if self.seed is not None:
            np.random.seed(self.seed)

    def generate_data(self):
        """Hook for subclasses; the base implementation always raises ``NotImplementedError``."""
        raise NotImplementedError()

    def generation_process(self):
        """Hook for subclasses; the base implementation always raises ``NotImplementedError``."""
        raise NotImplementedError()

    def convert_to_binary(self, data, deterministic=False) -> np.ndarray:
        """
        Turn continuous values into a 0/1 indicator, returned as floats.

        :param data: NumPy array. The stochastic branch indexes ``[:, 0]``, so it needs a
            2-D array and only looks at the first column.
        :param deterministic: if true, threshold ``data`` at its ``self.percentile``
            quantile (computed along axis 0) and mark values at or below the threshold
            with 1. Otherwise draw the indicator at random, with a success probability
            that grows with the rank of the value, and then randomly flip part of the
            draws depending on ``self.percentile``.
        :returns: float array of zeros and ones; shape ``(n, 1)`` in the stochastic branch.
        """
        if deterministic:
            # ``self.percentile`` is a fraction in [0, 1] (see the stochastic branch),
            # whereas ``np.percentile`` expects 0-100. ``np.quantile`` takes the
            # fraction directly.
            # NOTE(review): this branch yields a share of ones of about ``percentile``,
            # the stochastic branch about ``1 - percentile`` -- confirm which is intended.
            threshold = np.quantile(data, self.percentile, axis=0)
            binary_treat_value = data <= threshold
        else:
            order = data.argsort(axis=0)
            ranks = np.empty_like(order)
            ranks[order[:, 0], 0] = np.arange(data.shape[0])
            prob_t = ranks / data.shape[0]

            # Generating data with equal 0 and 1 (since ranks are uniformly distributed)
            binary_treat_value = np.random.binomial(1, prob_t[:, 0], data.shape[0])

            # Flipping some values
            if self.percentile >= 0.5:
                # Keep each 1 with probability 2 * (1 - percentile); the rest become 0.
                ones = binary_treat_value == 1
                mask = np.random.binomial(1, (1 - self.percentile) * 2, int(ones.sum()))
                binary_treat_value[ones] = mask
            else:
                # Turn each 0 into a 1 with probability 1 - 2 * percentile.
                zeros = binary_treat_value == 0
                mask = np.random.binomial(1, 1 - self.percentile * 2, int(zeros.sum()))
                binary_treat_value[zeros] = mask

            binary_treat_value = binary_treat_value[:, np.newaxis]

        return binary_treat_value.astype(float)

    def __str__(self) -> str:
        """Return a multi-line listing of the configuration attributes."""
        rep = """
            treatment:{}
            outcome:{}
            confounder: {}
            effect_modifier: {}
            weights: {}
            bias: {}
            seed: {}
            treatment_is_binary: {}
            percentile: {}
            """.format(
            self.treatment,
            self.outcome,
            self.confounder,
            self.effect_modifier,
            self.weights,
            self.bias,
            self.seed,
            self.treatment_is_binary,
            self.percentile,
        )
        return rep