Source code for dowhy.gcm.fcms

"""This module defines multiple implementations of the abstract class :class:`~dowhy.gcm.graph.FunctionalCausalModel`
(FCM).

Classes in this module should be considered experimental, meaning there might be breaking API changes in the future.
"""

import copy
from abc import ABC, abstractmethod
from typing import List, Optional

import numpy as np

from dowhy.gcm.graph import FunctionalCausalModel, InvertibleFunctionalCausalModel, StochasticModel
from dowhy.gcm.util.general import is_categorical, shape_into_2d


class PredictionModel:
    """Interface for general prediction models.

    Implementations are expected to provide ``fit``, ``predict`` and ``clone``.

    Note: this class does not derive from :class:`abc.ABC`, so the ``@abstractmethod`` markers are not enforced at
    instantiation time; calling a method that was not overridden raises ``NotImplementedError`` instead.
    """

    @abstractmethod
    def fit(self, X: np.ndarray, Y: np.ndarray) -> None:
        """Fits the model on the given samples.

        :param X: Input samples.
        :param Y: Target samples.
        :return: None
        """
        raise NotImplementedError()

    @abstractmethod
    def predict(self, X: np.ndarray) -> np.ndarray:
        """Returns the predictions of the model for the given inputs.

        :param X: Input samples.
        :return: The predictions for X.
        """
        raise NotImplementedError()

    @abstractmethod
    def clone(self):
        """Creates a copy of the prediction model that shares the hyper parameters but is not fitted.

        :return: An unfitted clone of the prediction model.
        """
        raise NotImplementedError()
class ClassificationModel(PredictionModel):
    """Interface for prediction models that additionally expose class probabilities and their class names."""

    @abstractmethod
    def predict_probabilities(self, X: np.ndarray) -> np.ndarray:
        """Returns the class probabilities for the given inputs.

        :param X: Input samples.
        :return: The class probabilities for X.
        """
        raise NotImplementedError

    @property
    @abstractmethod
    def classes(self) -> List[str]:
        """The class names known to the model. Callers index into this list with class indices."""
        raise NotImplementedError
class InvertibleFunction:
    """Interface for a function that can be applied in both directions (forward and inverse).

    Note: this class does not derive from :class:`abc.ABC`, so the ``@abstractmethod`` markers are not enforced at
    instantiation time; calling a method that was not overridden raises ``NotImplementedError`` instead.
    """

    @abstractmethod
    def evaluate(self, X: np.ndarray) -> np.ndarray:
        """Applies the function on the input.

        :param X: Input samples.
        :return: The function values for X.
        """
        raise NotImplementedError()

    @abstractmethod
    def evaluate_inverse(self, X: np.ndarray) -> np.ndarray:
        """Returns the outcome of applying the inverse of the function on the inputs.

        :param X: Input samples.
        :return: The values of the inverse function for X.
        """
        raise NotImplementedError()
class PostNonlinearModel(InvertibleFunctionalCausalModel):
    """
    Represents a post nonlinear FCM, i.e. models of the form:
        Y = g(f(X) + N),
    where X are parent nodes of the target node Y, f an arbitrary prediction model expecting inputs from the
    parents X, N a noise variable and g an invertible function.
    """

    def __init__(
        self, prediction_model: PredictionModel, noise_model: StochasticModel, invertible_function: InvertibleFunction
    ) -> None:
        """
        :param prediction_model: The prediction model f.
        :param noise_model: The StochasticModel to describe the distribution of the noise N.
        :param invertible_function: The invertible function g.
        """
        self._prediction_model = prediction_model
        self._noise_model = noise_model
        self._invertible_function = invertible_function

    def fit(self, X: np.ndarray, Y: np.ndarray) -> None:
        """Fits the post non-linear model of the form Y = g(f(X) + N). Here, this consists of three steps given
        samples from (X, Y):

            1. Transform Y via the inverse of g: g^-1(Y) = f(X) + N
            2. Fit the model for f on (X, g^-1(Y))
            3. Reconstruct N based on the residual N = g^-1(Y) - f(X)

        Note that the noise here can be inferred uniquely if the model assumption holds.

        :param X: Samples from the input X.
        :param Y: Samples from the target Y.
        :return: None
        """
        X, Y = shape_into_2d(X, Y)

        self._prediction_model.fit(X=X, Y=self._invertible_function.evaluate_inverse(Y))
        # The noise model is fitted on the residuals of the freshly fitted prediction model.
        self._noise_model.fit(X=self.estimate_noise(Y, X))

    def estimate_noise(self, target_samples: np.ndarray, parent_samples: np.ndarray) -> np.ndarray:
        """Reconstruct the noise given samples from (X, Y). This is done by:

            1. Transform Y via the inverse of g: g^-1(Y) = f(X) + N
            2. Return the residual g^-1(Y) - f(X)

        :param target_samples: Samples from the target Y.
        :param parent_samples: Samples from the input X.
        :return: The reconstructed noise based on the given samples.
        """
        target_samples, parent_samples = shape_into_2d(target_samples, parent_samples)

        transformed_targets = self._invertible_function.evaluate_inverse(target_samples)
        # Shape the predictions the same way evaluate() does. Without this, a prediction model that returns a flat
        # array would be broadcast against the 2-D targets and yield an (n, n) matrix instead of n residuals.
        predictions = shape_into_2d(self._prediction_model.predict(parent_samples))

        return transformed_targets - predictions

    def draw_noise_samples(self, num_samples: int) -> np.ndarray:
        """Draws samples from the noise distribution N.

        :param num_samples: Number of noise samples.
        :return: A numpy array containing num_samples samples from the noise.
        """
        return self._noise_model.draw_samples(num_samples)

    def evaluate(self, parent_samples: np.ndarray, noise_samples: np.ndarray) -> np.ndarray:
        """Evaluates the post non-linear model given samples (X, N). This is done by:

            1. Evaluate f(X)
            2. Evaluate f(X) + N
            3. Return g(f(X) + N)

        :param parent_samples: Samples from the inputs X.
        :param noise_samples: Samples from the noise N.
        :return: The Y values based on the given samples.
        """
        parent_samples, noise_samples = shape_into_2d(parent_samples, noise_samples)
        predictions = shape_into_2d(self._prediction_model.predict(parent_samples))

        return self._invertible_function.evaluate(predictions + noise_samples)

    def __str__(self) -> str:
        return "%s with %s and an %s" % (
            self.__class__.__name__,
            self._prediction_model.__class__.__name__,
            self._invertible_function.__class__.__name__,
        )

    def clone(self):
        """Creates a new PostNonlinearModel from clones of the prediction and noise models and a deep copy of the
        invertible function.

        :return: The cloned model.
        """
        return PostNonlinearModel(
            prediction_model=self._prediction_model.clone(),
            noise_model=self._noise_model.clone(),
            invertible_function=copy.deepcopy(self._invertible_function),
        )

    @property
    def prediction_model(self) -> PredictionModel:
        """The prediction model f."""
        return self._prediction_model

    @property
    def noise_model(self) -> StochasticModel:
        """The model describing the distribution of the noise N."""
        return self._noise_model

    @property
    def invertible_function(self) -> InvertibleFunction:
        """The invertible function g."""
        return self._invertible_function
class AdditiveNoiseModel(PostNonlinearModel):
    """Continuous functional causal model of the form
        Y = f(X) + N,
    where X is the input (typically, direct causal parents of Y) and the noise N is assumed to be independent of X.
    This is a special instance of a :py:class:`PostNonlinearModel <dowhy.gcm.PostNonlinearModel>` where the function
    g is the identity function.

    Given joint samples from (X, Y), this model can be fitted by first training a model f (e.g. using least squares
    regression) and then reconstructing N by N = Y - f(X), i.e. using the residual.
    """

    def __init__(self, prediction_model: PredictionModel, noise_model: Optional[StochasticModel] = None) -> None:
        """
        :param prediction_model: The prediction model f.
        :param noise_model: The model for the noise N. If None, an EmpiricalDistribution is used.
        """
        # NOTE(review): both dowhy imports below are function-local, presumably to avoid circular imports - confirm.
        if noise_model is None:
            from dowhy.gcm.stochastic_models import EmpiricalDistribution

            noise_model = EmpiricalDistribution()

        from dowhy.gcm.ml.regression import InvertibleIdentityFunction

        identity_function = InvertibleIdentityFunction()
        super().__init__(
            prediction_model=prediction_model,
            noise_model=noise_model,
            invertible_function=identity_function,
        )

    def clone(self):
        """Creates a new AdditiveNoiseModel from clones of the prediction model and the noise model.

        :return: The cloned model.
        """
        cloned_prediction_model = self.prediction_model.clone()
        cloned_noise_model = self.noise_model.clone()

        return AdditiveNoiseModel(prediction_model=cloned_prediction_model, noise_model=cloned_noise_model)
class ProbabilityEstimatorModel(ABC):
    """Abstract interface for models that can estimate probabilities given samples from the parents."""

    @abstractmethod
    def estimate_probabilities(self, parent_samples: np.ndarray) -> np.ndarray:
        """Estimates probabilities for the given parent samples.

        :param parent_samples: Samples from the inputs.
        :return: The estimated probabilities.
        """
        raise NotImplementedError()
class ClassifierFCM(FunctionalCausalModel, ProbabilityEstimatorModel):
    """Represents the categorical functional causal model of the form
        Y = f(X, N),
    where X is the input (typically, direct causal parents of Y) and the noise N here is uniform on [0, 1]. The model
    is mostly based on a standard classification model that outputs probabilities. In order to generate a new random
    sample given an input x, the return value y is uniformly sampled based on the class probabilities p(y | x). Here,
    the noise is used to make this sampling process deterministic by using the cumulative distribution functions
    defined by the given inputs.
    """

    def __init__(self, classifier_model: Optional[ClassificationModel] = None) -> None:
        """
        :param classifier_model: The classification model providing the class probabilities. If None, the model
                                 returned by create_hist_gradient_boost_classifier() is used.
        """
        if classifier_model is None:
            from dowhy.gcm.ml.classification import create_hist_gradient_boost_classifier

            classifier_model = create_hist_gradient_boost_classifier()

        self._classifier_model = classifier_model

    def draw_noise_samples(self, num_samples: int) -> np.ndarray:
        """Returns uniformly sampled values on [0, 1].

        :param num_samples: Number of noise samples.
        :return: Noise samples on [0, 1].
        """
        return shape_into_2d(np.random.uniform(0, 1, num_samples))

    def evaluate(self, parent_samples: np.ndarray, noise_samples: np.ndarray) -> np.ndarray:
        """Evaluates the model Y = f(X, N), where X are the parent_samples and N the noise_samples. Here, the
        cumulative distribution functions are defined by the parent_samples. For instance, lets say we have
        2 classes, n = 0.7 and an input x with p(y = 0| x) = 0.6 and p(y = 1| x) = 0.4, then we get y = 1 as a return
        value. This is because p(y = 0| x) < n <= 1.0, i.e. n falls into the bucket that is spanned by p(y = 1| x).

        If, due to floating point rounding, the class probabilities of a sample sum up to slightly less than its
        noise value, the sample is assigned to the last class.

        :param parent_samples: Samples from the inputs X.
        :param noise_samples: Samples from the noise on [0, 1].
        :return: Class labels Y based on the inputs and noise.
        """
        noise_samples = shape_into_2d(noise_samples)
        probabilities = self.estimate_probabilities(parent_samples)

        # Distance from the noise value to the upper end of each class bucket; negative means the bucket ends
        # before the noise value.
        bucket_distances = np.cumsum(probabilities, axis=1) - noise_samples
        # The last bucket is supposed to end at 1.0. If rounding leaves it marginally below the noise value, every
        # entry would be negative and argmin would wrongly select the first class, so clamp the last column at 0.
        bucket_distances[:, -1] = np.maximum(bucket_distances[:, -1], 0)
        # Rule out all buckets that end before the noise value; the closest remaining bucket end wins.
        bucket_distances[bucket_distances < 0] = 1

        return shape_into_2d(np.array(self.get_class_names(np.argmin(bucket_distances, axis=1))))

    def estimate_probabilities(self, parent_samples: np.ndarray) -> np.ndarray:
        """Returns the class probabilities for the given parent_samples.

        :param parent_samples: Samples from inputs X.
        :return: A nxd numpy matrix with class probabilities for each sample, where n is the number of samples and d
                 the number of classes. Here, array entry A[i][j] corresponds to the i-th sample indicating the
                 probability of the j-th class.
        """
        return self._classifier_model.predict_probabilities(parent_samples)

    def fit(self, X: np.ndarray, Y: np.ndarray) -> None:
        """Fits the underlying classification model.

        :param X: Input samples.
        :param Y: Target labels.
        :return: None
        :raises ValueError: If Y is not categorical according to is_categorical.
        """
        X, Y = shape_into_2d(X, Y)

        if not is_categorical(Y):
            raise ValueError("The target data needs to be categorical in the form of strings!")

        self._classifier_model.fit(X=X, Y=Y)

    def clone(self):
        """Creates a new ClassifierFCM from a clone of the underlying classification model.

        :return: The cloned model.
        """
        return ClassifierFCM(classifier_model=self._classifier_model.clone())

    def get_class_names(self, class_indices: np.ndarray) -> List[str]:
        """Maps class indices to the class names of the underlying classification model.

        :param class_indices: Indices into the classes of the classification model.
        :return: The class names for the given indices, in the same order.
        """
        return [self._classifier_model.classes[index] for index in class_indices]

    @property
    def classifier_model(self) -> ClassificationModel:
        """The underlying classification model."""
        return self._classifier_model