# Source code for dowhy.gcm.fcms

"""This module defines multiple implementations of the abstract class :class:`~dowhy.gcm.graph.FunctionalCausalModel`
(FCM).

Classes in this module should be considered experimental, meaning there might be breaking API changes in the future.
"""

import copy
from abc import ABC, abstractmethod
from typing import List, Optional

import numpy as np

from dowhy.gcm.graph import FunctionalCausalModel, InvertibleFunctionalCausalModel, StochasticModel
from dowhy.gcm.util.general import is_categorical, shape_into_2d


class PredictionModel:
    """Represents general prediction model implementations. Each prediction model should provide a fit, a predict
    and a clone method.

    Note that this class does not derive from :class:`abc.ABC`. The ``@abstractmethod`` markers are therefore not
    enforced at instantiation time; calling a method that has not been overridden raises
    :class:`NotImplementedError`.
    """

    @abstractmethod
    def fit(self, X: np.ndarray, Y: np.ndarray) -> None:
        """Fits the prediction model on the given samples.

        :param X: Input samples.
        :param Y: Target samples.
        :return: None
        """
        raise NotImplementedError

    @abstractmethod
    def predict(self, X: np.ndarray) -> np.ndarray:
        """Returns the predictions of the model for the given inputs.

        :param X: Input samples.
        :return: The predictions for X.
        """
        raise NotImplementedError

    @abstractmethod
    def clone(self):
        """
        Clones the prediction model using the same hyper parameters but not fitted.

        :return: An unfitted clone of the prediction model.
        """
        raise NotImplementedError


class ClassificationModel(PredictionModel):
    """A prediction model for categorical targets that can additionally report class probabilities and the
    names of the classes it distinguishes."""

    @abstractmethod
    def predict_probabilities(self, X: np.ndarray) -> np.ndarray:
        """Returns the class probabilities for the given inputs.

        :param X: Input samples.
        :return: The class probabilities for each sample in X.
        """
        raise NotImplementedError

    @property
    @abstractmethod
    def classes(self) -> List[str]:
        """The class names known to the model."""
        raise NotImplementedError


class InvertibleFunction:
    """Interface for a function that can be evaluated in both directions, i.e. the function itself and its
    inverse.

    Note that this class does not derive from :class:`abc.ABC`. The ``@abstractmethod`` markers are therefore not
    enforced at instantiation time; calling a method that has not been overridden raises
    :class:`NotImplementedError`.
    """

    @abstractmethod
    def evaluate(self, X: np.ndarray) -> np.ndarray:
        """Applies the function on the input.

        :param X: The inputs.
        :return: The outcome of applying the function on X.
        """
        raise NotImplementedError

    @abstractmethod
    def evaluate_inverse(self, X: np.ndarray) -> np.ndarray:
        """Returns the outcome of applying the inverse of the function on the inputs.

        :param X: The inputs.
        :return: The outcome of applying the inverse of the function on X.
        """
        raise NotImplementedError


class PostNonlinearModel(InvertibleFunctionalCausalModel):
    """
    Represents a post-nonlinear FCM, i.e. models of the form:
        Y = g(f(X) + N),
    where X are parent nodes of the target node Y, f an arbitrary prediction model expecting inputs from the
    parents X, N a noise variable and g an invertible function.
    """

    def __init__(
        self, prediction_model: PredictionModel, noise_model: StochasticModel, invertible_function: InvertibleFunction
    ) -> None:
        """
        :param prediction_model: The prediction model f.
        :param noise_model: The StochasticModel to describe the distribution of the noise N.
        :param invertible_function: The invertible function g.
        """
        self._prediction_model = prediction_model
        self._noise_model = noise_model
        self._invertible_function = invertible_function

    def fit(self, X: np.ndarray, Y: np.ndarray) -> None:
        """Fits the post non-linear model of the form Y = g(f(X) + N). Here, this consists of three steps given
        samples from (X, Y):

            1. Transform Y via the inverse of g: g^-1(Y) = f(X) + N
            2. Fit the model for f on (X, g^-1(Y))
            3. Reconstruct N based on the residual N = g^-1(Y) - f(X)

        Note that the noise here can be inferred uniquely if the model assumption holds.

        :param X: Samples from the input X.
        :param Y: Samples from the target Y.
        :return: None
        """
        X, Y = shape_into_2d(X, Y)

        self._prediction_model.fit(X=X, Y=self._invertible_function.evaluate_inverse(Y))
        # The noise model is fitted on the residuals of the freshly fitted prediction model.
        self._noise_model.fit(X=self.estimate_noise(target_samples=Y, parent_samples=X))

    def estimate_noise(self, target_samples: np.ndarray, parent_samples: np.ndarray) -> np.ndarray:
        """Reconstruct the noise given samples from (X, Y). This is done by:

            1. Transform Y via the inverse of g: g^-1(Y) = f(X) + N
            2. Return the residual g^-1(Y) - f(X)

        :param target_samples: Samples from the target Y.
        :param parent_samples: Samples from the input X.
        :return: The reconstructed noise based on the given samples.
        """
        target_samples, parent_samples = shape_into_2d(target_samples, parent_samples)

        # Shape the predictions the same way evaluate() does. A 1-D prediction would otherwise broadcast against
        # the 2-D transformed targets and yield an (n, n) matrix instead of n residuals.
        predictions = shape_into_2d(self._prediction_model.predict(parent_samples))

        return self._invertible_function.evaluate_inverse(target_samples) - predictions

    def draw_noise_samples(self, num_samples: int) -> np.ndarray:
        """Draws samples from the noise distribution N.

        :param num_samples: Number of noise samples.
        :return: A numpy array containing num_samples samples from the noise.
        """
        return self._noise_model.draw_samples(num_samples)

    def evaluate(self, parent_samples: np.ndarray, noise_samples: np.ndarray) -> np.ndarray:
        """Evaluates the post non-linear model given samples (X, N). This is done by:

            1. Evaluate f(X)
            2. Evaluate f(X) + N
            3. Return g(f(X) + N)

        :param parent_samples: Samples from the inputs X.
        :param noise_samples: Samples from the noise N.
        :return: The Y values based on the given samples.
        """
        parent_samples, noise_samples = shape_into_2d(parent_samples, noise_samples)
        predictions = shape_into_2d(self._prediction_model.predict(parent_samples))

        return self._invertible_function.evaluate(predictions + noise_samples)

    def __str__(self) -> str:
        """Returns a short description naming this class, the prediction model class and the invertible
        function class."""
        return (
            f"{self.__class__.__name__} with {self._prediction_model.__class__.__name__} "
            f"and an {self._invertible_function.__class__.__name__}"
        )

    def clone(self):
        """Creates a new PostNonlinearModel from clones of the prediction and noise model and a deep copy of the
        invertible function.

        :return: The new PostNonlinearModel instance.
        """
        return PostNonlinearModel(
            prediction_model=self._prediction_model.clone(),
            noise_model=self._noise_model.clone(),
            invertible_function=copy.deepcopy(self._invertible_function),
        )

    @property
    def prediction_model(self) -> PredictionModel:
        """The prediction model f."""
        return self._prediction_model

    @property
    def noise_model(self) -> StochasticModel:
        """The model describing the distribution of the noise N."""
        return self._noise_model

    @property
    def invertible_function(self) -> InvertibleFunction:
        """The invertible function g."""
        return self._invertible_function


class AdditiveNoiseModel(PostNonlinearModel):
    """Represents the continuous functional causal model of the form
        Y = f(X) + N,
    where X is the input (typically, direct causal parents of Y) and the noise N is assumed to be independent of X. This
    is a special instance of a :py:class:`PostNonlinearModel <dowhy.gcm.PostNonlinearModel>` where the function g is the
    identity function.

    Given joint samples from (X, Y), this model can be fitted by first training a model f (e.g. using least squares
    regression) and then reconstruct N by N = Y - f(X), i.e. using the residual.
    """

    def __init__(self, prediction_model: PredictionModel, noise_model: Optional[StochasticModel] = None) -> None:
        """
        :param prediction_model: The prediction model f.
        :param noise_model: The StochasticModel to describe the distribution of the noise N. If None, an
                            EmpiricalDistribution is used.
        """
        # NOTE(review): these imports are function-local in the original, presumably to avoid circular imports
        # between dowhy.gcm modules; keep them local unless that is confirmed to be unnecessary.
        if noise_model is None:
            from dowhy.gcm.stochastic_models import EmpiricalDistribution

            noise_model = EmpiricalDistribution()

        from dowhy.gcm.ml.regression import InvertibleIdentityFunction

        super().__init__(
            prediction_model=prediction_model, noise_model=noise_model, invertible_function=InvertibleIdentityFunction()
        )

    def clone(self):
        """Creates a new AdditiveNoiseModel from clones of the prediction and noise model.

        :return: The new AdditiveNoiseModel instance.
        """
        return AdditiveNoiseModel(prediction_model=self.prediction_model.clone(), noise_model=self.noise_model.clone())


class ProbabilityEstimatorModel(ABC):
    """Abstract interface for models that can estimate probabilities given samples from the parents.

    Since this class derives from :class:`abc.ABC`, subclasses have to override
    :meth:`estimate_probabilities` before they can be instantiated.
    """

    @abstractmethod
    def estimate_probabilities(self, parent_samples: np.ndarray) -> np.ndarray:
        """Estimates probabilities for the given parent samples.

        :param parent_samples: Samples from the parents.
        :return: The estimated probabilities.
        """
        raise NotImplementedError


class ClassifierFCM(FunctionalCausalModel, ProbabilityEstimatorModel):
    """Represents the categorical functional causal model of the form
        Y = f(X, N),
    where X is the input (typically, direct causal parents of Y) and the noise N here is uniform on [0, 1]. The model
    is mostly based on a standard classification model that outputs probabilities. In order to generate a new random
    sample given an input x, the return value y is uniformly sampled based on the class probabilities p(y | x). Here,
    the noise is used to make this sampling process deterministic by using the cumulative distribution functions defined
    by the given inputs.
    """

    def __init__(self, classifier_model: Optional[ClassificationModel] = None) -> None:
        """
        :param classifier_model: The classification model providing the class probabilities. If None, the model
                                 returned by create_hist_gradient_boost_classifier is used.
        """
        if classifier_model is None:
            from dowhy.gcm.ml.classification import create_hist_gradient_boost_classifier

            classifier_model = create_hist_gradient_boost_classifier()

        self._classifier_model = classifier_model

    def draw_noise_samples(self, num_samples: int) -> np.ndarray:
        """Returns uniformly sampled values on [0, 1].

        :param num_samples: Number of noise samples.
        :return: Noise samples on [0, 1].
        """
        return shape_into_2d(np.random.uniform(0, 1, num_samples))

    def evaluate(self, parent_samples: np.ndarray, noise_samples: np.ndarray) -> np.ndarray:
        """Evaluates the model Y = f(X, N), where X are the parent_samples and N the noise_samples. Here, the
        cumulative distribution functions are defined by the parent_samples. For instance, lets say we have 2
        classes, n = 0.7 and an input x with p(y = 0| x) = 0.6 and p(y = 1| x) = 0.4, then we get y = 1 as a return
        value. This is because p(y = 0| x) < n <= 1.0, i.e. n falls into the bucket that is spanned by p(y = 1| x).

        :param parent_samples: Samples from the inputs X.
        :param noise_samples: Samples from the noise on [0, 1].
        :return: Class labels Y based on the inputs and noise.
        """
        noise_samples = shape_into_2d(noise_samples)
        cumulative_probabilities = np.cumsum(self.estimate_probabilities(parent_samples), axis=1)

        # The cumulative probabilities are non-decreasing per row, so the number of entries strictly below the
        # noise value is the index of the first bucket whose upper edge is >= the noise value.
        class_indices = np.sum(cumulative_probabilities < noise_samples, axis=1)
        # Rounding errors can leave the last cumulative value marginally below 1. A noise value above it would
        # then match no bucket at all; such samples belong to the last class rather than the first one.
        class_indices = np.minimum(class_indices, cumulative_probabilities.shape[1] - 1)

        return shape_into_2d(np.array(self.get_class_names(class_indices)))

    def estimate_probabilities(self, parent_samples: np.ndarray) -> np.ndarray:
        """Returns the class probabilities for the given parent_samples.

        :param parent_samples: Samples from inputs X.
        :return: A nxd numpy matrix with class probabilities for each sample, where n is the number of samples and d
                 the number of classes. Here, array entry A[i][j] corresponds to the i-th sample indicating the
                 probability of the j-th class.
        """
        return self._classifier_model.predict_probabilities(parent_samples)

    def fit(self, X: np.ndarray, Y: np.ndarray) -> None:
        """Fits the underlying classification model.

        :param X: Input samples.
        :param Y: Target labels.
        :return: None
        :raises ValueError: If Y is not categorical according to is_categorical.
        """
        X, Y = shape_into_2d(X, Y)

        if not is_categorical(Y):
            raise ValueError("The target data needs to be categorical in the form of strings!")

        self._classifier_model.fit(X=X, Y=Y)

    def clone(self):
        """Creates a new ClassifierFCM from a clone of the underlying classification model.

        :return: The new ClassifierFCM instance.
        """
        return ClassifierFCM(classifier_model=self._classifier_model.clone())

    def get_class_names(self, class_indices: np.ndarray) -> List[str]:
        """Maps class indices to the class names of the underlying classification model.

        :param class_indices: Indices into the classes of the classification model.
        :return: The class names for the given indices, in the same order.
        """
        return [self._classifier_model.classes[index] for index in class_indices]

    @property
    def classifier_model(self) -> ClassificationModel:
        """The underlying classification model."""
        return self._classifier_model