Source code for dowhy.gcm.density_estimators

"""This module contains implementations of different density estimators."""

from typing import Optional

import numpy as np
from sklearn.mixture import BayesianGaussianMixture
from sklearn.neighbors import KernelDensity

from dowhy.gcm.density_estimator import DensityEstimator
from dowhy.gcm.util.general import shape_into_2d


class GaussianMixtureDensityEstimator(DensityEstimator):
    """Represents a density estimator based on a Gaussian mixture model.

    The estimator uses the sklearn BayesianGaussianMixture model internally.
    """

    def __init__(self, num_components: Optional[int] = None) -> None:
        """Create an unfitted estimator.

        :param num_components: Number of mixture components handed to BayesianGaussianMixture. If None, the
                               number is derived from the number of samples on every call to ``fit``.
        """
        self._gmm_model = None
        self._num_components = num_components

    def fit(self, X: np.ndarray) -> None:
        """Fit the mixture model to the given samples.

        :param X: Training samples; ``X.shape[0]`` is used as the number of samples. The data is passed through
                  ``shape_into_2d`` before being handed to sklearn.
        """
        # Resolve the component count locally instead of overwriting the configured value. Otherwise the
        # "auto" setting (None) would be lost after the first fit and a later fit on a data set of a different
        # size would silently reuse a stale count.
        num_components = self._num_components
        if num_components is None:
            num_components = int(np.ceil(np.sqrt(X.shape[0] / 2)))

        self._gmm_model = BayesianGaussianMixture(n_components=num_components, covariance_type="full").fit(
            shape_into_2d(X)
        )

    def density(self, X: np.ndarray) -> np.ndarray:
        """Evaluate the fitted density at the given samples.

        :param X: Samples to evaluate; passed through ``shape_into_2d`` before scoring.
        :return: The exponentiated output of ``score_samples`` for the given samples.
        :raises RuntimeError: If ``fit`` has not been called yet.
        """
        if self._gmm_model is None:
            raise RuntimeError("%s has not been fitted!" % self.__class__.__name__)

        # Note, the output of score_samples are log values.
        return np.exp(self._gmm_model.score_samples(shape_into_2d(X)))
class KernelDensityEstimator1D(DensityEstimator):
    """Kernel based density estimator for one dimensional data.

    Internally, the sklearn KernelDensity class with a Gaussian kernel is used.
    """

    def __init__(self) -> None:
        """Create an unfitted estimator."""
        self._kde_model = None

    def fit(self, X: np.ndarray) -> None:
        """Fit a Gaussian kernel density model to the given samples.

        The bandwidth is set to ``std(X) * (4 / (3 * n)) ** (1 / 5)``, where ``n`` is the number of rows of the
        data after it has been passed through ``shape_into_2d``.

        :param X: One dimensional training samples.
        :raises RuntimeError: If the data has more than one column.
        """
        samples = shape_into_2d(X)
        self._validate_data(samples)

        num_samples = samples.shape[0]
        scaling = np.power(4 / 3 / num_samples, 1 / 5)
        bandwidth = np.std(samples) * scaling

        kde = KernelDensity(kernel="gaussian", bandwidth=bandwidth)
        self._kde_model = kde.fit(samples.reshape(-1, 1))

    def _validate_data(self, X: np.ndarray) -> None:
        """Raise a RuntimeError if the given 2D data has more than one column."""
        if X.shape[1] <= 1:
            return

        raise RuntimeError("%s only supports one dimensional data!" % self.__class__.__name__)

    def density(self, X: np.ndarray) -> np.ndarray:
        """Evaluate the fitted density at the given samples.

        :param X: One dimensional samples to evaluate.
        :return: The exponentiated output of ``score_samples`` for the given samples.
        :raises RuntimeError: If ``fit`` has not been called yet or the data has more than one column.
        """
        if self._kde_model is None:
            raise RuntimeError("%s has not been fitted!" % self.__class__.__name__)

        samples = shape_into_2d(X)
        self._validate_data(samples)

        # score_samples yields log values, hence the exponentiation.
        log_density = self._kde_model.score_samples(samples.reshape(-1, 1))
        return np.exp(log_density)