Source code for pywhy_stats.api

from enum import Enum
from types import ModuleType
from typing import Optional
from warnings import warn

import numpy as np
import scipy.stats
from numpy.typing import ArrayLike

from pywhy_stats.independence import fisherz, kci

from .pvalue_result import PValueResult


class Methods(Enum):
    """Enumeration of the independence tests selectable through this API.

    Apart from ``AUTO``, the value of each member is the module that implements
    the corresponding test.
    """

    AUTO = 0
    """Let the library pick a suitable method based on the data."""

    FISHERZ = fisherz
    """:py:mod:`pywhy_stats.independence.fisherz`: Fisher's Z independence test"""

    KCI = kci
    """:py:mod:`pywhy_stats.independence.kci`: Kernel-based conditional independence test"""

def independence_test(
    X: ArrayLike,
    Y: ArrayLike,
    condition_on: Optional[ArrayLike] = None,
    method=Methods.AUTO,
    **kwargs,
) -> PValueResult:
    """Perform a (conditional) independence test to determine whether X and Y are independent.

    The test may be conditioned on an optional set of variables. That is, test whether
    ``X _||_ Y | condition_on``, where the null hypothesis is that X and Y are independent.

    Parameters
    ----------
    X : ArrayLike, shape (n_samples, n_features_x)
        Data matrix for X.
    Y : ArrayLike, shape (n_samples, n_features_y)
        Data matrix for Y.
    condition_on : ArrayLike or None, shape (n_samples, n_features_z), optional
        Data matrix for the conditioning variables. If None is given, an unconditional test
        is performed.
    method : Methods, optional
        Independence test method from the :class:`pywhy_stats.Methods` enum. Default is
        `Methods.AUTO`, which currently resolves to `Methods.KCI`.
    **kwargs : dict or None, optional
        Additional keyword arguments to be passed to the specific test method

    Returns
    -------
    result : PValueResult
        An instance of the PValueResult data class, containing the p-value, test statistic,
        and any additional information related to the independence test.

    Raises
    ------
    ValueError
        If ``method`` is not a member of the :class:`pywhy_stats.Methods` enum.

    Warns
    -----
    UserWarning
        If `Methods.FISHERZ` is selected and :func:`scipy.stats.normaltest` rejects normality
        (p-value below 0.05) for at least one column of ``X``, ``Y`` or ``condition_on``.

    See Also
    --------
    pywhy_stats.independence.fisherz : Fisher's Z test for independence
    pywhy_stats.independence.kci : Kernel Conditional Independence test
    """
    # Validate the type first so that arbitrary objects (e.g. arrays) are never
    # compared against enum members.
    if not isinstance(method, Methods):
        raise ValueError(
            f"Invalid method type. Expected one of {Methods.__members__.keys()}, "
            f"but got {method}."
        )

    # AUTO is a placeholder member; it is currently always mapped to KCI.
    selected = Methods.KCI if method is Methods.AUTO else method

    if selected is Methods.FISHERZ:
        samples = [X, Y] if condition_on is None else [X, Y, condition_on]
        for sample in samples:
            normality = scipy.stats.normaltest(sample, axis=0)

            # XXX: no multiple-comparison correction is applied across columns; pingouin
            # could be added as an optional dependency for doing multi-comp stuff.
            # Compare element-wise *before* reducing: reducing first would compare a
            # single boolean against 0.05 and invert the meaning of the check.
            if (np.atleast_1d(normality.pvalue) < 0.05).any():
                warn(
                    "The provided data does not seem to be Gaussian, but the Fisher-Z test "
                    "assumes that the data follows a Gaussian distribution. The result should "
                    "be interpreted carefully or consider a different independence test method.",
                    stacklevel=2,  # attribute the warning to the caller of this function
                )

    # The enum value of the selected member is the module implementing the test.
    test_module = selected.value
    if condition_on is None:
        return test_module.ind(X, Y, **kwargs)
    return test_module.condind(X, Y, condition_on, **kwargs)