Source code for dowhy.causal_identifier

import copy
import itertools
import logging

import sympy as sp
import sympy.stats as spstats

import dowhy.utils.cli_helpers as cli
from dowhy.utils.api import parse_state


[docs]class CausalIdentifier:

    """Class that implements different identification methods.

    Currently supports backdoor, instrumental variable, frontdoor and mediation (for NDE/NIE estimands) identification methods. The identification is based on the causal graph provided.

    Other specific ways of identification, such as the ID* algorithm, minimal adjustment criteria, etc. will be added in the future.
    If you'd like to contribute, please raise an issue or a pull request on Github.

    """
    # Supported values of `estimand_type`; identify_effect dispatches on these.
    NONPARAMETRIC_ATE="nonparametric-ate"
    NONPARAMETRIC_NDE="nonparametric-nde"
    NONPARAMETRIC_NIE="nonparametric-nie"
    # Passed as `max_iterations` to find_valid_adjustment_sets by identify_backdoor.
    MAX_BACKDOOR_ITERATIONS = 100000

    # Backdoor method names (accepted values of `method_name`)
    BACKDOOR_DEFAULT="default"
    BACKDOOR_EXHAUSTIVE="exhaustive-search"
    BACKDOOR_MIN="minimal-adjustment"
    BACKDOOR_MAX="maximal-adjustment"
    # identify_backdoor raises ValueError for any method name outside this set.
    METHOD_NAMES = {BACKDOOR_DEFAULT, BACKDOOR_EXHAUSTIVE, BACKDOOR_MIN, BACKDOOR_MAX}
    # Method substituted by identify_backdoor when `method_name` equals BACKDOOR_DEFAULT.
    DEFAULT_BACKDOOR_METHOD = BACKDOOR_DEFAULT

    def __init__(self, graph, estimand_type,
            method_name = "default",
            proceed_when_unidentifiable=False):
        self._graph = graph
        self.estimand_type = estimand_type
        self.treatment_name = graph.treatment_name
        self.outcome_name = graph.outcome_name
        self.method_name = method_name
        self._proceed_when_unidentifiable = proceed_when_unidentifiable
        self.logger = logging.getLogger(__name__)

[docs]    def identify_effect(self, optimize_backdoor=False):
        """Main method that returns an identified estimand (if one exists).

        If estimand_type is non-parametric ATE, then  uses backdoor, instrumental variable and frontdoor identification methods,  to check if an identified estimand exists, based on the causal graph.

        :param self: instance of the CausalIdentifier class (or its subclass)
        :returns:  target estimand, an instance of the IdentifiedEstimand class
        """
        # First, check if there is a directed path from action to outcome
        if not self._graph.has_directed_path(self.treatment_name, self.outcome_name):
            self.logger.warn("No directed path from treatment to outcome. Causal Effect is zero.")
            return IdentifiedEstimand(self,
                    treatment_variable=self.treatment_name,
                    outcome_variable=self.outcome_name,
                    no_directed_path=True)
        if self.estimand_type == CausalIdentifier.NONPARAMETRIC_ATE:
            return self.identify_ate_effect(optimize_backdoor=optimize_backdoor)
        elif self.estimand_type == CausalIdentifier.NONPARAMETRIC_NDE:
            return self.identify_nde_effect()
        elif self.estimand_type == CausalIdentifier.NONPARAMETRIC_NIE:
            return self.identify_nie_effect()
        else:
            raise ValueError("Estimand type is not supported. Use either {0}, {1}, or {2}.".format(
                CausalIdentifier.NONPARAMETRIC_ATE,
                CausalIdentifier.NONPARAMETRIC_NDE,
                CausalIdentifier.NONPARAMETRIC_NIE))

[docs]    def identify_ate_effect(self, optimize_backdoor):
        estimands_dict = {}
        mediation_first_stage_confounders = None
        mediation_second_stage_confounders = None
        ### 1. BACKDOOR IDENTIFICATION
        # First, checking if there are any valid backdoor adjustment sets
        if optimize_backdoor == False:
            backdoor_sets = self.identify_backdoor(self.treatment_name, self.outcome_name)
        else:
            from dowhy.causal_identifiers.backdoor import Backdoor
            path = Backdoor(self._graph._graph, self.treatment_name, self.outcome_name)
            backdoor_sets = path.get_backdoor_vars()
        estimands_dict, backdoor_variables_dict = self.build_backdoor_estimands_dict(
                self.treatment_name,
                self.outcome_name,
                backdoor_sets,
                estimands_dict)
        # Setting default "backdoor" identification adjustment set
        default_backdoor_id = self.get_default_backdoor_set_id(backdoor_variables_dict)
        if len(backdoor_variables_dict) > 0:
            estimands_dict["backdoor"] = estimands_dict.get(str(default_backdoor_id), None)
            backdoor_variables_dict["backdoor"] = backdoor_variables_dict.get(str(default_backdoor_id), None)
        else:
            estimands_dict["backdoor"] = None
        ### 2. INSTRUMENTAL VARIABLE IDENTIFICATION
        # Now checking if there is also a valid iv estimand
        instrument_names = self._graph.get_instruments(self.treatment_name,
                                                       self.outcome_name)
        self.logger.info("Instrumental variables for treatment and outcome:" +
                         str(instrument_names))
        if len(instrument_names) > 0:
            iv_estimand_expr = self.construct_iv_estimand(
                self.estimand_type,
                self._graph.treatment_name,
                self._graph.outcome_name,
                instrument_names
            )
            self.logger.debug("Identified expression = " + str(iv_estimand_expr))
            estimands_dict["iv"] = iv_estimand_expr
        else:
            estimands_dict["iv"] = None

        ### 3. FRONTDOOR IDENTIFICATION
        # Now checking if there is a valid frontdoor variable
        frontdoor_variables_names = self.identify_frontdoor()
        self.logger.info("Frontdoor variables for treatment and outcome:" +
                str(frontdoor_variables_names))
        if len(frontdoor_variables_names) >0:
            frontdoor_estimand_expr = self.construct_frontdoor_estimand(
                self.estimand_type,
                self._graph.treatment_name,
                self._graph.outcome_name,
                frontdoor_variables_names
            )
            self.logger.debug("Identified expression = " + str(frontdoor_estimand_expr))
            estimands_dict["frontdoor"] = frontdoor_estimand_expr
            mediation_first_stage_confounders = self.identify_mediation_first_stage_confounders(self.treatment_name, frontdoor_variables_names)
            mediation_second_stage_confounders = self.identify_mediation_second_stage_confounders(frontdoor_variables_names, self.outcome_name)
        else:
            estimands_dict["frontdoor"] = None

        # Finally returning the estimand object
        estimand = IdentifiedEstimand(
            self,
            treatment_variable=self._graph.treatment_name,
            outcome_variable=self._graph.outcome_name,
            estimand_type=self.estimand_type,
            estimands=estimands_dict,
            backdoor_variables=backdoor_variables_dict,
            instrumental_variables=instrument_names,
            frontdoor_variables=frontdoor_variables_names,
            mediation_first_stage_confounders=mediation_first_stage_confounders,
            mediation_second_stage_confounders=mediation_second_stage_confounders,
            default_backdoor_id = default_backdoor_id
        )
        return estimand

[docs]    def identify_nie_effect(self):
        estimands_dict = {}
        ### 1. FIRST DOING BACKDOOR IDENTIFICATION
        # First, checking if there are any valid backdoor adjustment sets
        backdoor_sets = self.identify_backdoor(self.treatment_name, self.outcome_name)
        estimands_dict, backdoor_variables_dict = self.build_backdoor_estimands_dict(
                self.treatment_name,
                self.outcome_name,
                backdoor_sets,
                estimands_dict)
        # Setting default "backdoor" identification adjustment set
        default_backdoor_id = self.get_default_backdoor_set_id(backdoor_variables_dict)
        backdoor_variables_dict["backdoor"] = backdoor_variables_dict.get(str(default_backdoor_id), None)

        ### 2. SECOND, CHECKING FOR MEDIATORS
        # Now checking if there are valid mediator variables
        estimands_dict = {} # Need to reinitialize this dictionary to avoid including the backdoor sets
        mediation_first_stage_confounders = None
        mediation_second_stage_confounders = None
        mediators_names = self.identify_mediation()
        self.logger.info("Mediators for treatment and outcome:" +
                str(mediators_names))
        if len(mediators_names) >0:
            mediation_estimand_expr = self.construct_mediation_estimand(
                self.estimand_type,
                self._graph.treatment_name,
                self._graph.outcome_name,
                mediators_names
            )
            self.logger.debug("Identified expression = " + str(mediation_estimand_expr))
            estimands_dict["mediation"] = mediation_estimand_expr
            mediation_first_stage_confounders = self.identify_mediation_first_stage_confounders(self.treatment_name, mediators_names)
            mediation_second_stage_confounders = self.identify_mediation_second_stage_confounders(mediators_names, self.outcome_name)
        else:
            estimands_dict["mediation"] = None
        # Finally returning the estimand object
        estimand = IdentifiedEstimand(
            self,
            treatment_variable=self._graph.treatment_name,
            outcome_variable=self._graph.outcome_name,
            estimand_type=self.estimand_type,
            estimands=estimands_dict,
            backdoor_variables=backdoor_variables_dict,
            instrumental_variables=None,
            frontdoor_variables=None,
            mediator_variables=mediators_names,
            mediation_first_stage_confounders=mediation_first_stage_confounders,
            mediation_second_stage_confounders=mediation_second_stage_confounders,
            default_backdoor_id = None
        )
        return estimand

[docs]    def identify_nde_effect(self):
        estimands_dict = {}
        ### 1. FIRST DOING BACKDOOR IDENTIFICATION
        # First, checking if there are any valid backdoor adjustment sets
        backdoor_sets = self.identify_backdoor(self.treatment_name, self.outcome_name)
        estimands_dict, backdoor_variables_dict = self.build_backdoor_estimands_dict(
                self.treatment_name,
                self.outcome_name,
                backdoor_sets,
                estimands_dict)
        # Setting default "backdoor" identification adjustment set
        default_backdoor_id = self.get_default_backdoor_set_id(backdoor_variables_dict)
        backdoor_variables_dict["backdoor"] = backdoor_variables_dict.get(str(default_backdoor_id), None)

        ### 2. SECOND, CHECKING FOR MEDIATORS
        # Now checking if there are valid mediator variables
        estimands_dict = {}
        mediation_first_stage_confounders = None
        mediation_second_stage_confounders = None
        mediators_names = self.identify_mediation()
        self.logger.info("Mediators for treatment and outcome:" +
                str(mediators_names))
        if len(mediators_names) >0:
            mediation_estimand_expr = self.construct_mediation_estimand(
                self.estimand_type,
                self._graph.treatment_name,
                self._graph.outcome_name,
                mediators_names
            )
            self.logger.debug("Identified expression = " + str(mediation_estimand_expr))
            estimands_dict["mediation"] = mediation_estimand_expr
            mediation_first_stage_confounders = self.identify_mediation_first_stage_confounders(self.treatment_name, mediators_names)
            mediation_second_stage_confounders = self.identify_mediation_second_stage_confounders(mediators_names, self.outcome_name)
        else:
            estimands_dict["mediation"] = None
        # Finally returning the estimand object
        estimand = IdentifiedEstimand(
            self,
            treatment_variable=self._graph.treatment_name,
            outcome_variable=self._graph.outcome_name,
            estimand_type=self.estimand_type,
            estimands=estimands_dict,
            backdoor_variables=backdoor_variables_dict,
            instrumental_variables=None,
            frontdoor_variables=None,
            mediator_variables=mediators_names,
            mediation_first_stage_confounders=mediation_first_stage_confounders,
            mediation_second_stage_confounders=mediation_second_stage_confounders,
            default_backdoor_id = None
        )
        return estimand

[docs]    def identify_backdoor(self, treatment_name, outcome_name,
            include_unobserved=False, dseparation_algo="default"):
        backdoor_sets = []
        backdoor_paths = None
        bdoor_graph = None
        if dseparation_algo == "naive":
            backdoor_paths = self._graph.get_backdoor_paths(treatment_name, outcome_name)
        elif dseparation_algo == "default":
            bdoor_graph = self._graph.do_surgery(treatment_name,
                    remove_outgoing_edges=True)
        else:
            raise ValueError(f"d-separation algorithm {dseparation_algo} is not supported")
        method_name = self.method_name if self.method_name != CausalIdentifier.BACKDOOR_DEFAULT else CausalIdentifier.DEFAULT_BACKDOOR_METHOD

        # First, checking if empty set is a valid backdoor set
        empty_set = set()
        check = self._graph.check_valid_backdoor_set(treatment_name,
                outcome_name, empty_set,
                backdoor_paths=backdoor_paths, new_graph=bdoor_graph,
                dseparation_algo=dseparation_algo)
        if check["is_dseparated"]:
            backdoor_sets.append({'backdoor_set':empty_set})
            # If the method is `minimal-adjustment`, return the empty set right away.
            if method_name == CausalIdentifier.BACKDOOR_MIN:
                return backdoor_sets

        # Second, checking for all other sets of variables. If include_unobserved is false, then only observed variables are eligible.
        eligible_variables = self._graph.get_all_nodes(include_unobserved=include_unobserved) \
            - set(treatment_name) \
            - set(outcome_name)
        eligible_variables -= self._graph.get_descendants(treatment_name)
        # If var is d-separated from both treatment or outcome, it cannot
        # be a part of the backdoor set
        filt_eligible_variables = set()
        for var in eligible_variables:
            dsep_treat_var = self._graph.check_dseparation(
                    treatment_name, parse_state(var),
                    set())
            dsep_outcome_var = self._graph.check_dseparation(
                    outcome_name, parse_state(var), set())
            if not dsep_outcome_var or not dsep_treat_var:
                filt_eligible_variables.add(var)
        if method_name in CausalIdentifier.METHOD_NAMES:
            backdoor_sets, found_valid_adjustment_set = self.find_valid_adjustment_sets(
                    treatment_name, outcome_name,
                    backdoor_paths, bdoor_graph,
                    dseparation_algo,
                    backdoor_sets, filt_eligible_variables,
                    method_name=method_name,
                    max_iterations= CausalIdentifier.MAX_BACKDOOR_ITERATIONS)
            if method_name == CausalIdentifier.BACKDOOR_DEFAULT and found_valid_adjustment_set:
                # repeat the above search with BACKDOOR_MIN
                backdoor_sets, _ = self.find_valid_adjustment_sets(
                        treatment_name, outcome_name,
                        backdoor_paths, bdoor_graph,
                        dseparation_algo,
                        backdoor_sets, filt_eligible_variables,
                        method_name=CausalIdentifier.BACKDOOR_MIN,
                        max_iterations= CausalIdentifier.MAX_BACKDOOR_ITERATIONS)
        else:
            raise ValueError(f"Identifier method {method_name} not supported. Try one of the following: {CausalIdentifier.METHOD_NAMES}")
        return backdoor_sets

[docs]    def find_valid_adjustment_sets(self, treatment_name, outcome_name,
            backdoor_paths, bdoor_graph, dseparation_algo,
            backdoor_sets, filt_eligible_variables,
            method_name, max_iterations):
        num_iterations = 0
        found_valid_adjustment_set = False
        all_nodes_observed = self._graph.all_observed(self._graph.get_all_nodes())
        # If `minimal-adjustment` method is specified, start the search from the set with minimum size. Otherwise, start from the largest.
        set_sizes = range(1, len(filt_eligible_variables) + 1, 1) if method_name == CausalIdentifier.BACKDOOR_MIN else range(len(filt_eligible_variables), 0, -1)
        for size_candidate_set in set_sizes:
            for candidate_set in itertools.combinations(filt_eligible_variables, size_candidate_set):
                check = self._graph.check_valid_backdoor_set(treatment_name,
                        outcome_name, candidate_set,
                        backdoor_paths=backdoor_paths,
                        new_graph = bdoor_graph,
                        dseparation_algo = dseparation_algo)
                self.logger.debug("Candidate backdoor set: {0}, is_dseparated: {1}".format(candidate_set, check["is_dseparated"]))
                if check["is_dseparated"]:
                    backdoor_sets.append({'backdoor_set': candidate_set})
                    found_valid_adjustment_set = True
                num_iterations += 1
                if method_name == CausalIdentifier.BACKDOOR_EXHAUSTIVE and num_iterations > max_iterations:
                    self.logger.warning(f"Max number of iterations {max_iterations} reached.")
                    break
            # If the backdoor method is `maximal-adjustment` or `minimal-adjustment`, return the first found adjustment set.
            if method_name in {CausalIdentifier.BACKDOOR_DEFAULT, CausalIdentifier.BACKDOOR_MAX, CausalIdentifier.BACKDOOR_MIN} and found_valid_adjustment_set:
                break
            # If all variables are observed, and the biggest eligible set
            # does not satisfy backdoor, then none of its subsets will.
            if method_name in {CausalIdentifier.BACKDOOR_DEFAULT, CausalIdentifier.BACKDOOR_MAX} and all_nodes_observed:
                break
            if num_iterations > max_iterations:
                self.logger.warning(f"Max number of iterations {max_iterations} reached. Could not find a valid backdoor set.")
                break
        return backdoor_sets, found_valid_adjustment_set


[docs]    def get_default_backdoor_set_id(self, backdoor_sets_dict):
        # Adding a None estimand if no backdoor set found
        if len(backdoor_sets_dict) == 0:
            return None

        # Default set contains minimum possible number of instrumental variables, to prevent lowering variance in the treatment variable.
        instrument_names = set(self._graph.get_instruments(self.treatment_name, self.outcome_name))
        iv_count_dict = {key: len(set(bdoor_set).intersection(instrument_names)) for key, bdoor_set in backdoor_sets_dict.items()}
        min_iv_count = min(iv_count_dict.values())
        min_iv_keys = {key for key, iv_count in iv_count_dict.items() if iv_count == min_iv_count}
        min_iv_backdoor_sets_dict = {key: backdoor_sets_dict[key] for key in min_iv_keys}

        # Default set is the one with the least number of adjustment variables (optimizing for efficiency)
        min_set_length = 1000000
        default_key = None
        for key, bdoor_set in min_iv_backdoor_sets_dict.items():
            if len(bdoor_set) < min_set_length:
                min_set_length = len(bdoor_set)
                default_key = key
        return default_key

[docs]    def build_backdoor_estimands_dict(self, treatment_name, outcome_name,
            backdoor_sets, estimands_dict, proceed_when_unidentifiable=None):
        """Build the final dict for backdoor sets by filtering unobserved variables if needed.
        """
        backdoor_variables_dict = {}
        if proceed_when_unidentifiable is None:
            proceed_when_unidentifiable = self._proceed_when_unidentifiable
        is_identified = [ self._graph.all_observed(bset["backdoor_set"]) for bset in backdoor_sets ]

        if any(is_identified):
            self.logger.info("Causal effect can be identified.")
            backdoor_sets_arr = [list(
                bset["backdoor_set"])
                for bset in backdoor_sets
                if self._graph.all_observed(bset["backdoor_set"]) ]
        else: # there is unobserved confounding
            self.logger.warning("Backdoor identification failed.")
            backdoor_sets_arr = []

        for i in range(len(backdoor_sets_arr)):
            backdoor_estimand_expr = self.construct_backdoor_estimand(
                self.estimand_type, treatment_name,
                outcome_name, backdoor_sets_arr[i])
            self.logger.debug("Identified expression = " + str(backdoor_estimand_expr))
            estimands_dict["backdoor"+str(i+1)] = backdoor_estimand_expr
            backdoor_variables_dict["backdoor"+str(i+1)] = backdoor_sets_arr[i]
        return estimands_dict, backdoor_variables_dict

[docs]    def identify_frontdoor(self, dseparation_algo="default"):
        """ Find a valid frontdoor variable if it exists.

        Currently only supports a single variable frontdoor set.
        """
        frontdoor_var = None
        frontdoor_paths = None
        fdoor_graph = None
        if dseparation_algo == "default":
            cond1_graph = self._graph.do_surgery(self.treatment_name,
                    remove_incoming_edges=True)
            bdoor_graph1 = self._graph.do_surgery(self.treatment_name,
                    remove_outgoing_edges=True)
        elif dseparation_algo == "naive":
            frontdoor_paths = self._graph.get_all_directed_paths(self.treatment_name, self.outcome_name)
        else:
            raise ValueError(f"d-separation algorithm {dseparation_algo} is not supported")


        eligible_variables = self._graph.get_descendants(self.treatment_name) \
            - set(self.outcome_name) \
            - set(self._graph.get_descendants(self.outcome_name))
        # For simplicity, assuming a one-variable frontdoor set
        for candidate_var in eligible_variables:
            # Cond 1: All directed paths intercepted by candidate_var
            cond1 = self._graph.check_valid_frontdoor_set(
                self.treatment_name, self.outcome_name,
                parse_state(candidate_var),
                frontdoor_paths=frontdoor_paths,
                new_graph=cond1_graph,
                dseparation_algo=dseparation_algo)
            self.logger.debug("Candidate frontdoor set: {0}, is_dseparated: {1}".format(candidate_var, cond1))
            if not cond1:
                continue
            # Cond 2: No confounding between treatment and candidate var
            cond2 = self._graph.check_valid_backdoor_set(
                self.treatment_name, parse_state(candidate_var),
                set(),
                backdoor_paths=None,
                new_graph= bdoor_graph1,
                dseparation_algo=dseparation_algo)
            if not cond2:
                continue
            # Cond 3: treatment blocks all confounding between candidate_var and outcome
            bdoor_graph2 = self._graph.do_surgery(candidate_var,
                    remove_outgoing_edges=True)
            cond3 = self._graph.check_valid_backdoor_set(
                parse_state(candidate_var), self.outcome_name,
                self.treatment_name,
                backdoor_paths=None,
                new_graph= bdoor_graph2,
                dseparation_algo=dseparation_algo)
            is_valid_frontdoor = cond1 and cond2 and cond3
            if is_valid_frontdoor:
                frontdoor_var = candidate_var
                break
        return parse_state(frontdoor_var)

[docs]    def identify_mediation(self):
        """ Find a valid mediator if it exists.

        Currently only supports a single variable mediator set.
        """
        mediation_var = None
        mediation_paths = self._graph.get_all_directed_paths(self.treatment_name, self.outcome_name)
        eligible_variables = self._graph.get_descendants(self.treatment_name) \
            - set(self.outcome_name)
        # For simplicity, assuming a one-variable mediation set
        for candidate_var in eligible_variables:
            is_valid_mediation = self._graph.check_valid_mediation_set(self.treatment_name,
                    self.outcome_name, parse_state(candidate_var), mediation_paths=mediation_paths)
            self.logger.debug("Candidate mediation set: {0}, on_mediating_path: {1}".format(candidate_var, is_valid_mediation))
            if is_valid_mediation:
                mediation_var = candidate_var
                break
        return parse_state(mediation_var)


        return None

[docs]    def identify_mediation_first_stage_confounders(self, treatment_name, mediators_names):
        # Create estimands dict as per the API for backdoor, but do not return it
        estimands_dict = {}
        backdoor_sets = self.identify_backdoor(treatment_name, mediators_names)
        estimands_dict, backdoor_variables_dict = self.build_backdoor_estimands_dict(
                treatment_name,
                mediators_names,
                backdoor_sets,
                estimands_dict,
                proceed_when_unidentifiable=True)
        # Setting default "backdoor" identification adjustment set
        default_backdoor_id = self.get_default_backdoor_set_id(backdoor_variables_dict)
        estimands_dict["backdoor"] = estimands_dict.get(str(default_backdoor_id), None)
        backdoor_variables_dict["backdoor"] = backdoor_variables_dict.get(str(default_backdoor_id), None)
        return backdoor_variables_dict

[docs]    def identify_mediation_second_stage_confounders(self, mediators_names, outcome_name):
        # Create estimands dict as per the API for backdoor, but do not return it
        estimands_dict = {}
        backdoor_sets = self.identify_backdoor(mediators_names, outcome_name)
        estimands_dict, backdoor_variables_dict = self.build_backdoor_estimands_dict(
                mediators_names,
                outcome_name,
                backdoor_sets,
                estimands_dict,
                proceed_when_unidentifiable=True)
        # Setting default "backdoor" identification adjustment set
        default_backdoor_id = self.get_default_backdoor_set_id(backdoor_variables_dict)
        estimands_dict["backdoor"] = estimands_dict.get(str(default_backdoor_id), None)
        backdoor_variables_dict["backdoor"] = backdoor_variables_dict.get(str(default_backdoor_id), None)
        return backdoor_variables_dict

[docs]    def construct_backdoor_estimand(self, estimand_type, treatment_name,
                                    outcome_name, common_causes):
        # TODO: outputs string for now, but ideally should do symbolic
        # expressions Mon 19 Feb 2018 04:54:17 PM DST
        # TODO Better support for multivariate treatments

        expr = None
        outcome_name = outcome_name[0]
        num_expr_str = outcome_name
        if len(common_causes)>0:
            num_expr_str += "|" + ",".join(common_causes)
        expr = "d(" + num_expr_str + ")/d" + ",".join(treatment_name)
        sym_mu = sp.Symbol("mu")
        sym_sigma = sp.Symbol("sigma", positive=True)
        sym_outcome = spstats.Normal(num_expr_str, sym_mu, sym_sigma)
        sym_treatment_symbols = [sp.Symbol(t) for t in treatment_name]
        sym_treatment = sp.Array(sym_treatment_symbols)
        sym_conditional_outcome = spstats.Expectation(sym_outcome)
        sym_effect = sp.Derivative(sym_conditional_outcome, sym_treatment)

        sym_assumptions = {
            'Unconfoundedness': (
                u"If U\N{RIGHTWARDS ARROW}{{{0}}} and U\N{RIGHTWARDS ARROW}{1}"
                " then P({1}|{0},{2},U) = P({1}|{0},{2})"
            ).format(",".join(treatment_name), outcome_name, ",".join(common_causes))
        }

        estimand = {
            'estimand': sym_effect,
            'assumptions': sym_assumptions
        }
        return estimand

[docs]    def construct_iv_estimand(self, estimand_type, treatment_name,
                              outcome_name, instrument_names):
        # TODO: support multivariate treatments better.
        expr = None
        outcome_name = outcome_name[0]
        sym_outcome = spstats.Normal(outcome_name, 0, 1)
        sym_treatment_symbols = [spstats.Normal(t, 0, 1) for t in treatment_name]
        sym_treatment = sp.Array(sym_treatment_symbols)
        sym_instrument_symbols = [sp.Symbol(inst) for inst in instrument_names]
        sym_instrument = sp.Array(sym_instrument_symbols)  # ",".join(instrument_names))
        sym_outcome_derivative = sp.Derivative(sym_outcome, sym_instrument)
        sym_treatment_derivative = sp.Derivative(sym_treatment, sym_instrument)
        sym_effect = spstats.Expectation(sym_outcome_derivative / sym_treatment_derivative)
        sym_assumptions = {
            "As-if-random": (
                "If U\N{RIGHTWARDS ARROW}\N{RIGHTWARDS ARROW}{0} then "
                "\N{NOT SIGN}(U \N{RIGHTWARDS ARROW}\N{RIGHTWARDS ARROW}{{{1}}})"
            ).format(outcome_name, ",".join(instrument_names)),
            "Exclusion": (
                u"If we remove {{{0}}}\N{RIGHTWARDS ARROW}{{{1}}}, then "
                u"\N{NOT SIGN}({{{0}}}\N{RIGHTWARDS ARROW}{2})"
            ).format(",".join(instrument_names), ",".join(treatment_name),
                     outcome_name)
        }

        estimand = {
            'estimand': sym_effect,
            'assumptions': sym_assumptions
        }
        return estimand

[docs]    def construct_frontdoor_estimand(self, estimand_type, treatment_name,
                              outcome_name, frontdoor_variables_names):
        # TODO: support multivariate treatments better.
        expr = None
        outcome_name = outcome_name[0]
        sym_outcome = spstats.Normal(outcome_name, 0, 1)
        sym_treatment_symbols = [spstats.Normal(t, 0, 1) for t in treatment_name]
        sym_treatment = sp.Array(sym_treatment_symbols)
        sym_frontdoor_symbols = [sp.Symbol(inst) for inst in frontdoor_variables_names]
        sym_frontdoor = sp.Array(sym_frontdoor_symbols)  # ",".join(instrument_names))
        sym_outcome_derivative = sp.Derivative(sym_outcome, sym_frontdoor)
        sym_treatment_derivative = sp.Derivative(sym_frontdoor, sym_treatment)
        sym_effect = spstats.Expectation(sym_treatment_derivative * sym_outcome_derivative)
        sym_assumptions = {
            "Full-mediation": (
                "{2} intercepts (blocks) all directed paths from {0} to {1}."
            ).format(",".join(treatment_name), ",".join(outcome_name), ",".join(frontdoor_variables_names)),
            "First-stage-unconfoundedness": (
                u"If U\N{RIGHTWARDS ARROW}{{{0}}} and U\N{RIGHTWARDS ARROW}{{{1}}}"
                " then P({1}|{0},U) = P({1}|{0})"
            ).format(",".join(treatment_name), ",".join(frontdoor_variables_names)),
            "Second-stage-unconfoundedness": (
                u"If U\N{RIGHTWARDS ARROW}{{{2}}} and U\N{RIGHTWARDS ARROW}{1}"
                " then P({1}|{2}, {0}, U) = P({1}|{2}, {0})"
            ).format(",".join(treatment_name), outcome_name, ",".join(frontdoor_variables_names))
        }

        estimand = {
            'estimand': sym_effect,
            'assumptions': sym_assumptions
        }
        return estimand

[docs]    def construct_mediation_estimand(self, estimand_type, treatment_name,
                              outcome_name, mediators_names):
        # TODO: support multivariate treatments better.
        expr = None
        if estimand_type in (CausalIdentifier.NONPARAMETRIC_NDE, CausalIdentifier.NONPARAMETRIC_NIE):
            outcome_name = outcome_name[0]
            sym_outcome = spstats.Normal(outcome_name, 0, 1)
            sym_treatment_symbols = [spstats.Normal(t, 0, 1) for t in treatment_name]
            sym_treatment = sp.Array(sym_treatment_symbols)
            sym_mediators_symbols = [sp.Symbol(inst) for inst in mediators_names]
            sym_mediators = sp.Array(sym_mediators_symbols)
            sym_outcome_derivative = sp.Derivative(sym_outcome, sym_mediators)
            sym_treatment_derivative = sp.Derivative(sym_mediators, sym_treatment)
            # For direct effect
            num_expr_str = outcome_name
            if len(mediators_names)>0:
                num_expr_str += "|" + ",".join(mediators_names)
            sym_mu = sp.Symbol("mu")
            sym_sigma = sp.Symbol("sigma", positive=True)
            sym_conditional_outcome = spstats.Normal(num_expr_str, sym_mu, sym_sigma)
            sym_directeffect_derivative = sp.Derivative(sym_conditional_outcome, sym_treatment)
            if estimand_type == CausalIdentifier.NONPARAMETRIC_NIE:
                sym_effect = spstats.Expectation(sym_treatment_derivative * sym_outcome_derivative)
            elif estimand_type == CausalIdentifier.NONPARAMETRIC_NDE:
                sym_effect = spstats.Expectation(sym_directeffect_derivative)
            sym_assumptions = {
                "Mediation": (
                    "{2} intercepts (blocks) all directed paths from {0} to {1} except the path {{{0}}}\N{RIGHTWARDS ARROW}{{{1}}}."
                ).format(",".join(treatment_name), ",".join(outcome_name), ",".join(mediators_names)),
                "First-stage-unconfoundedness": (
                    u"If U\N{RIGHTWARDS ARROW}{{{0}}} and U\N{RIGHTWARDS ARROW}{{{1}}}"
                    " then P({1}|{0},U) = P({1}|{0})"
                ).format(",".join(treatment_name), ",".join(mediators_names)),
                "Second-stage-unconfoundedness": (
                    u"If U\N{RIGHTWARDS ARROW}{{{2}}} and U\N{RIGHTWARDS ARROW}{1}"
                    " then P({1}|{2}, {0}, U) = P({1}|{2}, {0})"
                ).format(",".join(treatment_name), outcome_name, ",".join(mediators_names))
            }
        else:
            raise ValueError("Estimand type not supported. Supported estimand types are {0} or {1}'.".format(
                CausalIdentifier.NONPARAMETRIC_NDE,
                CausalIdentifier.NONPARAMETRIC_NIE))

        estimand = {
            'estimand': sym_effect,
            'assumptions': sym_assumptions
        }
        return estimand


class IdentifiedEstimand:

    """Class for storing a causal estimand, typically as a result of the identification step.

    """

    def __init__(self, identifier, treatment_variable, outcome_variable,
                 estimand_type=None, estimands=None,
                 backdoor_variables=None, instrumental_variables=None,
                 frontdoor_variables=None,
                 mediator_variables=None,
                 mediation_first_stage_confounders=None,
                 mediation_second_stage_confounders=None,
                 default_backdoor_id=None, identifier_method=None,
                 no_directed_path=False):
        """Store the pieces of an identified estimand.

        Treatment, outcome, instrumental, frontdoor and mediator variables are
        passed through ``parse_state`` before being stored; every other
        argument is stored as given.

        :param identifier: the object that produced this estimand
        :param treatment_variable: treatment variable name(s)
        :param outcome_variable: outcome variable name(s)
        :param estimand_type: type of the estimand
        :param estimands: mapping from estimand name to either None or a dict with 'estimand' and 'assumptions' entries (as read by __str__)
        :param backdoor_variables: container of backdoor variables indexed by estimand key (as read by get_backdoor_variables)
        :param instrumental_variables: instrumental variable name(s)
        :param frontdoor_variables: frontdoor variable name(s)
        :param mediator_variables: mediator variable name(s)
        :param mediation_first_stage_confounders: stored as given
        :param mediation_second_stage_confounders: stored as given
        :param default_backdoor_id: key of the default backdoor estimand
        :param identifier_method: name of the target estimand/identification method
        :param no_directed_path: True if the graph has no directed path from treatment to outcome
        """
        self.identifier = identifier
        self.treatment_variable = parse_state(treatment_variable)
        self.outcome_variable = parse_state(outcome_variable)
        self.backdoor_variables = backdoor_variables
        self.instrumental_variables = parse_state(instrumental_variables)
        self.frontdoor_variables = parse_state(frontdoor_variables)
        self.mediator_variables = parse_state(mediator_variables)
        self.mediation_first_stage_confounders = mediation_first_stage_confounders
        self.mediation_second_stage_confounders = mediation_second_stage_confounders
        self.estimand_type = estimand_type
        self.estimands = estimands
        self.default_backdoor_id = default_backdoor_id
        self.identifier_method = identifier_method
        self.no_directed_path = no_directed_path

    def set_identifier_method(self, identifier_name):
        """Set the name of the target estimand/identification method."""
        self.identifier_method = identifier_name

    def get_backdoor_variables(self, key=None):
        """ Return a list containing the backdoor variables.

            If the calling estimator method is a backdoor method, return the
            backdoor variables corresponding to its target estimand.
            Otherwise, return the backdoor variables for the default backdoor estimand.

            :param key: if given, return the backdoor variables stored under this key
            :returns: the stored backdoor variables, or an empty list when no key is given, the target is not a backdoor method and no backdoor variables are stored
        """
        if key is not None:
            return self.backdoor_variables[key]
        if self.identifier_method and self.identifier_method.startswith("backdoor"):
            return self.backdoor_variables[self.identifier_method]
        if self.backdoor_variables is not None and len(self.backdoor_variables) > 0:
            return self.backdoor_variables[self.default_backdoor_id]
        return []

    def set_backdoor_variables(self, bdoor_variables_arr, key=None):
        """Store backdoor variables under ``key``.

        :param bdoor_variables_arr: the backdoor variables to store
        :param key: storage key; defaults to the current identifier method
        """
        if key is None:
            key = self.identifier_method
        # The constructor default is None; create the mapping lazily so the
        # first assignment does not fail with a TypeError.
        if self.backdoor_variables is None:
            self.backdoor_variables = {}
        self.backdoor_variables[key] = bdoor_variables_arr

    def get_frontdoor_variables(self):
        """Return a list containing the frontdoor variables (if present)
        """
        return self.frontdoor_variables

    def get_mediator_variables(self):
        """Return a list containing the mediator variables (if present)
        """
        return self.mediator_variables

    def get_instrumental_variables(self):
        """Return a list containing the instrumental variables (if present)
        """
        return self.instrumental_variables

    def __deepcopy__(self, memo):
        """Return a deep copy that shares only the ``identifier`` reference.

        Every stored attribute is carried over, including the mediation
        confounders and the ``no_directed_path`` flag, so the copy reports the
        same estimand as the original.
        """
        return IdentifiedEstimand(
                self.identifier, # not deep copied
                copy.deepcopy(self.treatment_variable, memo),
                copy.deepcopy(self.outcome_variable, memo),
                estimand_type=copy.deepcopy(self.estimand_type, memo),
                estimands=copy.deepcopy(self.estimands, memo),
                backdoor_variables=copy.deepcopy(self.backdoor_variables, memo),
                instrumental_variables=copy.deepcopy(self.instrumental_variables, memo),
                frontdoor_variables=copy.deepcopy(self.frontdoor_variables, memo),
                mediator_variables=copy.deepcopy(self.mediator_variables, memo),
                mediation_first_stage_confounders=copy.deepcopy(
                    self.mediation_first_stage_confounders, memo),
                mediation_second_stage_confounders=copy.deepcopy(
                    self.mediation_second_stage_confounders, memo),
                default_backdoor_id=copy.deepcopy(self.default_backdoor_id, memo),
                identifier_method=copy.deepcopy(self.identifier_method, memo),
                no_directed_path=self.no_directed_path
            )

    def __str__(self, only_target_estimand=False, show_all_backdoor_sets=False):
        """Return a human-readable description of the stored estimands.

        :param only_target_estimand: if True, show only the estimand whose name equals the identifier method
        :param show_all_backdoor_sets: if True, list every backdoor estimand (hiding the plain "backdoor" entry when more than one backdoor key exists); otherwise show only the plain "backdoor" entry
        :returns: the formatted description
        """
        if self.no_directed_path:
            return (
                "No directed path from {0} to {1} in the causal graph.".format(
                    self.treatment_variable,
                    self.outcome_variable)
                + "\nCausal effect is zero."
            )

        # `estimands` defaults to None in the constructor; treat it as empty.
        estimands = self.estimands or {}
        num_backdoor_keys = sum("backdoor" in name for name in estimands)
        parts = ["Estimand type: {0}\n".format(self.estimand_type)]
        index = 1
        for name, estimand in estimands.items():
            if show_all_backdoor_sets:
                # Do not show backdoor key unless it is the only backdoor set.
                if name == "backdoor" and num_backdoor_keys > 1:
                    continue
            elif name.startswith("backdoor") and name != "backdoor":
                # Just show the default backdoor set
                continue
            if only_target_estimand and name != self.identifier_method:
                continue
            parts.append("\n### Estimand : {0}\n".format(index))
            parts.append("Estimand name: {0}".format(name))
            if name == self.default_backdoor_id:
                parts.append(" (Default)")
            parts.append("\n")
            if estimand is None:
                parts.append("No such variable(s) found!\n")
            else:
                sp_expr_str = sp.pretty(estimand["estimand"], use_unicode=True)
                parts.append("Estimand expression:\n{0}\n".format(sp_expr_str))
                assumptions = estimand["assumptions"].items()
                for j, (ass_name, ass_str) in enumerate(assumptions, start=1):
                    parts.append("Estimand assumption {0}, {1}: {2}\n".format(j, ass_name, ass_str))
            index += 1
        return "".join(parts)