{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Estimating effect of multiple treatments" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import logging\n", "\n", "import dowhy\n", "from dowhy import CausalModel\n", "import dowhy.datasets\n", "\n", "import econml\n", "import warnings\n", "warnings.filterwarnings('ignore')" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
X0X1W0W1W2W3v0v1y
01.9150921.620087-0.7340041.166135114.8128686.143449315.635386
10.0021470.669278-0.4881481.662905002.241058-0.50744120.008368
20.2486680.7443310.3953330.363403118.2236799.169402322.803160
31.3918200.6850582.196834-0.0313411213.78481116.2089691188.634520
40.9783810.6565760.6860800.2603942112.35393813.503698785.596373
\n", "
" ], "text/plain": [ " X0 X1 W0 W1 W2 W3 v0 v1 \\\n", "0 1.915092 1.620087 -0.734004 1.166135 1 1 4.812868 6.143449 \n", "1 0.002147 0.669278 -0.488148 1.662905 0 0 2.241058 -0.507441 \n", "2 0.248668 0.744331 0.395333 0.363403 1 1 8.223679 9.169402 \n", "3 1.391820 0.685058 2.196834 -0.031341 1 2 13.784811 16.208969 \n", "4 0.978381 0.656576 0.686080 0.260394 2 1 12.353938 13.503698 \n", "\n", " y \n", "0 315.635386 \n", "1 20.008368 \n", "2 322.803160 \n", "3 1188.634520 \n", "4 785.596373 " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = dowhy.datasets.linear_dataset(10, num_common_causes=4, num_samples=10000,\n", " num_instruments=0, num_effect_modifiers=2,\n", " num_treatments=2,\n", " treatment_is_binary=False,\n", " num_discrete_common_causes=2,\n", " num_discrete_effect_modifiers=0,\n", " one_hot_encode=False)\n", "df=data['df']\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:dowhy.causal_model:Model to find the causal effect of treatment ['v0', 'v1'] on outcome ['y']\n" ] } ], "source": [ "model = CausalModel(data=data[\"df\"], \n", " treatment=data[\"treatment_name\"], outcome=data[\"outcome_name\"], \n", " graph=data[\"gml_graph\"])" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "model.view_model()\n", "from IPython.display import Image, display\n", "display(Image(filename=\"causal_model.png\"))" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:dowhy.causal_identifier:Common causes of treatment and outcome:['W0', 'Unobserved Confounders', 'W1', 'W2', 'W3']\n", "WARNING:dowhy.causal_identifier:If this is observed data (not from a randomized experiment), there 
might always be missing confounders. Causal effect cannot be identified perfectly.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "WARN: Do you want to continue by ignoring any unobserved confounders? (use proceed_when_unidentifiable=True to disable this prompt) [y/n] y\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "INFO:dowhy.causal_identifier:Instrumental variables for treatment and outcome:[]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Estimand type: nonparametric-ate\n", "### Estimand : 1\n", "Estimand name: backdoor\n", "Estimand expression:\n", " d \n", "─────────(Expectation(y|W0,W1,W2,W3))\n", "d[v₀ v₁] \n", "Estimand assumption 1, Unconfoundedness: If U→{v0,v1} and U→y then P(y|v0,v1,W0,W1,W2,W3,U) = P(y|v0,v1,W0,W1,W2,W3)\n", "### Estimand : 2\n", "Estimand name: iv\n", "No such variable found!\n", "\n" ] } ], "source": [ "identified_estimand= model.identify_effect()\n", "print(identified_estimand)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Linear model\n", "\n", "Let us first look at an example using a linear model. When the treatment is multi-dimensional, `control_value` and `treatment_value` can each be provided as a tuple or list with one entry per treatment variable.\n", "\n", "The estimate is interpreted as the change in y when (v0, v1) is changed from (0,0) to (1,1)." 
] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:dowhy.causal_estimator:INFO: Using Linear Regression Estimator\n", "INFO:dowhy.causal_estimator:b: y~v0+v1+W0+W1+W2+W3+v0*X0+v0*X1+v1*X0+v1*X1\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "*** Causal Estimate ***\n", "\n", "## Identified estimand\n", "Estimand type: nonparametric-ate\n", "### Estimand : 1\n", "Estimand name: backdoor\n", "Estimand expression:\n", " d \n", "─────────(Expectation(y|W0,W1,W2,W3))\n", "d[v₀ v₁] \n", "Estimand assumption 1, Unconfoundedness: If U→{v0,v1} and U→y then P(y|v0,v1,W0,W1,W2,W3,U) = P(y|v0,v1,W0,W1,W2,W3)\n", "### Estimand : 2\n", "Estimand name: iv\n", "No such variable found!\n", "\n", "## Realized estimand\n", "b: y~v0+v1+W0+W1+W2+W3+v0*X0+v0*X1+v1*X0+v1*X1\n", "Target units: ate\n", "\n", "## Estimate\n", "Mean value: 90.33055385892129\n", "\n" ] } ], "source": [ "linear_estimate = model.estimate_effect(identified_estimand, \n", " method_name=\"backdoor.linear_regression\",\n", " control_value=(0,0),\n", " treatment_value=(1,1),\n", " method_params={'need_conditional_estimates': False})\n", "print(linear_estimate) " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "You can also estimate conditional effects, i.e., how the effect varies with the effect modifiers (X0 and X1 here). When `need_conditional_estimates` is not set to False, the printed estimate additionally reports the effect within bins of the effect modifiers. 
" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:dowhy.causal_estimator:INFO: Using Linear Regression Estimator\n", "INFO:dowhy.causal_estimator:b: y~v0+v1+W0+W1+W2+W3+v0*X0+v0*X1+v1*X0+v1*X1\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "*** Causal Estimate ***\n", "\n", "## Identified estimand\n", "Estimand type: nonparametric-ate\n", "### Estimand : 1\n", "Estimand name: backdoor\n", "Estimand expression:\n", " d \n", "─────────(Expectation(y|W0,W1,W2,W3))\n", "d[v₀ v₁] \n", "Estimand assumption 1, Unconfoundedness: If U→{v0,v1} and U→y then P(y|v0,v1,W0,W1,W2,W3,U) = P(y|v0,v1,W0,W1,W2,W3)\n", "### Estimand : 2\n", "Estimand name: iv\n", "No such variable found!\n", "\n", "## Realized estimand\n", "b: y~v0+v1+W0+W1+W2+W3+v0*X0+v0*X1+v1*X0+v1*X1\n", "Target units: ate\n", "\n", "## Estimate\n", "Mean value: 90.33055385892129\n", "### Conditional Estimates\n", "__categorical__X0 __categorical__X1\n", "(-2.629, 0.14] (-2.936, 0.0176] -3.709237\n", " (0.0176, 0.586] 25.191433\n", " (0.586, 1.084] 43.708158\n", " (1.084, 1.661] 61.782760\n", " (1.661, 4.444] 90.521027\n", "(0.14, 0.72] (-2.936, 0.0176] 24.438689\n", " (0.0176, 0.586] 54.299181\n", " (0.586, 1.084] 72.005026\n", " (1.084, 1.661] 90.203724\n", " (1.661, 4.444] 119.455480\n", "(0.72, 1.232] (-2.936, 0.0176] 41.988317\n", " (0.0176, 0.586] 72.977085\n", " (0.586, 1.084] 90.270855\n", " (1.084, 1.661] 108.403973\n", " (1.661, 4.444] 136.661504\n", "(1.232, 1.845] (-2.936, 0.0176] 58.596700\n", " (0.0176, 0.586] 90.604757\n", " (0.586, 1.084] 108.531656\n", " (1.084, 1.661] 126.376825\n", " (1.661, 4.444] 156.005501\n", "(1.845, 4.73] (-2.936, 0.0176] 90.504006\n", " (0.0176, 0.586] 120.364554\n", " (0.586, 1.084] 137.619648\n", " (1.084, 1.661] 155.753611\n", " (1.661, 4.444] 185.702174\n", "dtype: float64\n" ] } ], "source": [ "linear_estimate = model.estimate_effect(identified_estimand, \n", " 
 method_name=\"backdoor.linear_regression\",\n", " control_value=(0,0),\n", " treatment_value=(1,1))\n", "print(linear_estimate) " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## More methods\n", "\n", "You can also use estimation methods from the EconML or CausalML libraries that support multiple treatments. For examples, see the conditional treatment effects notebook: https://microsoft.github.io/dowhy/example_notebooks/dowhy-conditional-treatment-effects.html\n", "\n", "Propensity-based methods do not currently support multiple treatments.\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": false, "sideBar": true, "skip_h1_title": true, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 4 }