sadatnfs · April 9, 2018 22:08
diff --git a/edward_RE_test_Iris.ipynb b/edward_RE_test_Iris.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [],
   "source": [
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\", category=DeprecationWarning)\n",
    "\n",
    "import tensorflow as tf\n",
    "# import tensorflow_probability as tfp\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import statsmodels.api as sm\n",
    "import statsmodels.formula.api as smf\n",
    "import scipy as sp\n",
    "import edward as ed\n",
    "from edward.models import Normal, Empirical, PointMass\n",
    "from rpy2.robjects import r, pandas2ri\n",
    "pandas2ri.activate()\n",
    "import matplotlib.pyplot as plt\n",
    "from mpl_toolkits.mplot3d import Axes3D\n",
    "import seaborn as sns\n",
    "%matplotlib inline\n",
    "plt.style.use('seaborn-whitegrid')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Model\n",
    "\n",
    "$$ y_i = \\alpha+ \\alpha_i + \\beta x + \\epsilon_i $$  \n",
    "$$ \\alpha_i \\sim N(0, \\sigma^2_{\\alpha})  $$  \n",
    "$$ \\epsilon_i \\sim N(0, \\sigma^2) $$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>i</th>\n",
       "      <th>log_x</th>\n",
       "      <th>log_y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>setosa</td>\n",
       "      <td>0.336472</td>\n",
       "      <td>1.629241</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>setosa</td>\n",
       "      <td>0.336472</td>\n",
       "      <td>1.589235</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>setosa</td>\n",
       "      <td>0.262364</td>\n",
       "      <td>1.547563</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>setosa</td>\n",
       "      <td>0.405465</td>\n",
       "      <td>1.526056</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>setosa</td>\n",
       "      <td>0.336472</td>\n",
       "      <td>1.609438</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        i     log_x     log_y\n",
       "0  setosa  0.336472  1.629241\n",
       "1  setosa  0.336472  1.589235\n",
       "2  setosa  0.262364  1.547563\n",
       "3  setosa  0.405465  1.526056\n",
       "4  setosa  0.336472  1.609438"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "## Get Iris data\n",
    "r.data('iris')\n",
    "iris = r['iris']\n",
    "\n",
    "data = pd.DataFrame({'log_y' : np.log(iris['Sepal.Length']), \\\n",
    "                     'log_x' : np.log(iris['Petal.Length']), \\\n",
    "                     'i' : iris.Species.astype('category')})\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "        Mixed Linear Model Regression Results\n",
      "======================================================\n",
      "Model:            MixedLM Dependent Variable: log_y   \n",
      "No. Observations: 150     Method:             ML      \n",
      "No. Groups:       3       Scale:              0.0044  \n",
      "Min. group size:  50      Likelihood:         186.4619\n",
      "Max. group size:  50      Converged:          Yes     \n",
      "Mean group size:  50.0                                \n",
      "------------------------------------------------------\n",
      "             Coef. Std.Err.   z    P>|z| [0.025 0.975]\n",
      "------------------------------------------------------\n",
      "Intercept    1.211    0.110 11.034 0.000  0.996  1.426\n",
      "log_x        0.464    0.051  9.077 0.000  0.364  0.564\n",
      "groups RE    0.025    0.347                           \n",
      "======================================================\n",
      "\n"
     ]
    }
   ],
   "source": [
    "## Try with stats models\n",
    "N = data.i.unique()\n",
    "md = smf.mixedlm(\"log_y ~ log_x\", data, groups=data[\"i\"]) \n",
    "mdf = md.fit(reml=False) \n",
    "print(mdf.summary())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/conda/lib/python3.6/site-packages/edward/util/random_variables.py:52: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
      "  not np.issubdtype(value.dtype, np.float) and \\\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "200/200 [100%] ██████████████████████████████ Elapsed: 10s | Loss: -144.868\n"
     ]
    }
   ],
   "source": [
    "## Set up stuff for Edward\n",
    "\n",
    "try: \n",
    "    sess.close()\n",
    "except Exception:\n",
    "    pass\n",
    "\n",
    "tf.reset_default_graph()\n",
    "sess = tf.InteractiveSession()\n",
    "\n",
    "\n",
    "n_s = max(data.i.cat.codes) + 1  # number of species\n",
    "n_obs = data.shape[0] # number of obs\n",
    "\n",
    "## Set up fixed effects with not-so-useful priors\n",
    "intercept = Normal(loc=tf.zeros([1]), scale=10.)\n",
    "beta = Normal(loc=tf.zeros([1]), scale=10.)\n",
    "\n",
    "## If using no priors?\n",
    "# intercept = tf.get_variable(\"intercept\", [])\n",
    "# beta = tf.get_variable(\"beta\", [])\n",
    "\n",
    "\n",
    "## Set up random effects\n",
    "sigma_s = tf.sqrt(tf.exp(tf.get_variable(\"sigma_s\", [])))\n",
    "alpha_s = Normal(loc=tf.zeros(n_s), scale=sigma_s * tf.ones(n_s))\n",
    "\n",
    "## Set up likelihood\n",
    "yhat = (tf.gather(alpha_s, data.i.values.codes.astype('int')) +\n",
    "        intercept * tf.ones([n_obs]) + beta * data.log_x.values)\n",
    "y = Normal(loc=yhat, scale=tf.ones(n_obs))\n",
    "\n",
    "\n",
    "## Inference stuff ##\n",
    "\n",
    "\n",
    "## If using KLpq\n",
    "q_alpha_s = Normal(loc=tf.get_variable(\"q_alpha_s/loc\", [n_s]),\n",
    "            scale=tf.nn.softplus(tf.get_variable(\"q_alpha_s/scale\", [1] )))\n",
    "q_intercept = Normal(loc=tf.get_variable(\"q_intercept/loc\", [1]),\n",
    "            scale=tf.nn.softplus(tf.get_variable(\"q_intercept/scale\", [1])))\n",
    "q_beta = Normal(loc=tf.get_variable(\"q_beta/loc\", [1]),\n",
    "            scale=tf.nn.softplus(tf.get_variable(\"q_beta/scale\", [1])))\n",
    "\n",
    "latent_vars = {alpha_s: q_alpha_s, intercept: q_intercept, beta: q_beta}\n",
    "inference = ed.KLpq(latent_vars, data={y: data.log_y.values})\n",
    "inference.run(n_samples=20, n_iter=200)\n",
    "\n",
    "## If using SGHMC\n",
    "# T = 1000\n",
    "# q_alpha_s = Empirical(params=tf.get_variable(\"q_alpha_s/params\", [T, n_s]))\n",
    "# q_intercept = Empirical(params=tf.get_variable(\"q_intercept/params\", [T, 1]))\n",
    "# q_beta = Empirical(params=tf.get_variable(\"q_beta/params\", [T, 1]))\n",
    "\n",
    "# latent_vars = {alpha_s: q_alpha_s, intercept: q_intercept, beta: q_beta}\n",
    "\n",
    "# inference = ed.SGHMC(latent_vars, data={y: data.log_y.values})\n",
    "# inference.run(step_size=1e-4)\n",
    "\n",
    "## If using MAP\n",
    "# q_alpha_s = PointMass(params=tf.Variable(tf.zeros(n_s))) \n",
    "# q_intercept = PointMass(params=tf.Variable(tf.zeros(1))) \n",
    "# q_beta = PointMass(params=tf.Variable(tf.zeros(1))) \n",
    "\n",
    "# latent_vars = {alpha_s: q_alpha_s, intercept: q_intercept, beta: q_beta}\n",
    "\n",
    "# inference = ed.MAP(latent_vars, data={y: data.log_y.values})\n",
    "# inference.run(n_iter=500)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.17341988"
      ]
     },
     "execution_count": 109,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# beta.eval()\n",
    "q_beta.sample(100).eval().mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.006955293"
      ]
     },
     "execution_count": 112,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "q_alpha_s.sample(100).eval().var()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.4500785"
      ]
     },
     "execution_count": 111,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# intercept.eval()\n",
    "q_intercept.sample(100).eval().mean()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 73,
	"metadata": {},
	"outputs": [],
	"source": [
	"import warnings\n",
	"warnings.filterwarnings(\"ignore\", category=DeprecationWarning)\n",
	"\n",
	"import tensorflow as tf\n",
	"# import tensorflow_probability as tfp\n",
	"import numpy as np\n",
	"import pandas as pd\n",
	"import statsmodels.api as sm\n",
	"import statsmodels.formula.api as smf\n",
	"import scipy as sp\n",
	"import edward as ed\n",
	"from edward.models import Normal, Empirical, PointMass\n",
	"from rpy2.robjects import r, pandas2ri\n",
	"pandas2ri.activate()\n",
	"import matplotlib.pyplot as plt\n",
	"from mpl_toolkits.mplot3d import Axes3D\n",
	"import seaborn as sns\n",
	"%matplotlib inline\n",
	"plt.style.use('seaborn-whitegrid')"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"## Model\n",
	"\n",
	"$$ y_i = \\alpha+ \\alpha_i + \\beta x + \\epsilon_i $$ \n",
	"$$ \\alpha_i \\sim N(0, \\sigma^2_{\\alpha}) $$ \n",
	"$$ \\epsilon_i \\sim N(0, \\sigma^2) $$"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>i</th>\n",
	" <th>log_x</th>\n",
	" <th>log_y</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>setosa</td>\n",
	" <td>0.336472</td>\n",
	" <td>1.629241</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>setosa</td>\n",
	" <td>0.336472</td>\n",
	" <td>1.589235</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>setosa</td>\n",
	" <td>0.262364</td>\n",
	" <td>1.547563</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>setosa</td>\n",
	" <td>0.405465</td>\n",
	" <td>1.526056</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>setosa</td>\n",
	" <td>0.336472</td>\n",
	" <td>1.609438</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" i log_x log_y\n",
	"0 setosa 0.336472 1.629241\n",
	"1 setosa 0.336472 1.589235\n",
	"2 setosa 0.262364 1.547563\n",
	"3 setosa 0.405465 1.526056\n",
	"4 setosa 0.336472 1.609438"
	]
	},
	"execution_count": 2,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"## Get Iris data\n",
	"r.data('iris')\n",
	"iris = r['iris']\n",
	"\n",
	"data = pd.DataFrame({'log_y' : np.log(iris['Sepal.Length']), \\\n",
	" 'log_x' : np.log(iris['Petal.Length']), \\\n",
	" 'i' : iris.Species.astype('category')})\n",
	"data.head()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 114,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	" Mixed Linear Model Regression Results\n",
	"======================================================\n",
	"Model: MixedLM Dependent Variable: log_y \n",
	"No. Observations: 150 Method: ML \n",
	"No. Groups: 3 Scale: 0.0044 \n",
	"Min. group size: 50 Likelihood: 186.4619\n",
	"Max. group size: 50 Converged: Yes \n",
	"Mean group size: 50.0 \n",
	"------------------------------------------------------\n",
	" Coef. Std.Err. z P>\|z\| [0.025 0.975]\n",
	"------------------------------------------------------\n",
	"Intercept 1.211 0.110 11.034 0.000 0.996 1.426\n",
	"log_x 0.464 0.051 9.077 0.000 0.364 0.564\n",
	"groups RE 0.025 0.347 \n",
	"======================================================\n",
	"\n"
	]
	}
	],
	"source": [
	"## Try with stats models\n",
	"N = data.i.unique()\n",
	"md = smf.mixedlm(\"log_y ~ log_x\", data, groups=data[\"i\"]) \n",
	"mdf = md.fit(reml=False) \n",
	"print(mdf.summary())"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 108,
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"/opt/conda/lib/python3.6/site-packages/edward/util/random_variables.py:52: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
	" not np.issubdtype(value.dtype, np.float) and \\\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"200/200 [100%] ██████████████████████████████ Elapsed: 10s \| Loss: -144.868\n"
	]
	}
	],
	"source": [
	"## Set up stuff for Edward\n",
	"\n",
	"try: \n",
	" sess.close()\n",
	"except Exception:\n",
	" pass\n",
	"\n",
	"tf.reset_default_graph()\n",
	"sess = tf.InteractiveSession()\n",
	"\n",
	"\n",
	"n_s = max(data.i.cat.codes) + 1 # number of species\n",
	"n_obs = data.shape[0] # number of obs\n",
	"\n",
	"## Set up fixed effects with not-so-useful priors\n",
	"intercept = Normal(loc=tf.zeros([1]), scale=10.)\n",
	"beta = Normal(loc=tf.zeros([1]), scale=10.)\n",
	"\n",
	"## If using no priors?\n",
	"# intercept = tf.get_variable(\"intercept\", [])\n",
	"# beta = tf.get_variable(\"beta\", [])\n",
	"\n",
	"\n",
	"## Set up random effects\n",
	"sigma_s = tf.sqrt(tf.exp(tf.get_variable(\"sigma_s\", [])))\n",
	"alpha_s = Normal(loc=tf.zeros(n_s), scale=sigma_s * tf.ones(n_s))\n",
	"\n",
	"## Set up likelihood\n",
	"yhat = (tf.gather(alpha_s, data.i.values.codes.astype('int')) +\n",
	" intercept * tf.ones([n_obs]) + beta * data.log_x.values)\n",
	"y = Normal(loc=yhat, scale=tf.ones(n_obs))\n",
	"\n",
	"\n",
	"## Inference stuff ##\n",
	"\n",
	"\n",
	"## If using KLpq\n",
	"q_alpha_s = Normal(loc=tf.get_variable(\"q_alpha_s/loc\", [n_s]),\n",
	" scale=tf.nn.softplus(tf.get_variable(\"q_alpha_s/scale\", [1] )))\n",
	"q_intercept = Normal(loc=tf.get_variable(\"q_intercept/loc\", [1]),\n",
	" scale=tf.nn.softplus(tf.get_variable(\"q_intercept/scale\", [1])))\n",
	"q_beta = Normal(loc=tf.get_variable(\"q_beta/loc\", [1]),\n",
	" scale=tf.nn.softplus(tf.get_variable(\"q_beta/scale\", [1])))\n",
	"\n",
	"latent_vars = {alpha_s: q_alpha_s, intercept: q_intercept, beta: q_beta}\n",
	"inference = ed.KLpq(latent_vars, data={y: data.log_y.values})\n",
	"inference.run(n_samples=20, n_iter=200)\n",
	"\n",
	"## If using SGHMC\n",
	"# T = 1000\n",
	"# q_alpha_s = Empirical(params=tf.get_variable(\"q_alpha_s/params\", [T, n_s]))\n",
	"# q_intercept = Empirical(params=tf.get_variable(\"q_intercept/params\", [T, 1]))\n",
	"# q_beta = Empirical(params=tf.get_variable(\"q_beta/params\", [T, 1]))\n",
	"\n",
	"# latent_vars = {alpha_s: q_alpha_s, intercept: q_intercept, beta: q_beta}\n",
	"\n",
	"# inference = ed.SGHMC(latent_vars, data={y: data.log_y.values})\n",
	"# inference.run(step_size=1e-4)\n",
	"\n",
	"## If using MAP\n",
	"# q_alpha_s = PointMass(params=tf.Variable(tf.zeros(n_s))) \n",
	"# q_intercept = PointMass(params=tf.Variable(tf.zeros(1))) \n",
	"# q_beta = PointMass(params=tf.Variable(tf.zeros(1))) \n",
	"\n",
	"# latent_vars = {alpha_s: q_alpha_s, intercept: q_intercept, beta: q_beta}\n",
	"\n",
	"# inference = ed.MAP(latent_vars, data={y: data.log_y.values})\n",
	"# inference.run(n_iter=500)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 109,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"0.17341988"
	]
	},
	"execution_count": 109,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# beta.eval()\n",
	"q_beta.sample(100).eval().mean()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 112,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"0.006955293"
	]
	},
	"execution_count": 112,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"q_alpha_s.sample(100).eval().var()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 111,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"1.4500785"
	]
	},
	"execution_count": 111,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# intercept.eval()\n",
	"q_intercept.sample(100).eval().mean()"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.4"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}