Last active
April 9, 2018 22:08
-
-
Save sadatnfs/5e4a3b2b60b4c8c20930de38a4abba89 to your computer and use it in GitHub Desktop.
Testing a random effect model in Edward
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 73, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import warnings\n", | |
"warnings.filterwarnings(\"ignore\", category=DeprecationWarning)\n", | |
"\n", | |
"import tensorflow as tf\n", | |
"# import tensorflow_probability as tfp\n", | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"import statsmodels.api as sm\n", | |
"import statsmodels.formula.api as smf\n", | |
"import scipy as sp\n", | |
"import edward as ed\n", | |
"from edward.models import Normal, Empirical, PointMass\n", | |
"from rpy2.robjects import r, pandas2ri\n", | |
"pandas2ri.activate()\n", | |
"import matplotlib.pyplot as plt\n", | |
"from mpl_toolkits.mplot3d import Axes3D\n", | |
"import seaborn as sns\n", | |
"%matplotlib inline\n", | |
"plt.style.use('seaborn-whitegrid')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Model\n", | |
"\n", | |
"$$ y_i = \\alpha+ \\alpha_i + \\beta x + \\epsilon_i $$ \n", | |
"$$ \\alpha_i \\sim N(0, \\sigma^2_{\\alpha}) $$ \n", | |
"$$ \\epsilon_i \\sim N(0, \\sigma^2) $$" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>i</th>\n", | |
" <th>log_x</th>\n", | |
" <th>log_y</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>setosa</td>\n", | |
" <td>0.336472</td>\n", | |
" <td>1.629241</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>setosa</td>\n", | |
" <td>0.336472</td>\n", | |
" <td>1.589235</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>setosa</td>\n", | |
" <td>0.262364</td>\n", | |
" <td>1.547563</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>setosa</td>\n", | |
" <td>0.405465</td>\n", | |
" <td>1.526056</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>setosa</td>\n", | |
" <td>0.336472</td>\n", | |
" <td>1.609438</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" i log_x log_y\n", | |
"0 setosa 0.336472 1.629241\n", | |
"1 setosa 0.336472 1.589235\n", | |
"2 setosa 0.262364 1.547563\n", | |
"3 setosa 0.405465 1.526056\n", | |
"4 setosa 0.336472 1.609438" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"## Get Iris data\n", | |
"r.data('iris')\n", | |
"iris = r['iris']\n", | |
"\n", | |
"data = pd.DataFrame({'log_y' : np.log(iris['Sepal.Length']), \\\n", | |
" 'log_x' : np.log(iris['Petal.Length']), \\\n", | |
" 'i' : iris.Species.astype('category')})\n", | |
"data.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 114, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" Mixed Linear Model Regression Results\n", | |
"======================================================\n", | |
"Model: MixedLM Dependent Variable: log_y \n", | |
"No. Observations: 150 Method: ML \n", | |
"No. Groups: 3 Scale: 0.0044 \n", | |
"Min. group size: 50 Likelihood: 186.4619\n", | |
"Max. group size: 50 Converged: Yes \n", | |
"Mean group size: 50.0 \n", | |
"------------------------------------------------------\n", | |
" Coef. Std.Err. z P>|z| [0.025 0.975]\n", | |
"------------------------------------------------------\n", | |
"Intercept 1.211 0.110 11.034 0.000 0.996 1.426\n", | |
"log_x 0.464 0.051 9.077 0.000 0.364 0.564\n", | |
"groups RE 0.025 0.347 \n", | |
"======================================================\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"## Try with stats models\n", | |
"N = data.i.unique()\n", | |
"md = smf.mixedlm(\"log_y ~ log_x\", data, groups=data[\"i\"]) \n", | |
"mdf = md.fit(reml=False) \n", | |
"print(mdf.summary())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 108, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/opt/conda/lib/python3.6/site-packages/edward/util/random_variables.py:52: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", | |
" not np.issubdtype(value.dtype, np.float) and \\\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"200/200 [100%] ██████████████████████████████ Elapsed: 10s | Loss: -144.868\n" | |
] | |
} | |
], | |
"source": [ | |
"## Set up stuff for Edward\n", | |
"\n", | |
"try: \n", | |
" sess.close()\n", | |
"except Exception:\n", | |
" pass\n", | |
"\n", | |
"tf.reset_default_graph()\n", | |
"sess = tf.InteractiveSession()\n", | |
"\n", | |
"\n", | |
"n_s = max(data.i.cat.codes) + 1 # number of species\n", | |
"n_obs = data.shape[0] # number of obs\n", | |
"\n", | |
"## Set up fixed effects with not-so-useful priors\n", | |
"intercept = Normal(loc=tf.zeros([1]), scale=10.)\n", | |
"beta = Normal(loc=tf.zeros([1]), scale=10.)\n", | |
"\n", | |
"## If using no priors?\n", | |
"# intercept = tf.get_variable(\"intercept\", [])\n", | |
"# beta = tf.get_variable(\"beta\", [])\n", | |
"\n", | |
"\n", | |
"## Set up random effects\n", | |
"sigma_s = tf.sqrt(tf.exp(tf.get_variable(\"sigma_s\", [])))\n", | |
"alpha_s = Normal(loc=tf.zeros(n_s), scale=sigma_s * tf.ones(n_s))\n", | |
"\n", | |
"## Set up likelihood\n", | |
"yhat = (tf.gather(alpha_s, data.i.values.codes.astype('int')) +\n", | |
" intercept * tf.ones([n_obs]) + beta * data.log_x.values)\n", | |
"y = Normal(loc=yhat, scale=tf.ones(n_obs))\n", | |
"\n", | |
"\n", | |
"## Inference stuff ##\n", | |
"\n", | |
"\n", | |
"## If using KLpq\n", | |
"q_alpha_s = Normal(loc=tf.get_variable(\"q_alpha_s/loc\", [n_s]),\n", | |
" scale=tf.nn.softplus(tf.get_variable(\"q_alpha_s/scale\", [1] )))\n", | |
"q_intercept = Normal(loc=tf.get_variable(\"q_intercept/loc\", [1]),\n", | |
" scale=tf.nn.softplus(tf.get_variable(\"q_intercept/scale\", [1])))\n", | |
"q_beta = Normal(loc=tf.get_variable(\"q_beta/loc\", [1]),\n", | |
" scale=tf.nn.softplus(tf.get_variable(\"q_beta/scale\", [1])))\n", | |
"\n", | |
"latent_vars = {alpha_s: q_alpha_s, intercept: q_intercept, beta: q_beta}\n", | |
"inference = ed.KLpq(latent_vars, data={y: data.log_y.values})\n", | |
"inference.run(n_samples=20, n_iter=200)\n", | |
"\n", | |
"## If using SGHMC\n", | |
"# T = 1000\n", | |
"# q_alpha_s = Empirical(params=tf.get_variable(\"q_alpha_s/params\", [T, n_s]))\n", | |
"# q_intercept = Empirical(params=tf.get_variable(\"q_intercept/params\", [T, 1]))\n", | |
"# q_beta = Empirical(params=tf.get_variable(\"q_beta/params\", [T, 1]))\n", | |
"\n", | |
"# latent_vars = {alpha_s: q_alpha_s, intercept: q_intercept, beta: q_beta}\n", | |
"\n", | |
"# inference = ed.SGHMC(latent_vars, data={y: data.log_y.values})\n", | |
"# inference.run(step_size=1e-4)\n", | |
"\n", | |
"## If using MAP\n", | |
"# q_alpha_s = PointMass(params=tf.Variable(tf.zeros(n_s))) \n", | |
"# q_intercept = PointMass(params=tf.Variable(tf.zeros(1))) \n", | |
"# q_beta = PointMass(params=tf.Variable(tf.zeros(1))) \n", | |
"\n", | |
"# latent_vars = {alpha_s: q_alpha_s, intercept: q_intercept, beta: q_beta}\n", | |
"\n", | |
"# inference = ed.MAP(latent_vars, data={y: data.log_y.values})\n", | |
"# inference.run(n_iter=500)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 109, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0.17341988" | |
] | |
}, | |
"execution_count": 109, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# beta.eval()\n", | |
"q_beta.sample(100).eval().mean()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 112, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0.006955293" | |
] | |
}, | |
"execution_count": 112, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"q_alpha_s.sample(100).eval().var()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 111, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"1.4500785" | |
] | |
}, | |
"execution_count": 111, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# intercept.eval()\n", | |
"q_intercept.sample(100).eval().mean()" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.4" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment