Skip to content

Instantly share code, notes, and snippets.

@jonathan-taylor
Last active May 31, 2016 04:08
Show Gist options
  • Select an option

  • Save jonathan-taylor/7bb4305d11e77c8d5565bb3e53a8128f to your computer and use it in GitHub Desktop.

Select an option

Save jonathan-taylor/7bb4305d11e77c8d5565bb3e53a8128f to your computer and use it in GitHub Desktop.
Comparing knockoffs to selective inference
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from __future__ import division, print_function\n",
"import os\n",
"import ipyparallel as ipp\n",
"rc = ipp.Client()\n",
"v = rc.load_balanced_view()\n",
"\n",
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import statsmodels.api as sm\n",
"\n",
"signal_opts={'signal':3.5, \n",
" 'n':300, \n",
" 'p':100, \n",
" 'k':30,\n",
" 'sigma':2}\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[stderr:0] \n",
"/Users/jonathantaylor/anaconda/envs/py27/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: Loading required package: Matrix\n",
"\n",
" res = super(Function, self).__call__(*new_args, **new_kwargs)\n",
"/Users/jonathantaylor/anaconda/envs/py27/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: Loading required package: foreach\n",
"\n",
" res = super(Function, self).__call__(*new_args, **new_kwargs)\n",
"/Users/jonathantaylor/anaconda/envs/py27/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: foreach: simple, scalable parallel programming from Revolution Analytics\n",
"Use Revolution R for scalability, fault tolerance and more.\n",
"http://www.revolutionanalytics.com\n",
"\n",
" res = super(Function, self).__call__(*new_args, **new_kwargs)\n",
"/Users/jonathantaylor/anaconda/envs/py27/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: Loaded glmnet 2.0-2\n",
"\n",
"\n",
" res = super(Function, self).__call__(*new_args, **new_kwargs)\n",
"/Users/jonathantaylor/anaconda/envs/py27/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: Loading required package: intervals\n",
"\n",
" res = super(Function, self).__call__(*new_args, **new_kwargs)\n",
"/Users/jonathantaylor/anaconda/envs/py27/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: \n",
"Attaching package: ‘intervals’\n",
"\n",
"\n",
" res = super(Function, self).__call__(*new_args, **new_kwargs)\n",
"/Users/jonathantaylor/anaconda/envs/py27/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: The following object is masked from ‘package:Matrix’:\n",
"\n",
" expand\n",
"\n",
"\n",
" res = super(Function, self).__call__(*new_args, **new_kwargs)\n",
"/Users/jonathantaylor/anaconda/envs/py27/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: Loading required package: survival\n",
"\n",
" res = super(Function, self).__call__(*new_args, **new_kwargs)\n",
"[stderr:1] \n",
"/Users/jonathantaylor/anaconda/envs/py27/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: Loading required package: Matrix\n",
"\n",
" res = super(Function, self).__call__(*new_args, **new_kwargs)\n",
"/Users/jonathantaylor/anaconda/envs/py27/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: Loading required package: foreach\n",
"\n",
" res = super(Function, self).__call__(*new_args, **new_kwargs)\n",
"/Users/jonathantaylor/anaconda/envs/py27/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: foreach: simple, scalable parallel programming from Revolution Analytics\n",
"Use Revolution R for scalability, fault tolerance and more.\n",
"http://www.revolutionanalytics.com\n",
"\n",
" res = super(Function, self).__call__(*new_args, **new_kwargs)\n",
"/Users/jonathantaylor/anaconda/envs/py27/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: Loaded glmnet 2.0-2\n",
"\n",
"\n",
" res = super(Function, self).__call__(*new_args, **new_kwargs)\n",
"/Users/jonathantaylor/anaconda/envs/py27/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: Loading required package: intervals\n",
"\n",
" res = super(Function, self).__call__(*new_args, **new_kwargs)\n",
"/Users/jonathantaylor/anaconda/envs/py27/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: \n",
"Attaching package: ‘intervals’\n",
"\n",
"\n",
" res = super(Function, self).__call__(*new_args, **new_kwargs)\n",
"/Users/jonathantaylor/anaconda/envs/py27/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: The following object is masked from ‘package:Matrix’:\n",
"\n",
" expand\n",
"\n",
"\n",
" res = super(Function, self).__call__(*new_args, **new_kwargs)\n",
"/Users/jonathantaylor/anaconda/envs/py27/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: Loading required package: survival\n",
"\n",
" res = super(Function, self).__call__(*new_args, **new_kwargs)\n",
"[stderr:2] \n",
"/Users/jonathantaylor/anaconda/envs/py27/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: Loading required package: Matrix\n",
"\n",
" res = super(Function, self).__call__(*new_args, **new_kwargs)\n",
"/Users/jonathantaylor/anaconda/envs/py27/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: Loading required package: foreach\n",
"\n",
" res = super(Function, self).__call__(*new_args, **new_kwargs)\n",
"/Users/jonathantaylor/anaconda/envs/py27/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: foreach: simple, scalable parallel programming from Revolution Analytics\n",
"Use Revolution R for scalability, fault tolerance and more.\n",
"http://www.revolutionanalytics.com\n",
"\n",
" res = super(Function, self).__call__(*new_args, **new_kwargs)\n",
"/Users/jonathantaylor/anaconda/envs/py27/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: Loaded glmnet 2.0-2\n",
"\n",
"\n",
" res = super(Function, self).__call__(*new_args, **new_kwargs)\n",
"/Users/jonathantaylor/anaconda/envs/py27/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: Loading required package: intervals\n",
"\n",
" res = super(Function, self).__call__(*new_args, **new_kwargs)\n",
"/Users/jonathantaylor/anaconda/envs/py27/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: \n",
"Attaching package: ‘intervals’\n",
"\n",
"\n",
" res = super(Function, self).__call__(*new_args, **new_kwargs)\n",
"/Users/jonathantaylor/anaconda/envs/py27/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: The following object is masked from ‘package:Matrix’:\n",
"\n",
" expand\n",
"\n",
"\n",
" res = super(Function, self).__call__(*new_args, **new_kwargs)\n",
"/Users/jonathantaylor/anaconda/envs/py27/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: Loading required package: survival\n",
"\n",
" res = super(Function, self).__call__(*new_args, **new_kwargs)\n",
"[stderr:3] \n",
"/Users/jonathantaylor/anaconda/envs/py27/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: Loading required package: Matrix\n",
"\n",
" res = super(Function, self).__call__(*new_args, **new_kwargs)\n",
"/Users/jonathantaylor/anaconda/envs/py27/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: Loading required package: foreach\n",
"\n",
" res = super(Function, self).__call__(*new_args, **new_kwargs)\n",
"/Users/jonathantaylor/anaconda/envs/py27/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: foreach: simple, scalable parallel programming from Revolution Analytics\n",
"Use Revolution R for scalability, fault tolerance and more.\n",
"http://www.revolutionanalytics.com\n",
"\n",
" res = super(Function, self).__call__(*new_args, **new_kwargs)\n",
"/Users/jonathantaylor/anaconda/envs/py27/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: Loaded glmnet 2.0-2\n",
"\n",
"\n",
" res = super(Function, self).__call__(*new_args, **new_kwargs)\n",
"/Users/jonathantaylor/anaconda/envs/py27/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: Loading required package: intervals\n",
"\n",
" res = super(Function, self).__call__(*new_args, **new_kwargs)\n",
"/Users/jonathantaylor/anaconda/envs/py27/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: \n",
"Attaching package: ‘intervals’\n",
"\n",
"\n",
" res = super(Function, self).__call__(*new_args, **new_kwargs)\n",
"/Users/jonathantaylor/anaconda/envs/py27/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: The following object is masked from ‘package:Matrix’:\n",
"\n",
" expand\n",
"\n",
"\n",
" res = super(Function, self).__call__(*new_args, **new_kwargs)\n",
"/Users/jonathantaylor/anaconda/envs/py27/lib/python2.7/site-packages/rpy2/robjects/functions.py:106: UserWarning: Loading required package: survival\n",
"\n",
" res = super(Function, self).__call__(*new_args, **new_kwargs)\n"
]
},
{
"data": {
"text/plain": [
"\u001b[0;31mOut[0:1]: \u001b[0m\n",
"array(['knockoff', 'selectiveInference', 'survival', 'intervals', 'glmnet',\n",
" 'foreach', 'Matrix', 'tools', 'stats', 'graphics', 'grDevices',\n",
" 'utils', 'datasets', 'methods', 'base'], \n",
" dtype='|S18')"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"\u001b[0;31mOut[1:1]: \u001b[0m\n",
"array(['knockoff', 'selectiveInference', 'survival', 'intervals', 'glmnet',\n",
" 'foreach', 'Matrix', 'tools', 'stats', 'graphics', 'grDevices',\n",
" 'utils', 'datasets', 'methods', 'base'], \n",
" dtype='|S18')"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"\u001b[0;31mOut[2:1]: \u001b[0m\n",
"array(['knockoff', 'selectiveInference', 'survival', 'intervals', 'glmnet',\n",
" 'foreach', 'Matrix', 'tools', 'stats', 'graphics', 'grDevices',\n",
" 'utils', 'datasets', 'methods', 'base'], \n",
" dtype='|S18')"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"\u001b[0;31mOut[3:1]: \u001b[0m\n",
"array(['knockoff', 'selectiveInference', 'survival', 'intervals', 'glmnet',\n",
" 'foreach', 'Matrix', 'tools', 'stats', 'graphics', 'grDevices',\n",
" 'utils', 'datasets', 'methods', 'base'], \n",
" dtype='|S18')"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%%px\n",
"\n",
"signal_opts={'signal':3.5, \n",
" 'n':300, \n",
" 'p':100, \n",
" 'k':30,\n",
" 'sigma':2}\n",
"\n",
"from __future__ import division, print_function\n",
"from IPython.display import HTML\n",
"\n",
"%load_ext rpy2.ipython\n",
"import rpy2.robjects as rpy\n",
"import numpy as np\n",
"import statsmodels.api as sm\n",
"\n",
"# this code below is available at https://github.com/jonathan-taylor/selective-inference\n",
"from selection.algorithms.lasso import lasso\n",
"from selection.algorithms.sqrt_lasso import choose_lambda\n",
"\n",
"%R library(glmnet)\n",
"%R library(selectiveInference)\n",
"%R library(knockoff)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Data generating mechanism\n",
"\n",
"We use the same data generating mechanism as in [Barber and Candes (2016)](http://arxiv.org/abs/1602.03574)."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"\u001b[0;31mOut[0:2]: \u001b[0m3.6286017038963421e-15"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"\u001b[0;31mOut[1:2]: \u001b[0m3.6286017038963421e-15"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"\u001b[0;31mOut[2:2]: \u001b[0m3.6286017038963421e-15"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"\u001b[0;31mOut[3:2]: \u001b[0m3.6286017038963421e-15"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%%px\n",
"def cov(p, rho=0.25):\n",
" idx = np.arange(p)\n",
" return rho**np.fabs(np.subtract.outer(idx, idx))\n",
"\n",
"def sqrt_cov(p, rho=0.25):\n",
" idx = np.arange(p)\n",
" C = rho**np.fabs(np.subtract.outer(idx, idx))\n",
" return np.linalg.cholesky(C)\n",
"\n",
"# Testing we have the square-root correct\n",
"p = 2500\n",
"A = cov(p, rho=0.3)\n",
"B = sqrt_cov(p, rho=0.3)\n",
"np.linalg.norm(B.dot(B.T) - A)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%%px\n",
"cholesky_factors = {}\n",
"for rho in [0, 0.25, 0.5, 0.75]:\n",
" cholesky_factors[(rho, 2500)] = sqrt_cov(2500, rho=rho)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%%px\n",
"def instance(n=1000, k=30, p=400, signal=3.5, rho=0.25, sigma=1.234): # sigma there just to convince you \n",
" # we don't need to know noise level\n",
" if (rho, p) in cholesky_factors.keys():\n",
" _sqrt_cov = cholesky_factors[(rho, p)]\n",
" else:\n",
" cholesky_factors[(rho, p)] = sqrt_cov(p, rho=rho)\n",
" _sqrt_cov = cholesky_factors[(rho, p)]\n",
" X = np.random.standard_normal((n, p)).dot(_sqrt_cov.T)\n",
"\n",
" X /= (np.sqrt((X**2).sum(0))) # like normc\n",
" beta = np.zeros(p)\n",
" beta[:k] = signal * (2 * np.random.binomial(1, 0.5, size=(k,)) - 1) \n",
" np.random.shuffle(beta)\n",
"\n",
" Y = (X.dot(beta) + np.random.standard_normal(n)) * sigma\n",
" true_active = np.nonzero(beta != 0)[0]\n",
" return X, Y, true_active, beta\n",
"\n",
"X, Y, true_active, beta = instance()\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%%px\n",
"def simulate(rho=np.arange(4)/4., q=0.2, kappa=[0.7], do_cv=True, do_knockoff=True, signal_opts=signal_opts):\n",
" \n",
" n = signal_opts['n']\n",
" p = signal_opts['p']\n",
" sigma = signal_opts['sigma']\n",
" P0, PA, active_size = {}, {}, {}\n",
" full_model_FDP, full_model_power, directional_FDP = {}, {}, {}\n",
" effective_lam = {}\n",
" kappa = sorted(kappa)[::-1]\n",
"\n",
" for r in rho:\n",
" signal_opts['rho'] = r\n",
" X, Y, true_active, beta = instance(**signal_opts)\n",
"\n",
" theory_lam_lasso = np.mean(np.fabs(np.dot(X.T, np.random.standard_normal((n, 2000)))).max(0) * sigma)\n",
"\n",
" # square-root LASSO at several kappa values\n",
"\n",
" for _kap in kappa:\n",
"\n",
" lam = choose_lambda(X)\n",
" L = lasso.sqrt_lasso(X, Y, _kap * lam)\n",
" L.fit()\n",
" S = L.summary('onesided')\n",
" \n",
" _kap = r'$\\kappa=%0.2f$' % _kap\n",
" active_size.setdefault((_kap, r), []).append(len(L.active))\n",
" effective_lam.setdefault((_kap, r), []).append(L._penalty.weights[0] / theory_lam_lasso)\n",
" \n",
" # keep p-values when screening \n",
" \n",
" if set(L.active).issuperset(true_active):\n",
" p0 = [_pv for _pv, v in zip(S['pval'], S['variable']) if v not in true_active]\n",
" pA = [_pv for _pv, v in zip(S['pval'], S['variable']) if v in true_active]\n",
" else:\n",
" p0 = []\n",
" pA = []\n",
" \n",
" P0.setdefault((_kap, r), []).extend(p0) \n",
" PA.setdefault((_kap, r), []).extend(pA)\n",
" if len(L.active) > 0:\n",
" selected = sm.stats.multipletests(S['pval'], q, 'fdr_bh')[0]\n",
" else:\n",
" selected = np.zeros(S.shape, np.bool)\n",
" type1_errors = selected * np.array([v not in true_active for v in S['variable']])\n",
" if hasattr(L, 'active_signs'):\n",
" sign_errors = selected * np.array([(s != np.sign(beta[v])) * (v in true_active) for v, s in \n",
" zip(S['variable'], L.active_signs)])\n",
" else:\n",
" sign_errors = 0\n",
"\n",
" full_model_FDP.setdefault((_kap, r), []).append(np.sum(type1_errors) / max(np.sum(selected), 1))\n",
" directional_FDP.setdefault((_kap, r), []).append(np.sum(type1_errors + sign_errors) / max(np.sum(selected), 1))\n",
" full_model_power.setdefault((_kap, r), []).append(np.sum(selected * np.array([v in true_active for v in S['variable']])) / len(true_active))\n",
" \n",
" # now do cv.glmnet and Lee et al. with\n",
" \n",
" if do_cv:\n",
" \n",
" %R -i X,Y Y = as.matrix(Y)\n",
" %R X = as.matrix(X)\n",
" %R G_CV = cv.glmnet(X, Y, standardize=FALSE, intercept=FALSE)\n",
" %R sigma_hat = estimateSigma(X, Y, standardize=FALSE, intercept=FALSE)$sigmahat\n",
" %R lamhat = G_CV$lambda.min\n",
" %R fit = glmnet(X, Y, standardize=FALSE,intercept=FALSE)\n",
" %R Yhat = predict(fit, X, s = lamhat)\n",
" %R nz = sum(predict(fit, s = lamhat, type = \"coef\") != 0)\n",
" %R sigma_hat = sqrt(sum((Y - Yhat)^2)/(length(Y) - nz - 1))\n",
"\n",
" sigma_hatR = %R sigma_hat\n",
" lam_1SE = %R G_CV$lambda.1se\n",
" lam_minCV = %R G_CV$lambda.min\n",
" \n",
" lam_1SE *= n\n",
" lam_minCV *= n\n",
"\n",
" for method, lam, sigma_hat in zip(['1SE', 'Lee et al.'], \n",
" [float(lam_1SE), theory_lam_lasso],\n",
" [float(sigma_hatR), sigma]):\n",
" L = lasso.gaussian(X, Y, float(lam), sigma=sigma_hat) \n",
" L.fit() \n",
" S = L.summary('onesided')\n",
"\n",
" active_size.setdefault((method, r), []).append(len(L.active))\n",
" effective_lam.setdefault((method, r), []).append(L._penalty.weights[0] / theory_lam_lasso * sigma**2)\n",
"\n",
" # keep p-values when screening \n",
" \n",
" if set(L.active).issuperset(true_active):\n",
" p0 = [_pv for _pv, v in zip(S['pval'], S['variable']) if v not in true_active]\n",
" pA = [_pv for _pv, v in zip(S['pval'], S['variable']) if v in true_active]\n",
" else:\n",
" p0 = []\n",
" pA = []\n",
"\n",
" P0.setdefault((method, r), []).extend(p0) \n",
" PA.setdefault((method, r), []).extend(pA)\n",
"\n",
" if len(L.active) > 0:\n",
" selected = sm.stats.multipletests(S['pval'], q, 'fdr_bh')[0]\n",
" else:\n",
" selected = np.zeros(S.shape, np.bool)\n",
"\n",
" type1_errors = selected * np.array([v not in true_active for v in S['variable']])\n",
" if hasattr(L, 'active_signs'):\n",
" sign_errors = selected * np.array([(s != np.sign(beta[v])) * (v in true_active) for v, s in \n",
" zip(S['variable'], L.active_signs)])\n",
" else:\n",
" sign_errors = 0\n",
"\n",
" full_model_FDP.setdefault((method, r), []).append(np.sum(type1_errors) / max(np.sum(selected), 1))\n",
" directional_FDP.setdefault((method, r), []).append(np.sum(type1_errors + sign_errors) / max(np.sum(selected), 1))\n",
" full_model_power.setdefault((method, r), []).append(np.sum(selected * np.array([v in true_active for v in S['variable']])) / len(true_active))\n",
"\n",
" if (signal_opts['n'] > 2*signal_opts['p']) and do_knockoff:\n",
"\n",
" method = 'knockoff'\n",
" %R -i q\n",
" %R Y=as.matrix(Y)\n",
" %R Y=as.numeric(Y)\n",
" %R X=as.matrix(X)\n",
" %R KO=knockoff.filter(X=X, y=Y, fdr=q, threshold=\"knockoff\")\n",
" selected_idx = rpy.r('KO$selected')\n",
" selected = np.zeros(p, np.bool)\n",
" if selected_idx is not None:\n",
" selected_idx = np.array(selected_idx) - 1 # R vs. python indexing\n",
" selected[selected_idx] = True\n",
" \n",
" type1_errors = selected * np.array([v not in true_active for v in np.arange(p)])\n",
" sign_errors = 0 # don't know how to extract signs from R package\n",
"\n",
" full_model_FDP.setdefault((method, r), []).append(np.sum(type1_errors) / max(np.sum(selected), 1))\n",
" directional_FDP.setdefault((method, r), []).append(np.sum(type1_errors + sign_errors) / max(np.sum(selected), 1))\n",
" full_model_power.setdefault((method, r), []).append(np.sum(selected * np.array([v in true_active for v in np.arange(p)])) / len(true_active))\n",
" \n",
" return P0, PA, full_model_FDP, directional_FDP, full_model_power, active_size, effective_lam"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"@v.parallel(block=False)\n",
"def batch(nullarg, **signal_opts):\n",
" return simulate(signal_opts=signal_opts)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def save_data(outfilename, P0, PA, full_model_FDP, directional_FDP, full_model_power, active_size, effective_lam):\n",
" \n",
" if os.path.exists(outfilename):\n",
" old_results = np.load(outfilename)\n",
" else:\n",
" old_results = None\n",
"\n",
" results = {}\n",
" \n",
" for v, n in zip([P0, PA, full_model_FDP, directional_FDP, full_model_power, active_size, effective_lam],\n",
" ['P0', 'PA', 'full_model_FDP', 'directional_FDP', 'full_model_power', 'active_size', 'effective_lam']):\n",
" for key in v.keys():\n",
" nkey = \"__\".join([n] + [str(k) for k in key])\n",
" if old_results == None:\n",
" results[nkey] = v[key]\n",
" elif nkey in old_results:\n",
" results[nkey] = np.hstack([old_results[nkey], v[key]])\n",
" np.savez(outfilename, **results)\n",
" \n",
"def load_data(infilename):\n",
" \n",
" results = np.load(infilename)\n",
"\n",
" P0, PA, full_model_FDP, directional_FDP, full_model_power, active_size, effective_lam = {}, {}, {}, {}, {}, {}, {}\n",
" for v, n in zip([P0, PA, full_model_FDP, directional_FDP, full_model_power, active_size, effective_lam],\n",
" ['P0', 'PA', 'full_model_FDP', 'directional_FDP', 'full_model_power', 'active_size', 'effective_lam']):\n",
" keys = [(key, key.split('__')[1:]) for key in results.keys() if key[:len(n)] == n]\n",
" for k1, k2 in keys:\n",
" _k, r = k2\n",
" r = float(r)\n",
" v[(_k,r)] = results[k1]\n",
"\n",
" return P0, PA, full_model_FDP, directional_FDP, full_model_power, active_size, effective_lam"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def figure_panel(filename):\n",
" \n",
" P0, PA, full_model_FDP, directional_FDP, full_model_power, active_size, effective_lam = load_data(filename)\n",
"\n",
" first_fig = plt.figure(figsize=(20,7))\n",
"\n",
" # Model FDR\n",
"\n",
" modelFDP_ax = plt.subplot(1,3,1)\n",
" modelFDP_dict = {}\n",
" for m, r in sorted(full_model_FDP.keys()):\n",
" modelFDP_dict.setdefault(m, []).append((r, np.mean(full_model_FDP[(m, r)])))\n",
" for m in sorted(modelFDP_dict.keys()):\n",
" modelFDP_dict[m] = np.array(modelFDP_dict[m])\n",
" modelFDP_ax.plot(modelFDP_dict[m][:,0], modelFDP_dict[m][:,1], label=m)\n",
" modelFDP_ax.legend(loc='lower left')\n",
" modelFDP_ax.set_ylabel(r'E(Model FDP)($\\rho$)', fontsize=20)\n",
" modelFDP_ax.set_xlabel(r'$\\rho$', fontsize=20)\n",
"\n",
" # Directional FDR\n",
"\n",
" dirFDP_ax = plt.subplot(1,3,2)\n",
" dirFDP_dict = {}\n",
" for m, r in sorted(full_model_FDP.keys()):\n",
" dirFDP_dict.setdefault(m, []).append((r, np.mean(directional_FDP[(m, r)])))\n",
" for m in sorted(modelFDP_dict.keys()):\n",
" dirFDP_dict[m] = np.array(modelFDP_dict[m])\n",
" dirFDP_ax.plot(dirFDP_dict[m][:,0], dirFDP_dict[m][:,1], label=m)\n",
" dirFDP_ax.legend(loc='lower left')\n",
" dirFDP_ax.set_ylabel(r'E(Directional FDP)($\\rho$)', fontsize=20)\n",
" dirFDP_ax.set_xlabel(r'$\\rho$', fontsize=20)\n",
"\n",
" # Power \n",
"\n",
" power_ax = plt.subplot(1,3,3)\n",
" power_dict = {}\n",
" for m, r in sorted(full_model_FDP.keys()):\n",
" power_dict.setdefault(m, []).append((r, np.mean(full_model_power[(m, r)])))\n",
" for m in sorted(power_dict.keys()):\n",
" power_dict[m] = np.array(power_dict[m])\n",
" power_ax.plot(power_dict[m][:,0], power_dict[m][:,1], label=m)\n",
" power_ax.legend(loc='lower left')\n",
" power_ax.set_ylabel(r'Power($\\rho$)', fontsize=20)\n",
" power_ax.set_xlabel(r'$\\rho$', fontsize=20)\n",
" \n",
" first_fig.savefig(filename.split('.')[0] + '_figure_panel.pdf')\n",
" plt.close(first_fig)\n",
" \n",
"def figure_null(filename):\n",
" \n",
" P0, PA, full_model_FDP, directional_FDP, full_model_power, active_size, effective_lam = load_data(filename)\n",
"\n",
" null_fig = plt.figure(figsize=(12,12))\n",
" U = np.linspace(0, 1, 201)\n",
" null_ax = null_fig.gca()\n",
" null_dict = {}\n",
" for m, r in sorted(P0.keys()):\n",
" null_dict.setdefault(m, []).extend(P0[(m,r)])\n",
" for m in sorted(null_dict.keys()):\n",
" if null_dict[m]:\n",
" null_ax.plot(U, sm.distributions.ECDF(null_dict[m])(U), label=m)\n",
" null_ax.legend(loc='lower right')\n",
" null_ax.set_ylabel(r'ECDF(p)', fontsize=20)\n",
" null_ax.set_xlabel(r'$p', fontsize=20)\n",
"\n",
" null_fig.savefig(filename.split('.')[0] + '_figure_null.pdf')\n",
" plt.close(null_fig)\n",
" \n",
"def figure_alt(filename):\n",
" \n",
" P0, PA, full_model_FDP, directional_FDP, full_model_power, active_size, effective_lam = load_data(filename)\n",
"\n",
" alt_fig = plt.figure(figsize=(12,12))\n",
" U = np.linspace(0, 1, 201)\n",
" alt_ax = alt_fig.gca()\n",
" alt_dict = {}\n",
" for m, r in sorted(PA.keys()):\n",
" alt_dict.setdefault(m, []).extend(PA[(m,r)])\n",
" for m in sorted(alt_dict.keys()):\n",
" if alt_dict[m]:\n",
" alt_ax.plot(U, sm.distributions.ECDF(alt_dict[m])(U), label=m)\n",
" alt_ax.legend(loc='lower right')\n",
" alt_ax.set_ylabel(r'ECDF(p)', fontsize=20)\n",
" alt_ax.set_xlabel(r'$p', fontsize=20)\n",
"\n",
" alt_fig.savefig(filename.split('.')[0] + '_figure_alt.pdf')\n",
" plt.close(alt_fig)\n",
"\n",
"def figure_active_set(filename):\n",
" \n",
" P0, PA, full_model_FDP, directional_FDP, full_model_power, active_size, effective_lam = load_data(filename)\n",
"\n",
" active_fig = plt.figure(figsize=(12,12))\n",
" active_ax = active_fig.gca()\n",
" active_dict = {}\n",
" for m, r in sorted(active_size.keys()):\n",
" active_dict.setdefault(m, []).append((r, np.mean(active_size[(m, r)])))\n",
" for m in sorted(active_dict.keys()):\n",
" active_dict[m] = np.array(active_dict[m])\n",
" active_ax.plot(active_dict[m][:,0], active_dict[m][:,1], label=m)\n",
" active_ax.legend(loc='lower left')\n",
" active_ax.set_ylabel(r'E(# active)($\\rho$)', fontsize=20)\n",
" active_ax.set_xlabel(r'$\\rho$', fontsize=20)\n",
"\n",
" active_fig.savefig(filename.split('.')[0] + '_figure_active.pdf')\n",
" plt.close(active_fig)\n",
" \n",
"def figure_lam(filename):\n",
" \n",
" P0, PA, full_model_FDP, directional_FDP, full_model_power, active_size, effective_lam = load_data(filename)\n",
"\n",
" lam_fig = plt.figure(figsize=(12,12))\n",
" lam_ax = lam_fig.gca()\n",
" lam_dict = {}\n",
" for m, r in sorted(effective_lam.keys()):\n",
" lam_dict.setdefault(m, []).append((r, np.mean(effective_lam[(m, r)])))\n",
" for m in sorted(lam_dict.keys()):\n",
" lam_dict[m] = np.array(lam_dict[m])\n",
" lam_ax.plot(lam_dict[m][:,0], lam_dict[m][:,1], label=m)\n",
" lam_ax.legend(loc='lower left')\n",
" lam_ax.set_ylabel(r'E($\\hat{\\lambda} / \\lambda_{theory}$)($\\rho$)', fontsize=20)\n",
" lam_ax.set_xlabel(r'$\\rho$', fontsize=20)\n",
"\n",
" lam_fig.savefig(filename.split('.')[0] + '_figure_lam.pdf')\n",
" plt.close(lam_fig)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def make_plots(nsim=100, signal_opts=signal_opts):\n",
" \n",
" @v.parallel(block=False)\n",
" def batch(nullarg):\n",
" return simulate(signal_opts=signal_opts)\n",
" results = batch.map(range(nsim))\n",
"\n",
" P0, PA, full_model_FDP, directional_FDP, full_model_power, active_size, effective_lam = {}, {}, {}, {}, {}, {}, {}\n",
" for i, r in enumerate(results):\n",
" _P0, _PA, _full_model_FDP, _directional_FDP, _full_model_power, _active_size, _effective_lam = r\n",
" for d1, d2 in zip([P0, PA, full_model_FDP, directional_FDP, full_model_power, active_size, effective_lam],\n",
" [_P0, _PA, _full_model_FDP, _directional_FDP, _full_model_power, _active_size, _effective_lam]):\n",
" for key in d2.keys():\n",
" d1.setdefault(key,[]).extend(d2[key])\n",
" save_data('lowdim.npz', P0, PA, full_model_FDP, directional_FDP, full_model_power, active_size, effective_lam)\n",
" print('result %d received' % i)\n",
" if True:\n",
" \n",
" figure_panel('lowdim.npz')\n",
" figure_null('lowdim.npz')\n",
" figure_alt('lowdim.npz')\n",
" figure_lam('lowdim.npz')\n",
" figure_active_set('lowdim.npz')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"result 0 received\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/jonathantaylor/anaconda/envs/py27/lib/python2.7/site-packages/matplotlib/axes/_axes.py:519: UserWarning: No labelled objects found. Use label='...' kwarg on individual plots.\n",
" warnings.warn(\"No labelled objects found. \"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"result 1 received\n",
"result 2 received\n",
"result 3 received\n",
"result 4 received\n",
"result 5 received\n",
"result 6 received\n",
"result 7 received\n",
"result 8 received\n",
"result 9 received\n",
"result 10 received\n",
"result 11 received\n",
"result 12 received\n",
"result 13 received\n",
"result 14 received\n",
"result 15 received\n",
"result 16 received\n",
"result 17 received\n",
"result 18 received\n",
"result 19 received\n",
"result 20 received\n",
"result 21 received\n",
"result 22 received\n",
"result 23 received\n",
"result 24 received\n",
"result 25 received\n",
"result 26 received\n",
"result 27 received\n",
"result 28 received\n",
"result 29 received\n",
"result 30 received\n"
]
}
],
"source": [
"make_plots()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.11"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment