Skip to content

Instantly share code, notes, and snippets.

@twiecki
Created March 27, 2013 00:58
Show Gist options
  • Save twiecki/5250693 to your computer and use it in GitHub Desktop.
Save twiecki/5250693 to your computer and use it in GitHub Desktop.
{
"metadata": {
"name": "analysis_dbs"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"%load_ext autoreload\n",
"%autoreload 2\n",
"import hddm\n",
"print hddm.__version__\n",
"import kabuki\n",
"print kabuki.__version__\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"from scipy import stats\n",
"\n",
"from sklearn.grid_search import GridSearchCV\n",
"from sklearn.metrics import classification_report\n",
"from sklearn.metrics import precision_score\n",
"from sklearn.metrics import recall_score\n",
"from sklearn.svm import SVC\n",
"from sklearn.cross_validation import LeaveOneOut\n",
"from sklearn import preprocessing"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"0.5dev\n",
"0.5dev\n"
]
}
],
"prompt_number": 444
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"Load and process data"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data = hddm.load_csv('../data/PD_PS.csv')\n",
"data['dbs_state'] = 'on'\n",
"data['dbs_state'][data.dbs == 0] = 'off'"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 257
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"data_collapsed = data.copy()\n",
"data_collapsed['subj_idx'] = np.int16(data_collapsed['subj_idx'])\n",
"data_collapsed['subj_idx'].ix[data_collapsed.dbs_state == 'off'] += 14"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 285
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Fit model"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"m = hddm.models.HDDMGamma(data_collapsed, depends_on={'v': 'conf', 'a': 'conf'}, plot_subjs=True)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 363
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"m.sample(5000, burn=2000)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
" \r",
"[****************100%******************] 5000 of 5000 complete"
]
},
{
"output_type": "pyout",
"prompt_number": 364,
"text": [
"<pymc.MCMC.MCMC at 0x10f2bf0c>"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n"
]
}
],
"prompt_number": 364
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"Helper functions"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def get_subj_nodes(m):\n",
" m.gen_stats()\n",
" nodes = m.nodes_db.ix[(m.nodes_db.stochastic == True) & (m.nodes_db.subj == True)]\n",
" return nodes\n",
"\n",
"def convert_to_features(m):\n",
" dfs = []\n",
" for subj, data in get_subj_nodes(m)[['mean', 'subj_idx', 'knode_name']].groupby(['subj_idx']):\n",
" reindex = [index.split('.')[0] for index in data.index]\n",
" data.index = reindex\n",
" dfs.append(pd.DataFrame(data['mean'], columns=[subj]).T)\n",
"\n",
" f_vec = pd.concat(dfs)\n",
" return f_vec"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 384
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create feature vectores, scale etc"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"X = convert_to_features(m).drop(['v_subj(HC)', 'v_subj(LC)'], axis=1)\n",
"\n",
"X_scaled = preprocessing.scale(X)\n",
"y = np.concatenate([np.ones(14), np.zeros(14)])"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 439
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"param_grid = [\n",
" {'C': [.01, .1, 1, 10, 100, 1000], 'kernel': ['linear']},\n",
" {'C': [.01, .1, 1, 10, 100, 1000], 'gamma': [10, 1, 0.1, 0.01, 0.001, 0.0001], 'kernel': ['rbf']},\n",
" ]\n",
"\n",
"clf = GridSearchCV(SVC(), param_grid, score_func=metrics.zero_one_score)\n",
"clf.fit(X_scaled, y, cv=5)\n",
"print 'Best model:', clf.best_estimator\n",
"print 'Best cv score:', clf.best_score"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Best model: SVC(C=10, cache_size=200, coef0=0.0, degree=3, gamma=1, kernel=rbf,\n",
" probability=False, scale_C=False, shrinking=True, tol=0.001)\n",
"Best cv score: 0.72\n"
]
}
],
"prompt_number": 443
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment