Created
November 15, 2013 19:26
-
-
Save tdhopper/7490144 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "metadata": { | |
| "name": "" | |
| }, | |
| "nbformat": 3, | |
| "nbformat_minor": 0, | |
| "worksheets": [ | |
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "from load_headers_and_data import *\n", | |
| "print X.shape\n", | |
| "print y.shape" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "(418L, 31099L)\n", | |
| "(418L,)\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 1 | |
| }, | |
| { | |
| "cell_type": "heading", | |
| "level": 2, | |
| "metadata": {}, | |
| "source": [ | |
| "Fit Models" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "pipe = {}" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 2 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "cv = cross_validation.KFold(n = len(y), n_folds = N_FOLDS)" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 3 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "score_fun = metrics.make_scorer(metrics.matthews_corrcoef)" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 7 | |
| }, | |
| { | |
| "cell_type": "heading", | |
| "level": 3, | |
| "metadata": {}, | |
| "source": [ | |
| "Dummy Models for Baseline" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "dummy1 = dummy.DummyClassifier(strategy=\"uniform\")\n", | |
| "scores = cross_validation.cross_val_score(dummy1, X, y, cv = cv, n_jobs=1, scoring = score_fun)\n", | |
| "print \"Score mean:\", scores.mean()\n", | |
| "print \"Score std: \", scores.std()" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Score mean: 0.00323086729798\n", | |
| "Score std: 0.195687165821\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 8 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "dummy1 = dummy.DummyClassifier(strategy=\"most_frequent\")\n", | |
| "scores = cross_validation.cross_val_score(dummy1, X, y, cv = cv, n_jobs=1)\n", | |
| "print \"Score mean:\", scores.mean()\n", | |
| "print \"Score std: \", scores.std()" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Score mean: 0.624157955865\n", | |
| "Score std: 0.114053426249\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 9 | |
| }, | |
| { | |
| "cell_type": "heading", | |
| "level": 3, | |
| "metadata": {}, | |
| "source": [ | |
| "Analysis of best feature" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "What if I train a decision tree on the best feature every time?" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "dt = tree.DecisionTreeClassifier()\n", | |
| "scores = []\n", | |
| "for train, test in cv:\n", | |
| " kbest = feature_selection.SelectKBest(k=X.shape[1])\n", | |
| " kbest.fit(X[train, :], y[train])\n", | |
| " best_col = df.columns[np.argmax(kbest.scores_)]\n", | |
| " X_best_col = df[best_col][:,np.newaxis]\n", | |
| " dt.fit(X_best_col[train, :], y[train])\n", | |
| " scores.append( score_fun(dt, X_best_col[test, :], y[test]))\n", | |
| "np.array(scores).mean()" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 11, | |
| "text": [ | |
| "0.73622166219037743" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 11 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "What if I train a decision tree on the worst feature every time?" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "dt = tree.DecisionTreeClassifier()\n", | |
| "scores = []\n", | |
| "for train, test in cv:\n", | |
| " kbest = feature_selection.SelectKBest(k=X.shape[1])\n", | |
| " kbest.fit(X[train, :], y[train])\n", | |
| " best_col = df.columns[np.argmin(kbest.scores_)]\n", | |
| " X_best_col = df[best_col][:,np.newaxis]\n", | |
| " dt.fit(X_best_col[train, :], y[train])\n", | |
| " scores.append( score_fun(dt, X_best_col[test, :], y[test]))\n", | |
| "np.array(scores).mean()" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 12, | |
| "text": [ | |
| "-0.016178853382434934" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 12 | |
| }, | |
| { | |
| "cell_type": "heading", | |
| "level": 3, | |
| "metadata": {}, | |
| "source": [ | |
| "k-Nearest Neighbors" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "pipe[\"kn\"] = Pipeline([\n", | |
| " (\"scale\", preprocessing.StandardScaler(copy=False)),\n", | |
| " (\"dim_red\", feature_selection.SelectKBest(k = 100)),\n", | |
| " (\"classifier\", neighbors.KNeighborsClassifier())\n", | |
| " ])" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 15 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "params = {\n", | |
| " \"classifier__n_neighbors\" : [1, 5, 20, 100, 500], # Try these numbers of neighbors\n", | |
| " \"dim_red__k\" : [1, 50, 100, \"all\"] # Keep the k top-scoring features (or all of them)\n", | |
| "}" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 23 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "gs = grid_search.GridSearchCV(pipe[\"kn\"], param_grid = params, cv = cv, n_jobs = 3, refit=False, scoring = score_fun)\n", | |
| "gs.fit(X, y)\n", | |
| "gs.best_params_" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 24, | |
| "text": [ | |
| "{'classifier__n_neighbors': 20, 'dim_red__k': 50}" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 24 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "gs.best_score_" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 25, | |
| "text": [ | |
| "0.84371026174255259" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 25 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "gs.grid_scores_" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 26, | |
| "text": [ | |
| "[mean: 0.73616, std: 0.12639, params: {'dim_red__k': 1, 'classifier__n_neighbors': 1},\n", | |
| " mean: 0.73853, std: 0.14516, params: {'dim_red__k': 50, 'classifier__n_neighbors': 1},\n", | |
| " mean: 0.75302, std: 0.12697, params: {'dim_red__k': 100, 'classifier__n_neighbors': 1},\n", | |
| " mean: 0.75406, std: 0.11712, params: {'dim_red__k': 'all', 'classifier__n_neighbors': 1},\n", | |
| " mean: 0.79128, std: 0.10681, params: {'dim_red__k': 1, 'classifier__n_neighbors': 5},\n", | |
| " mean: 0.80946, std: 0.07191, params: {'dim_red__k': 50, 'classifier__n_neighbors': 5},\n", | |
| " mean: 0.81912, std: 0.07240, params: {'dim_red__k': 100, 'classifier__n_neighbors': 5},\n", | |
| " mean: 0.78406, std: 0.10592, params: {'dim_red__k': 'all', 'classifier__n_neighbors': 5},\n", | |
| " mean: 0.81211, std: 0.08987, params: {'dim_red__k': 1, 'classifier__n_neighbors': 20},\n", | |
| " mean: 0.84371, std: 0.07097, params: {'dim_red__k': 50, 'classifier__n_neighbors': 20},\n", | |
| " mean: 0.83972, std: 0.07850, params: {'dim_red__k': 100, 'classifier__n_neighbors': 20},\n", | |
| " mean: 0.65437, std: 0.12713, params: {'dim_red__k': 'all', 'classifier__n_neighbors': 20},\n", | |
| " mean: 0.78886, std: 0.10765, params: {'dim_red__k': 1, 'classifier__n_neighbors': 100},\n", | |
| " mean: 0.75037, std: 0.10440, params: {'dim_red__k': 50, 'classifier__n_neighbors': 100},\n", | |
| " mean: 0.75275, std: 0.09795, params: {'dim_red__k': 100, 'classifier__n_neighbors': 100},\n", | |
| " mean: 0.42851, std: 0.10739, params: {'dim_red__k': 'all', 'classifier__n_neighbors': 100},\n", | |
| " mean: 0.00000, std: 0.00000, params: {'dim_red__k': 1, 'classifier__n_neighbors': 500},\n", | |
| " mean: 0.00000, std: 0.00000, params: {'dim_red__k': 50, 'classifier__n_neighbors': 500},\n", | |
| " mean: 0.00000, std: 0.00000, params: {'dim_red__k': 100, 'classifier__n_neighbors': 500},\n", | |
| " mean: 0.00000, std: 0.00000, params: {'dim_red__k': 'all', 'classifier__n_neighbors': 500}]" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 26 | |
| }, | |
| { | |
| "cell_type": "heading", | |
| "level": 3, | |
| "metadata": {}, | |
| "source": [ | |
| "Random Forest" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "pipe[\"rf\"] = Pipeline([\n", | |
| " (\"scale\", preprocessing.StandardScaler(copy=False)),\n", | |
| " (\"dim_red\", feature_selection.RFE(estimator=linear_model.LogisticRegression())),\n", | |
| " (\"classifier\", ensemble.RandomForestClassifier())\n", | |
| " ])" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 16 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "params = {\n", | |
| " \"classifier__n_estimators\" : [1, 10, 100, 100], # Numbers of trees to try (NOTE: 100 appears twice; 1000 may have been intended)\n", | |
| " \"dim_red__step\" : [1, 50, 100],\n", | |
| " \"dim_red__n_features_to_select\" : [150, 1000, 15000, 30000]\n", | |
| "}" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 17 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "gs = grid_search.GridSearchCV(pipe[\"rf\"], param_grid = params, cv = cv, n_jobs = 3, refit=False)\n", | |
| "gs.fit(X, y)\n", | |
| "gs.best_params_" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": "*" | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "gs.grid_scores_" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 12, | |
| "text": [ | |
| "[mean: 0.87560, std: 0.06390, params: {'dim_red__k': 1, 'classifier__n_estimators': 1},\n", | |
| " mean: 0.85407, std: 0.08343, params: {'dim_red__k': 50, 'classifier__n_estimators': 1},\n", | |
| " mean: 0.85407, std: 0.04748, params: {'dim_red__k': 100, 'classifier__n_estimators': 1},\n", | |
| " mean: 0.83732, std: 0.08554, params: {'dim_red__k': 'all', 'classifier__n_estimators': 1},\n", | |
| " mean: 0.88517, std: 0.06015, params: {'dim_red__k': 1, 'classifier__n_estimators': 10},\n", | |
| " mean: 0.89952, std: 0.05129, params: {'dim_red__k': 50, 'classifier__n_estimators': 10},\n", | |
| " mean: 0.92344, std: 0.03883, params: {'dim_red__k': 100, 'classifier__n_estimators': 10},\n", | |
| " mean: 0.90909, std: 0.04688, params: {'dim_red__k': 'all', 'classifier__n_estimators': 10},\n", | |
| " mean: 0.88038, std: 0.05740, params: {'dim_red__k': 1, 'classifier__n_estimators': 100},\n", | |
| " mean: 0.92344, std: 0.04179, params: {'dim_red__k': 50, 'classifier__n_estimators': 100},\n", | |
| " mean: 0.91866, std: 0.04232, params: {'dim_red__k': 100, 'classifier__n_estimators': 100},\n", | |
| " mean: 0.92105, std: 0.04985, params: {'dim_red__k': 'all', 'classifier__n_estimators': 100},\n", | |
| " mean: 0.88038, std: 0.05740, params: {'dim_red__k': 1, 'classifier__n_estimators': 100},\n", | |
| " mean: 0.91866, std: 0.04096, params: {'dim_red__k': 50, 'classifier__n_estimators': 100},\n", | |
| " mean: 0.92344, std: 0.03905, params: {'dim_red__k': 100, 'classifier__n_estimators': 100},\n", | |
| " mean: 0.92584, std: 0.04806, params: {'dim_red__k': 'all', 'classifier__n_estimators': 100}]" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 12 | |
| }, | |
| { | |
| "cell_type": "heading", | |
| "level": 3, | |
| "metadata": {}, | |
| "source": [ | |
| "Logistic Regression" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Specify a \"pipeline\" that defines the entire modeling process. In this case, scale the data, select the 100 best features, and train logistic regression." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "pipe[\"lr\"] = Pipeline([\n", | |
| " (\"scale\", preprocessing.StandardScaler(copy=False)),\n", | |
| " (\"dim_red\", feature_selection.SelectKBest(k = 100)),\n", | |
| " (\"classifier\", linear_model.LogisticRegression())\n", | |
| " ])" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 27 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Perform 3-fold cross validation on this pipeline. Return the cross validation scores." | |
| ] | |
| }, | |
| { | |
| "cell_type": "heading", | |
| "level": 3, | |
| "metadata": {}, | |
| "source": [ | |
| "Support Vector Machine" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Next, I create a pipeline for support vector machines. This time, I'm going to select the k highest-scoring features (SelectKBest) and tune k in the grid search below. " | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "pipe[\"svm\"] = Pipeline([\n", | |
| " (\"scale\", preprocessing.StandardScaler()),\n", | |
| " (\"dim_red\", feature_selection.SelectKBest()),\n", | |
| " (\"classifier\", svm.SVC()) # SVM\n", | |
| " ])" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 38 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "These are the parameters I want to try for various parts of the pipeline. " | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "params = {\n", | |
| " \"classifier__kernel\" : [\"linear\", \"rbf\", \"poly\"], # Try linear, RBF, and Polynomial kernels\n", | |
| " \"classifier__C\" : [1, 100], # Try a penalty of 1 and 100 in the SVM\n", | |
| " \"dim_red__k\" : [10, 100, 1000, \"all\"] # Keep the k top-scoring features (or all of them)\n", | |
| "}" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 40 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "I want to search over all possible combinations of these parameters. Search time increases exponentially with the number of parameters to try. \n", | |
| "\n", | |
| "After searching over all the configurations, print the best configuration. " | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "gs = grid_search.GridSearchCV(pipe[\"svm\"], param_grid = params, cv = cv, n_jobs = 3, refit=False, scoring = score_fun)\n", | |
| "gs.fit(X, y)\n", | |
| "gs.best_params_" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 41, | |
| "text": [ | |
| "{'classifier__C': 1, 'classifier__kernel': 'rbf', 'dim_red__k': 100}" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 41 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "gs.best_score_" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 43, | |
| "text": [ | |
| "0.83357184795663886" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 43 | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "We can also see the cross validation score for every configuration." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "gs.grid_scores_" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 42, | |
| "text": [ | |
| "[mean: 0.83061, std: 0.08029, params: {'dim_red__k': 10, 'classifier__kernel': 'linear', 'classifier__C': 1},\n", | |
| " mean: 0.72683, std: 0.15204, params: {'dim_red__k': 100, 'classifier__kernel': 'linear', 'classifier__C': 1},\n", | |
| " mean: 0.80353, std: 0.11677, params: {'dim_red__k': 1000, 'classifier__kernel': 'linear', 'classifier__C': 1},\n", | |
| " mean: 0.79727, std: 0.10933, params: {'dim_red__k': 'all', 'classifier__kernel': 'linear', 'classifier__C': 1},\n", | |
| " mean: 0.82217, std: 0.08251, params: {'dim_red__k': 10, 'classifier__kernel': 'rbf', 'classifier__C': 1},\n", | |
| " mean: 0.83357, std: 0.08625, params: {'dim_red__k': 100, 'classifier__kernel': 'rbf', 'classifier__C': 1},\n", | |
| " mean: 0.82350, std: 0.09856, params: {'dim_red__k': 1000, 'classifier__kernel': 'rbf', 'classifier__C': 1},\n", | |
| " mean: 0.81307, std: 0.09695, params: {'dim_red__k': 'all', 'classifier__kernel': 'rbf', 'classifier__C': 1},\n", | |
| " mean: 0.66720, std: 0.10597, params: {'dim_red__k': 10, 'classifier__kernel': 'poly', 'classifier__C': 1},\n", | |
| " mean: 0.70275, std: 0.11862, params: {'dim_red__k': 100, 'classifier__kernel': 'poly', 'classifier__C': 1},\n", | |
| " mean: 0.68890, std: 0.11709, params: {'dim_red__k': 1000, 'classifier__kernel': 'poly', 'classifier__C': 1},\n", | |
| " mean: 0.46313, std: 0.09723, params: {'dim_red__k': 'all', 'classifier__kernel': 'poly', 'classifier__C': 1},\n", | |
| " mean: 0.83061, std: 0.08029, params: {'dim_red__k': 10, 'classifier__kernel': 'linear', 'classifier__C': 100},\n", | |
| " mean: 0.68612, std: 0.11307, params: {'dim_red__k': 100, 'classifier__kernel': 'linear', 'classifier__C': 100},\n", | |
| " mean: 0.80353, std: 0.11677, params: {'dim_red__k': 1000, 'classifier__kernel': 'linear', 'classifier__C': 100},\n", | |
| " mean: 0.79727, std: 0.10933, params: {'dim_red__k': 'all', 'classifier__kernel': 'linear', 'classifier__C': 100},\n", | |
| " mean: 0.71885, std: 0.10623, params: {'dim_red__k': 10, 'classifier__kernel': 'rbf', 'classifier__C': 100},\n", | |
| " mean: 0.76182, std: 0.12914, params: {'dim_red__k': 100, 'classifier__kernel': 'rbf', 'classifier__C': 100},\n", | |
| " mean: 0.81595, std: 0.10767, params: {'dim_red__k': 1000, 'classifier__kernel': 'rbf', 'classifier__C': 100},\n", | |
| " mean: 0.81937, std: 0.11506, params: {'dim_red__k': 'all', 'classifier__kernel': 'rbf', 'classifier__C': 100},\n", | |
| " mean: 0.73189, std: 0.14555, params: {'dim_red__k': 10, 'classifier__kernel': 'poly', 'classifier__C': 100},\n", | |
| " mean: 0.77410, std: 0.12403, params: {'dim_red__k': 100, 'classifier__kernel': 'poly', 'classifier__C': 100},\n", | |
| " mean: 0.80180, std: 0.11622, params: {'dim_red__k': 1000, 'classifier__kernel': 'poly', 'classifier__C': 100},\n", | |
| " mean: 0.55779, std: 0.12090, params: {'dim_red__k': 'all', 'classifier__kernel': 'poly', 'classifier__C': 100}]" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 42 | |
| }, | |
| { | |
| "cell_type": "heading", | |
| "level": 2, | |
| "metadata": {}, | |
| "source": [ | |
| "Features vs Score" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "pipe_feature_scoring = Pipeline([\n", | |
| " (\"dim_red\", feature_selection.SelectKBest(k = 8000)),\n", | |
| " (\"classifier\", linear_model.LogisticRegression(C=.01, penalty=\"L1\"))\n", | |
| " ])" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [], | |
| "prompt_number": 33 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "n = 2\n", | |
| "params = {\n", | |
| " \"dim_red__k\" : list(reversed([int(np.floor(n**x)) for x in range(0,50) if n**x < len(df.columns)]))\n", | |
| "}\n", | |
| "params" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 34, | |
| "text": [ | |
| "{'dim_red__k': [16384,\n", | |
| " 8192,\n", | |
| " 4096,\n", | |
| " 2048,\n", | |
| " 1024,\n", | |
| " 512,\n", | |
| " 256,\n", | |
| " 128,\n", | |
| " 64,\n", | |
| " 32,\n", | |
| " 16,\n", | |
| " 8,\n", | |
| " 4,\n", | |
| " 2,\n", | |
| " 1]}" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 34 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "gs = grid_search.GridSearchCV(pipe_feature_scoring, param_grid = params, cv = cross_validation.KFold(n = len(df), n_folds=10), n_jobs = 2, refit=False, pre_dispatch=2, verbose = 2, scoring = \"accuracy\")\n", | |
| "gs.fit(X, y)\n", | |
| "df_scores = pd.DataFrame([(t.mean_validation_score, np.std(t.cv_validation_scores), t.parameters[\"dim_red__k\"]) for t in gs.grid_scores_], columns = [\"score\", \"std\", \"feature_count\"])\n", | |
| "df_scores.set_index(\"feature_count\", inplace=True)\n", | |
| "df_scores[\"max\"] = df_scores.score + df_scores[\"std\"]\n", | |
| "df_scores[\"min\"] = df_scores.score - df_scores[\"std\"]" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "stream": "stderr", | |
| "text": [ | |
| "[Parallel(n_jobs=2)]: Done 1 jobs | elapsed: 4.1s\n", | |
| "[Parallel(n_jobs=2)]: Done 41 jobs | elapsed: 1.2min\n", | |
| "[Parallel(n_jobs=2)]: Done 150 out of 150 | elapsed: 3.8min finished\n" | |
| ] | |
| }, | |
| { | |
| "output_type": "stream", | |
| "stream": "stdout", | |
| "text": [ | |
| "Fitting 10 folds for each of 15 candidates, totalling 150 fits\n" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 35 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "gs.grid_scores_" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 37, | |
| "text": [ | |
| "[mean: 0.88038, std: 0.05378, params: {'dim_red__k': 16384},\n", | |
| " mean: 0.87799, std: 0.05422, params: {'dim_red__k': 8192},\n", | |
| " mean: 0.88038, std: 0.05163, params: {'dim_red__k': 4096},\n", | |
| " mean: 0.88278, std: 0.05105, params: {'dim_red__k': 2048},\n", | |
| " mean: 0.88517, std: 0.05036, params: {'dim_red__k': 1024},\n", | |
| " mean: 0.89474, std: 0.04835, params: {'dim_red__k': 512},\n", | |
| " mean: 0.89474, std: 0.04835, params: {'dim_red__k': 256},\n", | |
| " mean: 0.89234, std: 0.05314, params: {'dim_red__k': 128},\n", | |
| " mean: 0.86603, std: 0.06548, params: {'dim_red__k': 64},\n", | |
| " mean: 0.86124, std: 0.05769, params: {'dim_red__k': 32},\n", | |
| " mean: 0.86364, std: 0.07171, params: {'dim_red__k': 16},\n", | |
| " mean: 0.85646, std: 0.07078, params: {'dim_red__k': 8},\n", | |
| " mean: 0.78708, std: 0.11200, params: {'dim_red__k': 4},\n", | |
| " mean: 0.62440, std: 0.11405, params: {'dim_red__k': 2},\n", | |
| " mean: 0.62440, std: 0.11405, params: {'dim_red__k': 1}]" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 37 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "df_scores[[\"max\",\"min\"]].plot()" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "metadata": {}, | |
| "output_type": "pyout", | |
| "prompt_number": 36, | |
| "text": [ | |
| "<matplotlib.axes.AxesSubplot at 0x3cc32438>" | |
| ] | |
| }, | |
| { | |
| "metadata": {}, | |
| "output_type": "display_data", | |
| "png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEQCAYAAABfiGi4AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XtcU2eeP/BPIFjvUrxDaIOAXARJFFGndcQqRvxZr72g\nbhVLXXqxWzvWqe1Mq93tIEztrrV0u4zbjlbHy47rqzh9VXRAM60WpYo4bXGVKijGFvFuAQXC8/uD\n4ZRIiAfkkAf9vF+vvMxz8pzkk4v5cp4n5xydEEKAiIgIgIe7AxARkTxYFIiISMGiQEREChYFIiJS\nsCgQEZGCRYGIiBS3LQpZWVkIDQ1FcHAw0tLSmtx++fJlzJgxA1FRURg5ciS+++475Taj0YihQ4fC\nbDYjJiambZMTEVGb07naT8FutyMkJATZ2dnw8/PDiBEjsHnzZoSFhSl9li5dip49e+KNN97A8ePH\n8cILLyA7OxsAEBAQgMOHD8PHx0f7Z0JERHfM5ZZCXl4egoKCYDQa4eXlhYSEBGRmZjr0OXbsGMaN\nGwcACAkJQUlJCcrLy5XbuW8cEVHH4bIo2Gw2+Pv7K22DwQCbzebQJyoqCtu3bwdQX0ROnz6Ns2fP\nAgB0Oh0mTJiA6OhorF27tq2zExFRG9O7ulGn0932DpYtW4aXXnoJZrMZkZGRMJvN8PT0BADs27cP\nvr6+KC8vR1xcHEJDQzFmzJi2SU5ERG3OZVHw8/NDaWmp0i4tLYXBYHDo06NHD3z88cdKOyAgAIMG\nDQIA+Pr6AgD69u2LGTNmIC8vr0lR8PPzw7lz5+7sWRAR3WMCAwPx/ffft/n9uhw+io6ORlFREUpK\nSlBdXY2tW7di6tSpDn2uXr2K6upqAMDatWsxduxYdO/eHZWVlbh+/ToAoKKiArt370ZkZGSTxzh3\n7hyEEPjP/xT45hsBIdx/Wb58udszdJRczMRM90IuGTOdPHmyreqAA5dbCnq9Hunp6bBYLLDb7UhK\nSkJYWBgyMjIAAMnJySgsLERiYiJ0Oh0iIiLw0UcfAQDKysowY8YMAEBtbS3mzp2LiRMnOn2cqirg\n5ZcBb29gzBggPh5oPHLl4QHExgIPPtgGz1iFkpKS9nmgFpIxFzOpw0zqyZhLxkxacVkUACA+Ph7x\n8fEOy5KTk5Xro0ePxvHjx5usFxAQgIKCAlUhjh0DBg8GcnOBP/wB+PJLx9tv3gSWLAGio4GFC4Gp\nUwEvL1V3TURELXDbotAevv0WiIgAunWr32JwpqoK+N//BdasAV54AZg/H3jmGSA4uO3zJCYmtv2d\ntgEZczGTOsyknoy5ZMykFZc7r7VLAJ0OS5cKeHsDr7+ubp3jx4H//m/gk0+AsLD6rYdZs4DOnbXN\nSkQkC51OBy2+vqUoCvHxAs8+Wz8s1BLV1UBmZn2BOHwYmDQJuP/++i2O7t3r/731urPbunSpn7do\nYLVaERsb26bPsy3ImIuZ1GEm9WTMJWMmrYqCFMNHBQX1w0ct1akT8Pjj9ZfiYmDvXuCnn+ovFRXA\nhQv1/za0b73e0L5xo74wNBQJIYB+/YD77qt/jJZcWrNOc+t6eTlOuNOdEaL+Ulf387+Nrztb1tq+\nty47frz+89Uej6X29hMngCNH5MjS+PayMqB3bzmyNCy7eRPQ6533XbkSWLTI3Z/utiPFlgIgYLc7\n/rXenurqgMpKx4JRWVm/JXK7y82b6vq1Zv2amvrCcGuhaND4nZPhuiw5br3e8J8YqC+yOl39Z83D\n4+frzpY1d93dt9+tj9VRszT8Mdfe7urhoz59BBodLon+QYj6wnBrsWi89SDbdVly3PqT5oZiwC0v\nulvc1cNHvXq5O4EjWcYPdbqftxAAeXI1xkzqMJN6MuaSMZNWpDjJjmxFgYjoXiXF8NG4cQJ79rgz\nBRFRx6LV8BG3FIiISMGi4ITVanV3BKdkzMVM6jCT
ejLmkjGTVqQoCm++6e4EREQESDKn4OYIREQd\nzl09p0BERHJgUXBC1vFDGXMxkzrMpJ6MuWTMpBUWBSIiUtx2TiErKwuLFy+G3W7HM888g1dffdXh\n9suXL+Ppp5/GqVOn0LlzZ3z88ccYMmSIqnUBzikQEbWGW459ZLfbERISguzsbPj5+WHEiBHYvHkz\nwsLClD5Lly5Fz5498cYbb+D48eN44YUXkJ2drWpdLZ8YEdHdzC0TzXl5eQgKCoLRaISXlxcSEhKQ\nmZnp0OfYsWMYN24cACAkJAQlJSU4f/68qnVlJev4oYy5mEkdZlJPxlwyZtKKy6Jgs9ng7++vtA0G\nA2w2m0OfqKgobN++HUB9ETl9+jTOnj2ral0iIpKLy6Ok6lQcZ3jZsmV46aWXYDabERkZCbPZDE9P\nT1XrNkhMTITRaAQAeHt7w2QyKUckbKjQ7d1u4K7Hd9aOjY2VKk+DxkeQdHcemd8/2doyfp74/jXf\ntlqtWLduHQAo35dacDmncODAAaxYsQJZWVkAgJUrV8LDw8PphHGDgIAAfPPNN/j2229Vrcs5BSKi\nlnPLnEJ0dDSKiopQUlKC6upqbN26FVNvOZHy1atXUV1dDQBYu3Ytxo4di+7du6taV1a3/rUiCxlz\nMZM6zKSejLlkzKQVl8NHer0e6enpsFgssNvtSEpKQlhYGDIyMgAAycnJKCwsRGJiInQ6HSIiIvDR\nRx+5XJeIiOTFYx8REXVAPPYRERFpjkXBCVnHD2XMxUzqMJN6MuaSMZNWWBSIiEjBOQUiog6IcwpE\nRKQ5FgUnZB0/lDEXM6nDTOrJmEvGTFphUSAiIgXnFIiIOiDOKRARkeZYFJyQdfxQxlzMpA4zqSdj\nLhkzaYVFgYiIFJxTICLqgDinQEREmmNRcELW8UMZczGTOsyknoy5ZMykFRYFIiJScE6BiKgDctuc\nQlZWFkJDQxEcHIy0tLQmt1+4cAGTJk2CyWRCRESEcmJpoP7k0kOHDoXZbEZMTEybBiciorbnsijY\n7XYsWrQIWVlZKCwsxObNm3Hs2DGHPunp6TCbzSgoKIDVasWSJUtQW1sLoL6SWa1WHDlyBHl5edo9\nizYm6/ihjLmYSR1mUk/GXDJm0orLopCXl4egoCAYjUZ4eXkhISEBmZmZDn0GDhyIa9euAQCuXbuG\n3r17Q6//+dTPHBoiIuo4XM4pbNu2Dbt27cLatWsBABs3bsTBgwfx/vvvK33q6urwyCOP4MSJE7h+\n/Tr+53/+B/Hx8QCAQYMGoVevXvD09ERycjIWLlzYNADnFIiIWkyr7069qxt1Ot1t7yAlJQUmkwlW\nqxUnT55EXFwcjh49ih49emD//v0YOHAgysvLERcXh9DQUIwZM6bJfSQmJsJoNAIAvL29YTKZEBsb\nC+DnzTa22Wab7Xu5bbValTnbhu9LTQgXcnNzhcViUdopKSkiNTXVoU98fLzYt2+f0n7kkUfE119/\n3eS+VqxYIVatWtVk+a0RjpUfEx/kfeBw+fDrD8WP1390FbVN7d27t90eqyVkzMVM6jCTejLmkjHT\nbb6+W83lnEJ0dDSKiopQUlKC6upqbN26FVOnTnXoExoaiuzsbABAWVkZjh8/jkGDBqGyshLXr18H\nAFRUVGD37t2IjIx0WaCu37yO+D/F46DtIL49/61y2XdmHyI+jMD7B99HbV1t6ysgERG5dNv9FHbu\n3InFixfDbrcjKSkJr732GjIyMgAAycnJuHDhAhYsWIAzZ86grq4Or732GubMmYNTp05h5syZAIDa\n2lrMnTsXr732WtMAjcbFnvvsOdy038TH0z5u0u9Y+TG88PkLuHzjMj6Y/AF+4f+LO37yREQdlVZz\nCtLsvJZ9KhsLMhfgm+e+gXdnb6d9hRDY+t1WvLL7FUwMnIjUCano161fOycmInK/u/6AeC/ufBH/\n9f/+q9mCANS/
CAkRCSh8oRD3d74fEf8ZgQ+//hD2OnubZmmY3JGNjLmYSR1mUk/GXDJm0ooURUEI\ngZOXTmL8oPGq+ve8ryfetbyLPfP3YMt3WxDz3zE4ePagximJiO5+UgwfVVZXwuf3Pqj6TVWL1xdC\n4E/f/Am//uuvEe0bjQd7PYg+Xfugb7e+9f927atc792lN7w8vTR4FkRE7euunlM4d+0chv1hGH5Y\n8kOr7+fqjavY+f1OlFeUo7yyHOUV5bhQdUFpX6i8gEtVl9C9U3f07dpXKRzK9UbL+nTtA58uPujm\n1Q1dvbqiq1dXFhMikopbdl5rL1duXHE5l6BGr869kBCR4LJPnajD5arLuFB5QSkUDUXDdt2GgrIC\nXKi8gFP5p1DzQA0qayqVi06nUwpEV6+u6KLv4tBu7qK6n1d9P71H82+J1WpVdmqRBTOpw0zqyZhL\nxkxauWuKghoeOg/07tobvbv2RghCmu1n9Wv6AaixOxaJxpeq2iqny3+q/gnnK86jqqYKlbXNrFvz\n87oVNRXw1Hk2WziqiqpgKDegs74zuui7oLO+c9PrXs6Xu7qNW0FE1ECK4aPPT3yONXlrsHPuTndG\ncTshBGrqapotGg2XG7U3cKP2Bqpqq36+XlPlfLmKPkKIlhUVz+b7NV6upmB5eni6+2WnVhJCQEDA\nXmdHnahDnaiDXTS6/o/lzpbJ3LfxcjV9Hx/yOCYMmtDurz+Hj+4BOp0OnTw7oZNnp3Z9PWrrapUC\n4bLANLP8UtWlFhWkhtuqaqrg6eHZsqLi2bItos76zhBCSPOFo7ovJMvjZJmAgA46eOg84OnhCQ+d\nR/11XaPr/1jubFm79W3Fep46T3Ty7KSq7922r5Q0RaHXfb3cHUMh6/ihVrn0Hnp079Qd3Tt1b12m\nuNZlEkKgtq5W3RZPM8vLK8qbLLf93YZug7sp7Yb/xFp+4dzuvs7+/SyChgWp6tuS+72T7Ie+OoTR\nY0a3+n49dB6qDprZUjL+/5Mxk1akKQrcUrj36HQ6eHl6wcvTCz3v69lm92vtJ99/YCusiP1lrLtj\nOCjrVYYgnyB3xyDJSDGn8Ovdv8b9Xe7HsoeXuTMKEVGHcVcf5oJbCkREcpCjKNyUqyjIepwTGXMx\nkzrMpJ6MuWTMpBUpisLVG1elKgpERPcqKeYURq4didWTVmOUYZQ7oxARdRh3/ZyCTD9JJSK6V922\nKGRlZSE0NBTBwcFIS0trcvuFCxcwadIkmEwmREREKCeWVrNuA9kmmmUdP5QxFzOpw0zqyZhLxkxa\ncVkU7HY7Fi1ahKysLBQWFmLz5s04duyYQ5/09HSYzWYUFBTAarViyZIlqK2tVbVug0tVl6QqCkRE\n9yqXRSEvLw9BQUEwGo3w8vJCQkICMjMzHfoMHDgQ165dAwBcu3YNvXv3hl6vV7Vug5q6GnTWd26j\np3TnZNvxqYGMuZhJHWZST8ZcMmbSisuiYLPZ4O/vr7QNBgNsNptDn4ULF+K7776Dr68voqKi8N57\n76letzEtdpcnIqKWcXmYCzVf1CkpKTCZTLBarTh58iTi4uJw9OjRFoV4+PDDWLFiBQDA29sbJpNJ\nqcwNY3nt2S4oKMDixYvd9vjNtRuPa8qQBwBWr17t9vfr1raM71/DMlnyyPp54vvXfNtqtSpztkaj\nEZoRLuTm5gqLxaK0U1JSRGpqqkOf+Ph4sW/fPqX9yCOPiK+//lrVuv/4Oaw4demUqxjtbu/eve6O\n4JSMuZhJHWZST8ZcMma6zdd3q7ncT6G2thYhISHIycmBr68vYmJisHnzZoSFhSl9fvWrX6FXr15Y\nvnw5ysrKMHz4cPz9739Hz549b7suUL81Ul5Rjj5d+2hV94iI7jpuOZ+CXq9Heno6LBYL7HY7kpKS\nEBYWhoyMDABAcnIyXn/9dSxYsABRUVGoq6vD73//e/j4+ACA03Wd6dGpRxs/LS
IiahVNtj9aQIII\nTci4qSiEnLmYSR1mUk/GXDJm0uq7U4o9momISA5SHPvIzRGIiDqcu/rYR0REJAcWBSca/zZZJjLm\nYiZ1mEk9GXPJmEkrLApERKTgnAIRUQfEOQUiItIci4ITso4fypiLmdRhJvVkzCVjJq2wKBARkYJz\nCkREHRDnFIiISHMsCk7IOn4oYy5mUoeZ1JMxl4yZtMKiQERECs4pEBF1QJxTICIizd22KGRlZSE0\nNBTBwcFIS0trcvuqVatgNpthNpsRGRkJvV6PK1euAKg/j+jQoUNhNpsRExPT9uk1Iuv4oYy5mEkd\nZlJPxlwyZtKKyzOv2e12LFq0CNnZ2fDz88OIESMwdepUhzOovfLKK3jllVcAAJ999hlWr14Nb29v\nAPWbN1arVTkTGxERyc3lnEJubi7eeustZGVlAQBSU1MBAMuWLXPaf86cORg/fjySkpIAAAEBATh0\n6BB69+7dfADOKRARtZhb5hRsNhv8/f2VtsFggM1mc9q3srISu3btwqxZs5RlOp0OEyZMQHR0NNau\nXdtGkYmISCsui4JOp1N9R3/5y1/w8MMPK0NHALB//34cOXIEO3fuxAcffIAvv/yy9UnbkazjhzLm\nYiZ1mEk9GXPJmEkrLucU/Pz8UFpaqrRLS0thMBic9t2yZQtmz57tsGzgwIEAgL59+2LGjBnIy8vD\nmDFjmqybmJgIo9EIAPD29obJZEJsbCyAn9+M9mwXFBS49fE7UrugoECqPLK+fw1kySNzm++f87bV\nasW6desAQPm+1ILLOYXa2lqEhIQgJycHvr6+iImJwebNmx0mmgHg6tWrGDRoEM6ePYsuXboAqB9O\nstvt6NGjByoqKjBx4kQsX74cEydOdAzAOQUiohbT6rvT5ZaCXq9Heno6LBYL7HY7kpKSEBYWhoyM\nDABAcnIyAODTTz+FxWJRCgIAlJWVYcaMGQDqi8vcuXObFAQiIpKMcDMJIjSxd+9ed0dwSsZczKQO\nM6knYy4ZM2n13ck9momISMFjHxERdUA89hEREWmORcGJW3+GJgsZczGTOsyknoy5ZMykFRYFIiJS\ncE6BiKgD4pwCERFpjkXBCVnHD2XMxUzqMJN6MuaSMZNWWBSIiEjBOQUiog6IcwpERKQ5FgUnZB0/\nlDEXM6nDTOrJmEvGTFphUSAiIgXnFIiIOiDOKRARkeZYFJyQdfxQxlzMpA4zqSdjLhkzaeW2RSEr\nKwuhoaEIDg5GWlpak9tXrVoFs9kMs9mMyMhI6PV6XLlyRdW6REQkF5dzCna7HSEhIcjOzoafnx9G\njBjh9BzNDT777DOsXr0a2dnZqtflnAIRUcu5ZU4hLy8PQUFBMBqN8PLyQkJCAjIzM5vtv2nTJsye\nPbtV6xIRkfu5LAo2mw3+/v5K22AwwGazOe1bWVmJXbt2YdasWS1eVzayjh/KmIuZ1GEm9WTMJWMm\nrbgsCjqdTvUd/eUvf8HDDz8Mb2/vFq9LRERy0Lu60c/PD6WlpUq7tLQUBoPBad8tW7YoQ0ctXTcx\nMRFGoxEA4O3tDZPJhNjYWAA/V+j2bjdw1+M7a8fGxkqVp4HVapUmj8zvn2xtGT9PfP+ab1utVqxb\ntw4AlO9LLbicaK6trUVISAhycnLg6+uLmJgYp5PFV69exaBBg3D27Fl06dKlRetyopmIqOXcMtGs\n1+uRnp4Oi8WC8PBwPPnkkwgLC0NGRgYyMjKUfp9++iksFotSEFyt2xHc+teKLGTMxUzqMJN6MuaS\nMZNWXA4fAUB8fDzi4+MdliUnJzu058+fj/nz56tal4iI5MVjHxERdUA89hEREWmORcEJWccPZczF\nTOowk3oy5pIxk1ZYFIiISME5BSKiDohzCkREpDkWBSdkHT+UMRczqcNM6smYS8ZMWmFRICIiBecU\niIg6IM4pEBGR5lgUnJB1/FDGXMykDjOpJ2
MuGTNphUWBiIgUnFMgIuqAOKdARESaY1FwQtbxQxlz\nMZM6zKSejLlkzKQVFgUiIlLcdk4hKysLixcvht1uxzPPPINXX321SR+r1YqXX34ZNTU16NOnj1JV\njUYjevbsCU9PT3h5eSEvL69pAM4pEBG1mFbfnS6Lgt1uR0hICLKzs+Hn54cRI0Y0Oc/ylStX8NBD\nD2HXrl0wGAy4cOEC+vTpAwAICAjA4cOH4ePj03wAFgUiohZzy0RzXl4egoKCYDQa4eXlhYSEBGRm\nZjr02bRpE2bNmgWDwQAASkFo0BG/8GUdP5QxFzOpw0zqyZhLxkxacVkUbDYb/P39lbbBYIDNZnPo\nU1RUhEuXLmHcuHGIjo7Ghg0blNt0Oh0mTJiA6OhorF27to2jExFRW9O7ulGn0932DmpqapCfn4+c\nnBxUVlZi9OjRGDVqFIKDg7Fv3z74+vqivLwccXFxCA0NxZgxY5rcR2JiIoxGIwDA29sbJpMJsbGx\nAH6u0O3dbuCux3fWjo2NlSpPA6vVKk0emd8/2doyfp74/jXftlqtWLduHQAo35dacDmncODAAaxY\nsQJZWVkAgJUrV8LDw8NhsjktLQ1VVVVYsWIFAOCZZ57BpEmT8Nhjjznc11tvvYXu3btjyZIljgE4\np0BE1GJumVOIjo5GUVERSkpKUF1dja1bt2Lq1KkOfaZNm4Z9+/bBbrejsrISBw8eRHh4OCorK3H9\n+nUAQEVFBXbv3o3IyMg2fwJauPWvFVnImIuZ1GEm9WTMJWMmrbgcPtLr9UhPT4fFYoHdbkdSUhLC\nwsKQkZEBAEhOTkZoaCgmTZqEoUOHwsPDAwsXLkR4eDhOnTqFmTNnAgBqa2sxd+5cTJw4UftnRERE\nrcZjHxERdUA89hEREWmORcEJWccPZczFTOowk3oy5pIxk1ZYFIiISME5BSKiDohzCkREpDkWBSdk\nHT+UMRczqcNM6smYS8ZMWmFRICIiBecUiIg6IM4pEBGR5lgUnJB1/FDGXMykDjOpJ2MuGTNphUWB\niIgUnFMgIuqAOKdARESaY1FwQtbxQxlzMZM6zKSejLlkzKQVFgUiIlJwToGIqANy25xCVlYWQkND\nERwcjLS0NKd9rFYrzGYzIiIiHE7ormZdIiKSiHChtrZWBAYGiuLiYlFdXS2ioqJEYWGhQ5/Lly+L\n8PBwUVpaKoQQory8XPW6/9hKcRXBLfbu3evuCE7JmIuZ1GEm9WTMJWMmrb47XW4p5OXlISgoCEaj\nEV5eXkhISEBmZqZDn02bNmHWrFkwGAwAgD59+qhel4iI5OKyKNhsNvj7+yttg8EAm83m0KeoqAiX\nLl3CuHHjEB0djQ0bNqheV1aNh8BkImMuZlKHmdSTMZeMmbSid3WjTqe77R3U1NQgPz8fOTk5qKys\nxOjRozFq1ChV6zZITEyE0WgEAHh7e8NkMilvQsNPwdhmm2227+W21WrFunXrAED5vtSEq7Gl3Nxc\nYbFYlHZKSopITU116JOamiqWL1+utJOSksSf//xnVesKwTmFlpAxFzOpw0zqyZhLxkxafXe6HD6K\njo5GUVERSkpKUF1dja1bt2Lq1KkOfaZNm4Z9+/bBbrejsrISBw8eRHh4uKp1XfHx8YFOp3PLZdy4\ncW577OYuPj4+ran5REQtctv9FHbu3InFixfDbrcjKSkJr732GjIyMgAAycnJAIBVq1bhj3/8Izw8\nPLBw4UL8y7/8S7PrNgnQzG9tuf+CI74eRNSYVt8J0u68xi9BR3w9iKgxHhCPlEknmTCTOsyknoy5\nZMykFRYFIiJScPiog+DrQUSNcfiIiIg0x6LQgcg4rslM6jCTejLmkjGTVlgUiIhIwaLQCkajEatW\nrcLQoUPRo0cPJCUloaysDPHx8ejVqxfi4uJw5coVAMDjjz+OgQMHwtvbG2PHjkVhYSEAoLq6Gmaz\nGenp6Q
AAu92Ohx56CG+//Xazj9uw67tMmEkdZlJPxlwyZtIKi0Ir6HQ6bN++HTk5OTh+/Dg+++wz\nxMfHIzU1FefPn0ddXR3WrFkDAJg8eTK+//57lJeXY9iwYZg7dy4AoFOnTti4cSPefPNN/N///R9S\nU1MhhMBvfvMbdz41IrrHdeiioNPd+aW1XnzxRfTt2xe+vr4YM2YMRo8ejaioKNx3332YMWMGjhw5\nAgBYsGABunXrBi8vLyxfvhxHjx7F9evXAQBDhgzBb3/7W0ybNg3//u//jg0bNrg8kKCM45rMpA4z\nqSdjLhkzaaVDFwUh7vzSWv3791eud+nSxaHduXNn/PTTT6irq8OyZcsQFBSEXr16ISAgADqdDhcu\nXFD6zps3D2fOnMHkyZMRGBjY+kBERG2gQxcFmTj7vfCmTZuwY8cO5OTk4OrVqyguLoYQwqHv888/\njylTpiArKwv79+93+RgyjmsykzrMpJ6MuWTMpBWX51OgO3P9+nXcd9998PHxQUVFBV5//XWH2zds\n2IAjR47g6NGjyMzMxPz583H06FF069bNTYmJ6F7HLYU20nguoOFw1/PmzcODDz4IPz8/REREYPTo\n0Uq/M2fO4OWXX8Ynn3yCrl27Yvbs2YiOjsavfvWrZh9DxnFNZlKHmdSTMZeMmbTCLYVWKC4udmg3\nnIK0QVJSEpKSkgAAn376qcNtTz31lHK98dwCAGzZsqUtYxIRtRiPfdRB8PUgosZ47CMiItLcbYtC\nVlYWQkNDERwcjLS0tCa3W61W9OrVC2azGWazGf/2b/+m3GY0GjF06FCYzWbExMS0bfJ7kIzjmsyk\nDjOpJ2MuGTNpxeWcgt1ux6JFi5CdnQ0/Pz+MGDECU6dORVhYmEO/sWPHYseOHU3W1+l0sFqtPL8w\nEVEH4XJLIS8vD0FBQTAajfDy8kJCQgIyMzOb9HM1rsVx8LYj42+lmUkdZlJPxlwyZtKKy6Jgs9ng\n7++vtA0GA2w2m0MfnU6Hr776ClFRUZg8ebJywLeG2yZMmIDo6GisXbu2jaMTEVFbczl85Oo4PA2G\nDRuG0tJSdO3aFTt37sT06dNx4sQJAMD+/fsxcOBAlJeXIy4uDqGhoRgzZkyT+0hMTITRaAQAeHt7\nw2QyteKp3P0aj2s2/OXSsMxd7dWrV8NkMkmTx2q1oqCgAIsXL5YmT4PY2Fhp8jTOIkuehjbfv+bf\nr3Xr1gGA8n2pCeFCbm6usFgsSjslJUWkpqa6WkUYjUZx8eLFJstXrFghVq1a1WR5cxFuE+2eA0Ds\n3bvX3THHDcAdAAAQJElEQVSaYCZ1mEk9GXPJmEmr70iX+ynU1tYiJCQEOTk58PX1RUxMDDZv3uww\n0VxWVoZ+/fpBp9MhLy8PTzzxBEpKSlBZWQm73Y4ePXqgoqICEydOxPLlyzFx4kSHx7ib91M4c+YM\nhgwZgmvXrqna6nLlbng9iKjtaPWd4HL4SK/XIz09HRaLBXa7HUlJSQgLC0NGRgYAIDk5Gdu2bcOH\nH34IvV6Prl27Knvl/vjjj5g5cyaA+uIyd+7cJgXhbvfAAw8oh8kmIuoIuEdzB6HT6bB3717pfgVh\ntVqZSQVmUk/GXDJm4h7NElF7Os6SkhJ4eHigrq4OQP1k0ZtvvomHH34YPXv2hMViwcWLF938bIiI\nfsYthVYICAjAwIEDkZmZiZqaGpjNZvj5+eGPf/wjQkNDMXnyZIwdOxbz5s3DoEGDUFtbCw8PD8TG\nxsJms2Hnzp0wGAyIj4/HqFGjsHLlyts+psyvBxG1P7fMKchO99adTd4CgFjeuhe14XScADBmzBj0\n798fUVFRAIAZM2YgJycH8+fPd1hHp9NhwYIFCAoKAgA88cQTTvcEJyJylw5dFFr7hd4W1JyO05kB\nAwY4rNdcP2dkHNdkJnWYST0Zc8mYSSucU2gjHNohorsBi0I7u5PiIeNf
KsykDjOpJ2MuGTNphUWh\njTg7Heety131IyKSAX991EFwPwX1mEkdGTMBcuaSMRP3UyAiIs1xS6GD4OtBRI1xS4GIiDTHotCB\nND62uyyYSR1mUk/GXDJm0gqLAhERKTin0EHw9SCixu65Yx/df//9/A1/I/fff7+7IxDRPeC2w0dZ\nWVkIDQ1FcHAw0tLSmtxutVrRq1cvmM1mmM1mvP3226rXdeXSpUsQQrjlsnfvXrc9dnOXS5cuSTmu\nyUzqMJN6MuaSMZNWXBYFu92ORYsWISsrC4WFhdi8eTOOHTvWpN/YsWNx5MgRHDlyBL/97W9btK6M\nCgoK3B3BKRlzMZM6zKSejLlkzKQVl0UhLy8PQUFBMBqN8PLyQkJCAjIzM5v0czaupXZdGV25csXd\nEZySMRczqcNM6smYS8ZMWnFZFGw2G/z9/ZW2wWCAzWZz6KPT6fDVV18hKioKkydPRmFhoep1iYhI\nLi4nmtVM9A4bNgylpaXo2rUrdu7cienTp+PEiRNtFtAdSkpK3B3BKRlzMZM6zKSejLlkzKQZ4UJu\nbq6wWCxKOyUlRaSmprpaRRiNRnHx4kXV6wYGBgoAvPDCCy+8tOASGBjo8ru4tVxuKURHR6OoqAgl\nJSXw9fXF1q1bsXnzZoc+ZWVl6NevH3Q6HfLy8iCEgI+Pj6p1AeD77793FYGIiNqRy6Kg1+uRnp4O\ni8UCu92OpKQkhIWFISMjAwCQnJyMbdu24cMPP4Rer0fXrl2xZcsWl+sSEZG83L5HMxERycOtxz66\nk53bWqK0tBTjxo3DkCFDEBERgTVr1gCo30EuLi4OgwcPxsSJEx1+drZy5UoEBwcjNDQUu3fvVpYf\nPnwYkZGRCA4OxksvvXTH2ex2O8xmMx599FFpMl25cgWPPfYYwsLCEB4ejoMHD7o918qVKzFkyBBE\nRkZizpw5uHnzZrtnevrpp9G/f39ERkYqy9oyw82bN/Hkk08iODgYo0aNwunTp1uVaenSpQgLC0NU\nVBRmzpyJq1evtmum5nI1ePfdd+Hh4YFLly61a67mMr3//vsICwtDREQEXn31VbdnysvLQ0xMDMxm\nM0aMGIGvv/66XTO5nGjWUm1trQgMDBTFxcWiurpaREVFicLCQk0e64cffhBHjhwRQghx/fp1MXjw\nYFFYWCiWLl0q0tLShBBCpKamildffVUIIcR3330noqKiRHV1tSguLhaBgYGirq5OCCHEiBEjxMGD\nB4UQQsTHx4udO3feUbZ3331XzJkzRzz66KNCCCFFpnnz5omPPvpICCFETU2NuHLliltzFRcXi4CA\nAHHjxg0hhBBPPPGEWLduXbtn+uKLL0R+fr6IiIhQlrVlhg8++EA899xzQgghtmzZIp588slWZdq9\ne7ew2+1CCCFeffXVds/UXC4hhDhz5oywWCzKD1Lc/Vrt2bNHTJgwQVRXVwshhDh//rzbM40dO1Zk\nZWUJIYT4/PPPRWxsbLtmcltR+Oqrrxx+nbRy5UqxcuXKdnnsadOmib/+9a8iJCRE/Pjjj0KI+sIR\nEhIihGj6SymLxSJyc3PFuXPnRGhoqLJ88+bNIjk5udU5SktLxfjx48WePXvElClThBDC7ZmuXLki\nAgICmix3Z66LFy+KwYMHi0uXLomamhoxZcoUsXv3brdkKi4udvgP3JYZLBaLOHDggBCivhj36dOn\nVZka2759u5g7d267Z2ou12OPPSaOHj3qUBTc+Vo9/vjjIicnp0k/d2ZKSEgQW7duFUIIsWnTpnZ/\n/9w2fOSundtKSkpw5MgRjBw5EmVlZejfvz8AoH///igrKwMAnDt3DgaDoUm2W5f7+fndUeaXX34Z\n77zzDjw8fn4b3J2puLgYffv2xYIFCzBs2DAsXLgQFRUVbs3l4+ODJUuW4IEHHoCvry+8vb0RFxfn\n9tcKaNv3q/H/Cb1ej169ejkMsbTG
xx9/jMmTJ0uRKTMzEwaDAUOHDnVY7s5cRUVF+OKLLzBq1CjE\nxsbi0KFDbs+UmpqqfN6XLl2KlStXtmsmtxUFdxwB9aeffsKsWbPw3nvvoUePHk3ytGemzz77DP36\n9YPZbG728LftnQkAamtrkZ+fj+effx75+fno1q0bUlNT3Zrr5MmTWL16NUpKSnDu3Dn89NNP2Lhx\no1szOSNDhsZ+97vfoVOnTpgzZ467o6CyshIpKSl46623lGXNfe7bU21tLS5fvowDBw7gnXfewRNP\nPOHuSEhKSsKaNWtw5swZ/Md//Aeefvrpdn18txUFPz8/lJaWKu3S0lKHatfWampqMGvWLDz11FOY\nPn06gPq/7H788UcAwA8//IB+/fo5zXb27FkYDAb4+fnh7NmzDsv9/Pxaleerr77Cjh07EBAQgNmz\nZ2PPnj146qmn3JoJqP/rw2AwYMSIEQCAxx57DPn5+RgwYIDbch06dAi/+MUv0Lt3b+j1esycORO5\nubluzdSgLd6vhs+9n58fzpw5A6D+y+rq1avw8fFpVa5169bh888/x5/+9CdlmTsznTx5EiUlJYiK\nikJAQADOnj2L4cOHo6yszK25DAYDZs6cCQAYMWIEPDw8cOHCBbdmysvLw4wZMwDU///Ly8tT7r89\nMrmtKDTeua26uhpbt27F1KlTNXksIQSSkpIQHh6OxYsXK8unTp2K9evXAwDWr1+vFIupU6diy5Yt\nqK6uRnFxMYqKihATE4MBAwagZ8+eOHjwIIQQ2LBhg7JOS6WkpKC0tBTFxcXYsmULHnnkEWzYsMGt\nmQBgwIAB8Pf3Vw5Vkp2djSFDhuDRRx91W67Q0FAcOHAAVVVVEEIgOzsb4eHhbs3UoC3er2nTpjW5\nr23btmH8+PGtypSVlYV33nkHmZmZ6Ny5s0NWd2WKjIxEWVkZiouLUVxcDIPBgPz8fPTv39+tuaZP\nn449e/YAAE6cOIHq6mr06dPHrZmCgoLwt7/9DQCwZ88eDB48WLn/dsmkaiZEI59//rkYPHiwCAwM\nFCkpKZo9zpdffil0Op2IiooSJpNJmEwmsXPnTnHx4kUxfvx4ERwcLOLi4sTly5eVdX73u9+JwMBA\nERISovwSQAghDh06JCIiIkRgYKB48cUX2ySf1WpVfn0kQ6aCggIRHR0thg4dKmbMmCGuXLni9lxp\naWkiPDxcREREiHnz5onq6up2z5SQkCAGDhwovLy8hMFgEB9//HGbZrhx44Z4/PHHRVBQkBg5cqQo\nLi5ucaaPPvpIBAUFiQceeED5rDf8+qS9MjXO1alTJ+W1aiwgIECZaG7v16pxpurqavFP//RPIiIi\nQgwbNkzs3bvXLZkaf6a+/vprERMTI6KiosSoUaNEfn5+u2bizmtERKRw685rREQkFxYFIiJSsCgQ\nEZGCRYGIiBQsCkREpGBRICIiBYsCEREpWBRISmvWrEF4eDieeuqpFq13+vRpp6d97WjWr1+PH374\nwd0x6B7EokBS+vDDD5GdnY0NGza0aL3i4mJs2rSpxY9XV1fX4nW0tG7dOpw7d87dMegexKJA0nn2\n2Wdx6tQpTJo0CSkpKUhKSsLIkSMxbNgw7NixA0D9IdB/+ctfYvjw4Rg+fDhyc3MBAMuWLcOXX34J\ns9mM1atXY/369XjxxReV+54yZQq++OILAED37t3xyiuvwGQyITc3Fxs3bsTIkSNhNpvx7LPPuiwU\nWVlZGD58OEwmEyZMmACg/ixs06dPR1RUFEaPHo1vvvkGALBixQq8++67yroRERE4c+YMSkpKEBYW\nhn/+539GREQELBYLbty4gW3btuHQoUOYO3cuhg0bhhs3brTtC0zkym0PhEHkBg0nYXn99dfFxo0b\nhRBCXL58WQwePFhUVFSIyspK5UxsJ06cENHR0UKI+uNINZywSAgh1q1bJxYtWqS0p0yZIv72t78J\n
IYTQ6XTiz3/+sxBCiMLCQvHoo4+K2tpaIYQQzz33nPjkk0+cZjt//rzw9/cXJSUlSi4hhFi0aJH4\n13/9VyFE/Rm9TCaTEEKIFStWiFWrVinrR0REiNOnT4vi4mKh1+vF0aNHhRD1Z5RreK6xsbHi8OHD\nrXvxiO6A3t1Fiag5Qgjs2rULO3bswKpVqwDUn3O2tLQUAwYMwKJFi3D06FF4enqiqKhIWUctT09P\nzJo1CwCQk5ODw4cPIzo6GgBQVVWFAQMGOF3vwIEDGDt2LB588EEAgLe3NwBg//792L59OwBg3Lhx\nuHjxIq5fv+4yQ0BAgHLSmeHDh6OkpMTh+RO1NxYFkt727dsRHBzssGzFihUYOHAgNmzYALvd7nCI\n6Mb0er3DMFDjoZjOnTs7nBRn/vz5SElJuW0enU7X7Be2s+WuMtx3333KdU9PT4fbZDphD907OKdA\nUrNYLFizZo3SPnLkCADg2rVryl/yn3zyCex2OwCgR48eDn+dG41GFBQUQAiB0tJS5YQltxo/fjy2\nbduG8vJyAPXzAw0nJ7nVyJEj8cUXXyh/1Tec3nDMmDHKSW2sViv69u2LHj16wGg0Ij8/HwCQn5+P\n4uLiZp9vQ1Hp0aMHrl275uKVIdIGiwJJqeHUlm+88QZqamowdOhQREREYPny5QCA559/HuvXr4fJ\nZMLx48fRvXt3AEBUVBQ8PT1hMpnw3nvv4aGHHkJAQADCw8Px0ksvYfjw4Q6P0SAsLAxvv/02Jk6c\niKioKEycOFE5o9qt+vbtiz/84Q+YOXMmTCYTZs+eDaB+6+Xw4cOIiorC66+/rpzcZNasWbh06RIi\nIiLwwQcfICQkxGmGxu3ExEQ8++yznGimdsfzKRARkYJbCkREpOBEM5ELo0aNws2bNx2Wbdy4EUOG\nDHFTIiJtcfiIiIgUHD4iIiIFiwIRESlYFIiISMGiQEREChYFIiJS/H/YkgTCKEMUmQAAAABJRU5E\nrkJggg==\n", | |
| "text": [ | |
| "<matplotlib.figure.Figure at 0x3cc2de10>" | |
| ] | |
| } | |
| ], | |
| "prompt_number": 36 | |
| }, | |
| { | |
| "cell_type": "code", | |
| "collapsed": false, | |
| "input": [ | |
| "df_scores.to_csv(\"../data/log_reg_score_vs_features.csv\")" | |
| ], | |
| "language": "python", | |
| "metadata": {}, | |
| "outputs": [] | |
| } | |
| ], | |
| "metadata": {} | |
| } | |
| ] | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment