amirziai · May 6, 2018 23:05
diff --git a/sklearn-pipeline-pickle.ipynb b/sklearn-pipeline-pickle.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.datasets import load_iris\n",
    "from sklearn.externals import joblib\n",
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.feature_selection import SelectKBest, f_regression\n",
    "\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.ensemble import RandomForestClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = load_iris()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys(['DESCR', 'feature_names', 'data', 'target', 'target_names'])"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(data['data'], data['target'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(112, 4)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Use classes as dictionary keys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "models = [LogisticRegression, RandomForestClassifier]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "results = {\n",
    "    model: model().fit(X_train, y_train)\n",
    "    for model in models\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.868421052631579"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results[LogisticRegression].score(X_test, y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "path = 'results.pkl'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Dump the dictionary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['results.pkl']"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "joblib.dump(results, path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "results = joblib.load(path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9736842105263158"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results[RandomForestClassifier].score(X_test, y_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [],
   "source": [
    "results = {\n",
    "    model: Pipeline(steps=[\n",
    "        ('kbest', SelectKBest(f_regression, 2)),\n",
    "        ('model', model())\n",
    "    ]).set_params().fit(X_train, y_train)\n",
    "    for model in models\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.7368421052631579"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results[LogisticRegression].score(X_test, y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['results.pkl']"
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "joblib.dump(results, path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results = joblib.load(path)\n",
    "results[RandomForestClassifier].score(X_test, y_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### GridSearch..."
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 24,
	"metadata": {},
	"outputs": [],
	"source": [
	"from sklearn.model_selection import train_test_split\n",
	"from sklearn.datasets import load_iris\n",
	"from sklearn.externals import joblib\n",
	"from sklearn.pipeline import Pipeline\n",
	"from sklearn.feature_selection import SelectKBest, f_regression\n",
	"\n",
	"from sklearn.linear_model import LogisticRegression\n",
	"from sklearn.ensemble import RandomForestClassifier"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [],
	"source": [
	"data = load_iris()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"dict_keys(['DESCR', 'feature_names', 'data', 'target', 'target_names'])"
	]
	},
	"execution_count": 4,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"data.keys()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [],
	"source": [
	"X_train, X_test, y_train, y_test = train_test_split(data['data'], data['target'])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"(112, 4)"
	]
	},
	"execution_count": 7,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"X_train.shape"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Use classes as dictionary keys"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {},
	"outputs": [],
	"source": [
	"models = [LogisticRegression, RandomForestClassifier]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {},
	"outputs": [],
	"source": [
	"results = {\n",
	" model: model().fit(X_train, y_train)\n",
	" for model in models\n",
	"}"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 15,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"0.868421052631579"
	]
	},
	"execution_count": 15,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"results[LogisticRegression].score(X_test, y_test)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 18,
	"metadata": {},
	"outputs": [],
	"source": [
	"path = 'results.pkl'"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Dump the dictionary"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 17,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"['results.pkl']"
	]
	},
	"execution_count": 17,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"joblib.dump(results, path)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 20,
	"metadata": {},
	"outputs": [],
	"source": [
	"results = joblib.load(path)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 21,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"0.9736842105263158"
	]
	},
	"execution_count": 21,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"results[RandomForestClassifier].score(X_test, y_test)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### Pipeline"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 66,
	"metadata": {},
	"outputs": [],
	"source": [
	"results = {\n",
	" model: Pipeline(steps=[\n",
	" ('kbest', SelectKBest(f_regression, 2)),\n",
	" ('model', model())\n",
	" ]).set_params().fit(X_train, y_train)\n",
	" for model in models\n",
	"}"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 68,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"0.7368421052631579"
	]
	},
	"execution_count": 68,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"results[LogisticRegression].score(X_test, y_test)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 69,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"['results.pkl']"
	]
	},
	"execution_count": 69,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"joblib.dump(results, path)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 70,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"1.0"
	]
	},
	"execution_count": 70,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"results = joblib.load(path)\n",
	"results[RandomForestClassifier].score(X_test, y_test)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"### GridSearch..."
	]
	}
	],
	"metadata": {
	"anaconda-cloud": {},
	"kernelspec": {
	"display_name": "Python [default]",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.5.4"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}
No results found