Created
May 2, 2019 21:34
-
-
Save FavioVazquez/1900695037b5b9490f76b130bb4f5cb9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Collecting fklearn\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/ac/0b/8f7e380ed15ce04370d2160928c338506fa8b13f4f10fd423aec9058f536/fklearn-1.14.0-py3-none-any.whl (57kB)\n", | |
"\u001b[K 100% |████████████████████████████████| 61kB 1.2MB/s \n", | |
"\u001b[?25hCollecting lightgbm<3,>=2.2.2 (from fklearn)\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/77/0f/5157e6b153b3d4a70dc5fbe2ab6f209604197590f387f03177b7a249ac60/lightgbm-2.2.3-py2.py3-none-manylinux1_x86_64.whl (1.2MB)\n", | |
"\u001b[K 100% |████████████████████████████████| 1.2MB 1.9MB/s \n", | |
"\u001b[?25hCollecting swifter<0.300,>=0.284 (from fklearn)\n", | |
" Downloading https://files.pythonhosted.org/packages/1e/5b/aad4d173522a7b5578b7d5c9e8cd2d6c49f3bb9a23895fa16b6283495b2b/swifter-0.287.tar.gz\n", | |
"Collecting shap<1,>=0.28.5 (from fklearn)\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/30/b3/866b0101cbd1829844c35964af68c14ba522a5cce7a1e8d0f7937411d910/shap-0.28.5.tar.gz (223kB)\n", | |
"\u001b[K 100% |████████████████████████████████| 225kB 3.6MB/s \n", | |
"\u001b[?25hCollecting joblib<1,>=0.13.2 (from fklearn)\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/cd/c1/50a758e8247561e58cb87305b1e90b171b8c767b15b12a1734001f41d356/joblib-0.13.2-py2.py3-none-any.whl (278kB)\n", | |
"\u001b[K 100% |████████████████████████████████| 286kB 4.3MB/s \n", | |
"\u001b[?25hCollecting pyarrow<1,>=0.12.0 (from fklearn)\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/ad/25/094b122d828d24b58202712a74e661e36cd551ca62d331e388ff68bae91d/pyarrow-0.13.0-cp36-cp36m-manylinux1_x86_64.whl (48.5MB)\n", | |
"\u001b[K 100% |████████████████████████████████| 48.5MB 152kB/s \n", | |
"\u001b[?25hCollecting tqdm<5,>=4.31.1 (from fklearn)\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/6c/4b/c38b5144cf167c4f52288517436ccafefe9dc01b8d1c190e18a6b154cd4a/tqdm-4.31.1-py2.py3-none-any.whl (48kB)\n", | |
"\u001b[K 100% |████████████████████████████████| 51kB 1.9MB/s \n", | |
"\u001b[?25hRequirement already satisfied: pandas<0.25,>=0.24.1 in /opt/conda/lib/python3.6/site-packages (from fklearn) (0.24.1)\n", | |
"Requirement already satisfied: numpy<2,>=1.15.4 in /opt/conda/lib/python3.6/site-packages (from fklearn) (1.16.2)\n", | |
"Collecting xgboost<1,>=0.81 (from fklearn)\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/6a/49/7e10686647f741bd9c8918b0decdb94135b542fe372ca1100739b8529503/xgboost-0.82-py2.py3-none-manylinux1_x86_64.whl (114.0MB)\n", | |
"\u001b[K 100% |████████████████████████████████| 114.0MB 65kB/s \n", | |
"\u001b[?25hCollecting scikit-image<1,>=0.14.2 (from fklearn)\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/d4/ab/674e168bf7d0bc597218b3bec858d02c23fbac9ec1fec9cad878c6cee95f/scikit_image-0.15.0-cp36-cp36m-manylinux1_x86_64.whl (26.3MB)\n", | |
"\u001b[K 100% |████████████████████████████████| 26.3MB 257kB/s \n", | |
"\u001b[?25hRequirement already satisfied: matplotlib<4,>=3.0.2 in /opt/conda/lib/python3.6/site-packages (from fklearn) (3.0.2)\n", | |
"Requirement already satisfied: cloudpickle<1,>=0.8.0 in /opt/conda/lib/python3.6/site-packages (from fklearn) (0.8.0)\n", | |
"Collecting schema<1,>=0.6.2 (from fklearn)\n", | |
" Downloading https://files.pythonhosted.org/packages/74/f5/a14c01bcc9dbf99fd164fe2c55229569456f991a162daf62d3275714d241/schema-0.7.0-py2.py3-none-any.whl\n", | |
"Requirement already satisfied: scikit-learn<1,>=0.20.2 in /opt/conda/lib/python3.6/site-packages (from fklearn) (0.20.2)\n", | |
"Requirement already satisfied: statsmodels<1,>=0.9.0 in /opt/conda/lib/python3.6/site-packages (from fklearn) (0.9.0)\n", | |
"Collecting boto3<2,>=1.7.24 (from fklearn)\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/51/99/cef419ab955dde1c35d24c4c8dca3f76c72bc81605af6ca36394871fbaa8/boto3-1.9.141-py2.py3-none-any.whl (128kB)\n", | |
"\u001b[K 100% |████████████████████████████████| 133kB 3.7MB/s \n", | |
"\u001b[?25hCollecting catboost<1,>=0.14.2 (from fklearn)\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/2f/c4/f130237b24efd1941cb685da12496675a90045129b66774751f1bf629dfd/catboost-0.14.2-cp36-none-manylinux1_x86_64.whl (60.6MB)\n", | |
"\u001b[K 100% |████████████████████████████████| 60.6MB 112kB/s \n", | |
"\u001b[?25hRequirement already satisfied: toolz<1,>=0.9.0 in /opt/conda/lib/python3.6/site-packages (from fklearn) (0.9.0)\n", | |
"Collecting s3fs<1,>=0.2.0 (from fklearn)\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/68/8a/a2430bda6106aaaee6d53fa9be914ff3023d3f9d547b959a47641addad33/s3fs-0.2.1.tar.gz (46kB)\n", | |
"\u001b[K 100% |████████████████████████████████| 51kB 1.9MB/s \n", | |
"\u001b[?25hRequirement already satisfied: scipy in /opt/conda/lib/python3.6/site-packages (from lightgbm<3,>=2.2.2->fklearn) (1.2.1)\n", | |
"Collecting psutil (from swifter<0.300,>=0.284->fklearn)\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/c6/c1/beed5e4eaa1345901b595048fab1c85aee647ea0fc02d9e8bf9aceb81078/psutil-5.6.2.tar.gz (432kB)\n", | |
"\u001b[K 100% |████████████████████████████████| 440kB 3.2MB/s \n", | |
"\u001b[?25hRequirement already satisfied: dask[complete]>=0.19.0 in /opt/conda/lib/python3.6/site-packages (from swifter<0.300,>=0.284->fklearn) (1.1.2)\n", | |
"Requirement already satisfied: ipywidgets>=7.0.0 in /opt/conda/lib/python3.6/site-packages (from swifter<0.300,>=0.284->fklearn) (7.4.2)\n", | |
"Requirement already satisfied: numba in /opt/conda/lib/python3.6/site-packages (from swifter<0.300,>=0.284->fklearn) (0.42.0)\n", | |
"Requirement already satisfied: ipython in /opt/conda/lib/python3.6/site-packages (from shap<1,>=0.28.5->fklearn) (7.3.0)\n", | |
"Requirement already satisfied: six>=1.0.0 in /opt/conda/lib/python3.6/site-packages (from pyarrow<1,>=0.12.0->fklearn) (1.12.0)\n", | |
"Requirement already satisfied: python-dateutil>=2.5.0 in /opt/conda/lib/python3.6/site-packages (from pandas<0.25,>=0.24.1->fklearn) (2.8.0)\n", | |
"Requirement already satisfied: pytz>=2011k in /opt/conda/lib/python3.6/site-packages (from pandas<0.25,>=0.24.1->fklearn) (2018.9)\n", | |
"Requirement already satisfied: networkx>=2.0 in /opt/conda/lib/python3.6/site-packages (from scikit-image<1,>=0.14.2->fklearn) (2.2)\n", | |
"Requirement already satisfied: PyWavelets>=0.4.0 in /opt/conda/lib/python3.6/site-packages (from scikit-image<1,>=0.14.2->fklearn) (1.0.1)\n", | |
"Requirement already satisfied: pillow>=4.3.0 in /opt/conda/lib/python3.6/site-packages (from scikit-image<1,>=0.14.2->fklearn) (5.4.1)\n", | |
"Requirement already satisfied: imageio>=2.0.1 in /opt/conda/lib/python3.6/site-packages (from scikit-image<1,>=0.14.2->fklearn) (2.5.0)\n", | |
"Requirement already satisfied: cycler>=0.10 in /opt/conda/lib/python3.6/site-packages (from matplotlib<4,>=3.0.2->fklearn) (0.10.0)\n", | |
"Requirement already satisfied: kiwisolver>=1.0.1 in /opt/conda/lib/python3.6/site-packages (from matplotlib<4,>=3.0.2->fklearn) (1.0.1)\n", | |
"Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /opt/conda/lib/python3.6/site-packages (from matplotlib<4,>=3.0.2->fklearn) (2.3.1)\n", | |
"Collecting contextlib2==0.5.5 (from schema<1,>=0.6.2->fklearn)\n", | |
" Downloading https://files.pythonhosted.org/packages/a2/71/8273a7eeed0aff6a854237ab5453bc9aa67deb49df4832801c21f0ff3782/contextlib2-0.5.5-py2.py3-none-any.whl\n", | |
"Collecting s3transfer<0.3.0,>=0.2.0 (from boto3<2,>=1.7.24->fklearn)\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/d7/de/5737f602e22073ecbded7a0c590707085e154e32b68d86545dcc31004c02/s3transfer-0.2.0-py2.py3-none-any.whl (69kB)\n", | |
"\u001b[K 100% |████████████████████████████████| 71kB 2.2MB/s \n", | |
"\u001b[?25hCollecting botocore<1.13.0,>=1.12.141 (from boto3<2,>=1.7.24->fklearn)\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/af/72/bb5092d4f8a7b6c9a4508b784cdfed6d856e2a202383c345a66da71cc612/botocore-1.12.141-py2.py3-none-any.whl (5.4MB)\n", | |
"\u001b[K 100% |████████████████████████████████| 5.4MB 1.3MB/s \n", | |
"\u001b[?25hCollecting jmespath<1.0.0,>=0.7.1 (from boto3<2,>=1.7.24->fklearn)\n", | |
" Downloading https://files.pythonhosted.org/packages/83/94/7179c3832a6d45b266ddb2aac329e101367fbdb11f425f13771d27f225bb/jmespath-0.9.4-py2.py3-none-any.whl\n", | |
"Collecting enum34 (from catboost<1,>=0.14.2->fklearn)\n", | |
" Downloading https://files.pythonhosted.org/packages/af/42/cb9355df32c69b553e72a2e28daee25d1611d2c0d9c272aa1d34204205b2/enum34-1.1.6-py3-none-any.whl\n", | |
"Collecting graphviz (from catboost<1,>=0.14.2->fklearn)\n", | |
" Downloading https://files.pythonhosted.org/packages/1f/e2/ef2581b5b86625657afd32030f90cf2717456c1d2b711ba074bf007c0f1a/graphviz-0.10.1-py2.py3-none-any.whl\n", | |
"Collecting distributed>=1.22; extra == \"complete\" (from dask[complete]>=0.19.0->swifter<0.300,>=0.284->fklearn)\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/14/37/560b694bc2fe80c04bb64e3959e29afda4f7ed715726e357b05f01cfe07e/distributed-1.27.1-py2.py3-none-any.whl (514kB)\n", | |
"\u001b[K 100% |████████████████████████████████| 522kB 2.5MB/s \n", | |
"\u001b[?25hCollecting partd>=0.3.8; extra == \"complete\" (from dask[complete]>=0.19.0->swifter<0.300,>=0.284->fklearn)\n", | |
" Downloading https://files.pythonhosted.org/packages/9e/f5/c02903ad5a444c9f80e4d1fe4d512afd76e3801de2fba80ea9ed28f9290c/partd-0.3.10-py3-none-any.whl\n", | |
"Requirement already satisfied: widgetsnbextension~=3.4.0 in /opt/conda/lib/python3.6/site-packages (from ipywidgets>=7.0.0->swifter<0.300,>=0.284->fklearn) (3.4.2)\n", | |
"Requirement already satisfied: traitlets>=4.3.1 in /opt/conda/lib/python3.6/site-packages (from ipywidgets>=7.0.0->swifter<0.300,>=0.284->fklearn) (4.3.2)\n", | |
"Requirement already satisfied: ipykernel>=4.5.1 in /opt/conda/lib/python3.6/site-packages (from ipywidgets>=7.0.0->swifter<0.300,>=0.284->fklearn) (5.1.0)\n", | |
"Requirement already satisfied: nbformat>=4.2.0 in /opt/conda/lib/python3.6/site-packages (from ipywidgets>=7.0.0->swifter<0.300,>=0.284->fklearn) (4.4.0)\n", | |
"Requirement already satisfied: llvmlite>=0.27.0dev0 in /opt/conda/lib/python3.6/site-packages (from numba->swifter<0.300,>=0.284->fklearn) (0.27.0)\n", | |
"Requirement already satisfied: jedi>=0.10 in /opt/conda/lib/python3.6/site-packages (from ipython->shap<1,>=0.28.5->fklearn) (0.13.3)\n", | |
"Requirement already satisfied: setuptools>=18.5 in /opt/conda/lib/python3.6/site-packages (from ipython->shap<1,>=0.28.5->fklearn) (40.8.0)\n", | |
"Requirement already satisfied: pickleshare in /opt/conda/lib/python3.6/site-packages (from ipython->shap<1,>=0.28.5->fklearn) (0.7.5)\n", | |
"Requirement already satisfied: pexpect; sys_platform != \"win32\" in /opt/conda/lib/python3.6/site-packages (from ipython->shap<1,>=0.28.5->fklearn) (4.6.0)\n", | |
"Requirement already satisfied: prompt-toolkit<2.1.0,>=2.0.0 in /opt/conda/lib/python3.6/site-packages (from ipython->shap<1,>=0.28.5->fklearn) (2.0.9)\n", | |
"Requirement already satisfied: pygments in /opt/conda/lib/python3.6/site-packages (from ipython->shap<1,>=0.28.5->fklearn) (2.3.1)\n", | |
"Requirement already satisfied: decorator in /opt/conda/lib/python3.6/site-packages (from ipython->shap<1,>=0.28.5->fklearn) (4.3.2)\n", | |
"Requirement already satisfied: backcall in /opt/conda/lib/python3.6/site-packages (from ipython->shap<1,>=0.28.5->fklearn) (0.1.0)\n", | |
"Requirement already satisfied: urllib3<1.25,>=1.20; python_version >= \"3.4\" in /opt/conda/lib/python3.6/site-packages (from botocore<1.13.0,>=1.12.141->boto3<2,>=1.7.24->fklearn) (1.24.1)\n", | |
"Collecting docutils>=0.10 (from botocore<1.13.0,>=1.12.141->boto3<2,>=1.7.24->fklearn)\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/36/fa/08e9e6e0e3cbd1d362c3bbee8d01d0aedb2155c4ac112b19ef3cae8eed8d/docutils-0.14-py3-none-any.whl (543kB)\n", | |
"\u001b[K 100% |████████████████████████████████| 552kB 2.7MB/s \n", | |
"\u001b[?25hCollecting tblib (from distributed>=1.22; extra == \"complete\"->dask[complete]>=0.19.0->swifter<0.300,>=0.284->fklearn)\n", | |
" Downloading https://files.pythonhosted.org/packages/64/b5/ebb1af4d843047ccd7292b92f5e5f8643153e8b95d14508d9fe3b35f7004/tblib-1.4.0-py2.py3-none-any.whl\n", | |
"Collecting zict>=0.1.3 (from distributed>=1.22; extra == \"complete\"->dask[complete]>=0.19.0->swifter<0.300,>=0.284->fklearn)\n", | |
" Downloading https://files.pythonhosted.org/packages/bd/45/a2e6f95a850cd407d785f2f8624b02e72baf6ab910aea4bdcac7e18b4871/zict-0.1.4-py2.py3-none-any.whl\n", | |
"Requirement already satisfied: pyyaml in /opt/conda/lib/python3.6/site-packages (from distributed>=1.22; extra == \"complete\"->dask[complete]>=0.19.0->swifter<0.300,>=0.284->fklearn) (3.13)\n", | |
"Collecting sortedcontainers!=2.0.0,!=2.0.1 (from distributed>=1.22; extra == \"complete\"->dask[complete]>=0.19.0->swifter<0.300,>=0.284->fklearn)\n", | |
" Downloading https://files.pythonhosted.org/packages/13/f3/cf85f7c3a2dbd1a515d51e1f1676d971abe41bba6f4ab5443240d9a78e5b/sortedcontainers-2.1.0-py2.py3-none-any.whl\n", | |
"Collecting msgpack (from distributed>=1.22; extra == \"complete\"->dask[complete]>=0.19.0->swifter<0.300,>=0.284->fklearn)\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/92/7e/ae9e91c1bb8d846efafd1f353476e3fd7309778b582d2fb4cea4cc15b9a2/msgpack-0.6.1-cp36-cp36m-manylinux1_x86_64.whl (248kB)\n", | |
"\u001b[K 100% |████████████████████████████████| 256kB 2.3MB/s \n", | |
"\u001b[?25hCollecting click>=6.6 (from distributed>=1.22; extra == \"complete\"->dask[complete]>=0.19.0->swifter<0.300,>=0.284->fklearn)\n", | |
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/fa/37/45185cb5abbc30d7257104c434fe0b07e5a195a6847506c074527aa599ec/Click-7.0-py2.py3-none-any.whl (81kB)\n", | |
"\u001b[K 100% |████████████████████████████████| 81kB 2.4MB/s \n", | |
"\u001b[?25hRequirement already satisfied: tornado>=5 in /opt/conda/lib/python3.6/site-packages (from distributed>=1.22; extra == \"complete\"->dask[complete]>=0.19.0->swifter<0.300,>=0.284->fklearn) (5.1.1)\n", | |
"Collecting locket (from partd>=0.3.8; extra == \"complete\"->dask[complete]>=0.19.0->swifter<0.300,>=0.284->fklearn)\n", | |
" Downloading https://files.pythonhosted.org/packages/d0/22/3c0f97614e0be8386542facb3a7dcfc2584f7b83608c02333bced641281c/locket-0.2.0.tar.gz\n", | |
"Requirement already satisfied: notebook>=4.4.1 in /opt/conda/lib/python3.6/site-packages (from widgetsnbextension~=3.4.0->ipywidgets>=7.0.0->swifter<0.300,>=0.284->fklearn) (5.7.4)\n", | |
"Requirement already satisfied: ipython_genutils in /opt/conda/lib/python3.6/site-packages (from traitlets>=4.3.1->ipywidgets>=7.0.0->swifter<0.300,>=0.284->fklearn) (0.2.0)\n", | |
"Requirement already satisfied: jupyter-client in /opt/conda/lib/python3.6/site-packages (from ipykernel>=4.5.1->ipywidgets>=7.0.0->swifter<0.300,>=0.284->fklearn) (5.2.4)\n", | |
"Requirement already satisfied: jsonschema!=2.5.0,>=2.4 in /opt/conda/lib/python3.6/site-packages (from nbformat>=4.2.0->ipywidgets>=7.0.0->swifter<0.300,>=0.284->fklearn) (2.6.0)\n", | |
"Requirement already satisfied: jupyter_core in /opt/conda/lib/python3.6/site-packages (from nbformat>=4.2.0->ipywidgets>=7.0.0->swifter<0.300,>=0.284->fklearn) (4.4.0)\n", | |
"Requirement already satisfied: parso>=0.3.0 in /opt/conda/lib/python3.6/site-packages (from jedi>=0.10->ipython->shap<1,>=0.28.5->fklearn) (0.3.4)\n", | |
"Requirement already satisfied: ptyprocess>=0.5 in /opt/conda/lib/python3.6/site-packages (from pexpect; sys_platform != \"win32\"->ipython->shap<1,>=0.28.5->fklearn) (0.6.0)\n", | |
"Requirement already satisfied: wcwidth in /opt/conda/lib/python3.6/site-packages (from prompt-toolkit<2.1.0,>=2.0.0->ipython->shap<1,>=0.28.5->fklearn) (0.1.7)\n", | |
"Collecting heapdict (from zict>=0.1.3->distributed>=1.22; extra == \"complete\"->dask[complete]>=0.19.0->swifter<0.300,>=0.284->fklearn)\n", | |
" Downloading https://files.pythonhosted.org/packages/e2/ca/f5feba2f939c97629dbce52a17acc95a0d10256ef620334795379dda8ce6/HeapDict-1.0.0.tar.gz\n", | |
"Requirement already satisfied: pyzmq>=17 in /opt/conda/lib/python3.6/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.4.0->ipywidgets>=7.0.0->swifter<0.300,>=0.284->fklearn) (18.0.0)\n", | |
"Requirement already satisfied: terminado>=0.8.1 in /opt/conda/lib/python3.6/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.4.0->ipywidgets>=7.0.0->swifter<0.300,>=0.284->fklearn) (0.8.1)\n", | |
"Requirement already satisfied: prometheus-client in /opt/conda/lib/python3.6/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.4.0->ipywidgets>=7.0.0->swifter<0.300,>=0.284->fklearn) (0.6.0)\n", | |
"Requirement already satisfied: Send2Trash in /opt/conda/lib/python3.6/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.4.0->ipywidgets>=7.0.0->swifter<0.300,>=0.284->fklearn) (1.5.0)\n", | |
"Requirement already satisfied: jinja2 in /opt/conda/lib/python3.6/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.4.0->ipywidgets>=7.0.0->swifter<0.300,>=0.284->fklearn) (2.10)\n", | |
"Requirement already satisfied: nbconvert in /opt/conda/lib/python3.6/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.4.0->ipywidgets>=7.0.0->swifter<0.300,>=0.284->fklearn) (5.3.1)\n", | |
"Requirement already satisfied: MarkupSafe>=0.23 in /opt/conda/lib/python3.6/site-packages (from jinja2->notebook>=4.4.1->widgetsnbextension~=3.4.0->ipywidgets>=7.0.0->swifter<0.300,>=0.284->fklearn) (1.1.1)\n", | |
"Requirement already satisfied: mistune>=0.7.4 in /opt/conda/lib/python3.6/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.4.0->ipywidgets>=7.0.0->swifter<0.300,>=0.284->fklearn) (0.8.4)\n", | |
"Requirement already satisfied: entrypoints>=0.2.2 in /opt/conda/lib/python3.6/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.4.0->ipywidgets>=7.0.0->swifter<0.300,>=0.284->fklearn) (0.3)\n", | |
"Requirement already satisfied: bleach in /opt/conda/lib/python3.6/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.4.0->ipywidgets>=7.0.0->swifter<0.300,>=0.284->fklearn) (3.1.0)\n", | |
"Requirement already satisfied: pandocfilters>=1.4.1 in /opt/conda/lib/python3.6/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.4.0->ipywidgets>=7.0.0->swifter<0.300,>=0.284->fklearn) (1.4.2)\n", | |
"Requirement already satisfied: testpath in /opt/conda/lib/python3.6/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.4.0->ipywidgets>=7.0.0->swifter<0.300,>=0.284->fklearn) (0.4.2)\n", | |
"Requirement already satisfied: webencodings in /opt/conda/lib/python3.6/site-packages (from bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.4.0->ipywidgets>=7.0.0->swifter<0.300,>=0.284->fklearn) (0.5.1)\n", | |
"Building wheels for collected packages: swifter, shap, s3fs, psutil, locket, heapdict\n", | |
" Building wheel for swifter (setup.py) ... \u001b[?25ldone\n", | |
"\u001b[?25h Stored in directory: /home/matrix/.cache/pip/wheels/97/17/94/ff3415e88282ef0e435671f061c170c552671b9d3af75e96de\n", | |
" Building wheel for shap (setup.py) ... \u001b[?25ldone\n", | |
"\u001b[?25h Stored in directory: /home/matrix/.cache/pip/wheels/bf/26/bd/912db1314f1cef0171d9b7f128dd01e8b8c92ed8d0062e632d\n", | |
" Building wheel for s3fs (setup.py) ... \u001b[?25ldone\n", | |
"\u001b[?25h Stored in directory: /home/matrix/.cache/pip/wheels/f5/2d/de/6e81a0885c7c5c4731bfc8f1d27abf80414d3633d6d6b103ef\n", | |
" Building wheel for psutil (setup.py) ... \u001b[?25ldone\n", | |
"\u001b[?25h Stored in directory: /home/matrix/.cache/pip/wheels/17/08/ec/22b464874958c3fc91e1a75748fae2220eb704a8b1035f9a03\n", | |
" Building wheel for locket (setup.py) ... \u001b[?25ldone\n", | |
"\u001b[?25h Stored in directory: /home/matrix/.cache/pip/wheels/26/1e/e8/4fa236ec931b1a0cdd61578e20d4934d7bf188858723b84698\n", | |
" Building wheel for heapdict (setup.py) ... \u001b[?25ldone\n", | |
"\u001b[?25h Stored in directory: /home/matrix/.cache/pip/wheels/40/b9/42/344857b482c954f48bcff6db72d388e30bf2bee4ed14706faa\n", | |
"Successfully built swifter shap s3fs psutil locket heapdict\n", | |
"Installing collected packages: lightgbm, psutil, tqdm, swifter, scikit-image, shap, joblib, pyarrow, xgboost, contextlib2, schema, docutils, jmespath, botocore, s3transfer, boto3, enum34, graphviz, catboost, s3fs, fklearn, tblib, heapdict, zict, sortedcontainers, msgpack, click, distributed, locket, partd\n", | |
"\u001b[33m The script tqdm is installed in '/home/matrix/.local/bin' which is not on PATH.\n", | |
" Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\n", | |
"\u001b[33m The script skivi is installed in '/home/matrix/.local/bin' which is not on PATH.\n", | |
" Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\n", | |
"\u001b[33m The script plasma_store is installed in '/home/matrix/.local/bin' which is not on PATH.\n", | |
" Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\n", | |
"\u001b[33m The scripts dask-mpi, dask-remote, dask-scheduler, dask-ssh, dask-submit and dask-worker are installed in '/home/matrix/.local/bin' which is not on PATH.\n", | |
" Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\n", | |
"Successfully installed boto3-1.9.141 botocore-1.12.141 catboost-0.14.2 click-7.0 contextlib2-0.5.5 distributed-1.27.1 docutils-0.14 enum34-1.1.6 fklearn-1.14.0 graphviz-0.10.1 heapdict-1.0.0 jmespath-0.9.4 joblib-0.13.2 lightgbm-2.2.3 locket-0.2.0 msgpack-0.6.1 partd-0.3.10 psutil-5.6.2 pyarrow-0.13.0 s3fs-0.2.1 s3transfer-0.2.0 schema-0.7.0 scikit-image-0.15.0 shap-0.28.5 sortedcontainers-2.1.0 swifter-0.287 tblib-1.4.0 tqdm-4.31.1 xgboost-0.82 zict-0.1.4\n" | |
] | |
} | |
], | |
"source": [ | |
"# Pinned to the release this notebook was executed with (see the recorded install log)\n", | |
"# so that a fresh environment reproduces the same run. %pip installs into the\n", | |
"# environment of the running kernel, which a shell `!pip` is not guaranteed to do.\n", | |
"%pip install --user fklearn==1.14.0" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import toolz as fp\n", | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"from matplotlib import pyplot as plt\n", | |
"import numpy.random as random\n", | |
"import sys\n", | |
"from importlib import reload\n", | |
"import shap" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def reload_all(target):\n", | |
"    \"\"\"Reload every already-imported module whose name contains `target`.\n", | |
"\n", | |
"    Parameters\n", | |
"    ----------\n", | |
"    target : str\n", | |
"        Substring matched against the keys of `sys.modules`.\n", | |
"\n", | |
"    Returns\n", | |
"    -------\n", | |
"    None\n", | |
"        The function is called for its side effect only.\n", | |
"    \"\"\"\n", | |
"    # Snapshot the matching names first: reloading can register new modules,\n", | |
"    # and sys.modules must not change size while it is being iterated.\n", | |
"    matching_names = [name for name in list(sys.modules.keys()) if target in name]\n", | |
"    for name in matching_names:\n", | |
"        module = sys.modules[name]\n", | |
"        # Falsy entries (for example None placeholders) cannot be reloaded; skip them.\n", | |
"        if module:\n", | |
"            reload(module)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"reload_all('fklearn')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import warnings\n", | |
"warnings.simplefilter(\"ignore\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from fklearn.training.transformation import capper, floorer, prediction_ranger\n", | |
"from fklearn.training.pipeline import build_pipeline" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from fklearn.validation.evaluators import mean_prediction_evaluator, r2_evaluator, mse_evaluator, combined_evaluators\n", | |
"from fklearn.validation.validator import validator\n", | |
"from fklearn.validation.splitters import k_fold_splitter, out_of_time_and_space_splitter\n", | |
"from fklearn.metrics.pd_extractors import evaluator_extractor as pd_evaluator_extractor, extract as pd_extract, \\\n", | |
" reverse_learning_curve_evaluator_extractor, evaluator_extractor, extract" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Generate random data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"random.seed(131)\n", | |
"dates = pd.DataFrame({\"score_date\": pd.date_range(\"2019-01-01\", \"2019-12-31\")})\n", | |
"ids = pd.DataFrame({\"id\": np.arange(0, 100)})\n", | |
"\n", | |
"# Constant helper column: merging on it pairs every id with every date (cross join).\n", | |
"dates[\"key\"] = 1\n", | |
"ids[\"key\"] = 1\n", | |
"\n", | |
"data = pd.merge(ids, dates, on=\"key\").drop(\"key\", axis=1)\n", | |
"\n", | |
"data[\"x1\"] = 23*random.randn(data.shape[0]) + 500\n", | |
"data[\"x2\"] = 59*random.randn(data.shape[0]) + 235\n", | |
"# Noise feature: x3 does not enter the target below.\n", | |
"data[\"x3\"] = 73*random.randn(data.shape[0]) + 793\n", | |
"\n", | |
"# The dates are converted to integers explicitly as int64: a plain `int` maps to a\n", | |
"# platform-dependent width in NumPy, which can be too narrow for datetime values.\n", | |
"# NOTE(review): the integer dates multiplied by 200 appear to exceed the int64 range,\n", | |
"# so the product presumably wraps around; left unchanged to keep the generated data.\n", | |
"data[\"y\"] = (\n", | |
"    0.37 * data[\"x1\"]\n", | |
"    + 0.97*data[\"x2\"] + 0.32*data[\"x2\"]**2\n", | |
"    - 5 * data[\"id\"] * 0.2\n", | |
"    + np.cos(pd.to_datetime(data[\"score_date\"]).astype(\"int64\") * 200) * 20\n", | |
")\n", | |
"\n", | |
"# inject NaNs (randint can repeat an index, so at most 100 rows per column are blanked)\n", | |
"nan_idx = np.random.randint(0, data.shape[0], size=100)\n", | |
"data.loc[nan_idx, \"x1\"] = np.nan\n", | |
"\n", | |
"nan_idx = np.random.randint(0, data.shape[0], size=100)\n", | |
"data.loc[nan_idx, \"x2\"] = np.nan" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>id</th>\n", | |
" <th>score_date</th>\n", | |
" <th>x1</th>\n", | |
" <th>x2</th>\n", | |
" <th>x3</th>\n", | |
" <th>y</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>2019-01-01</td>\n", | |
" <td>510.355134</td>\n", | |
" <td>236.471776</td>\n", | |
" <td>906.162151</td>\n", | |
" <td>18306.384983</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0</td>\n", | |
" <td>2019-01-02</td>\n", | |
" <td>503.469826</td>\n", | |
" <td>226.489534</td>\n", | |
" <td>705.821412</td>\n", | |
" <td>16837.011069</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>0</td>\n", | |
" <td>2019-01-03</td>\n", | |
" <td>536.412361</td>\n", | |
" <td>187.096411</td>\n", | |
" <td>875.440154</td>\n", | |
" <td>11561.595513</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>0</td>\n", | |
" <td>2019-01-04</td>\n", | |
" <td>471.144774</td>\n", | |
" <td>225.913422</td>\n", | |
" <td>840.266189</td>\n", | |
" <td>16742.066653</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>0</td>\n", | |
" <td>2019-01-05</td>\n", | |
" <td>482.795605</td>\n", | |
" <td>333.892673</td>\n", | |
" <td>835.026821</td>\n", | |
" <td>36170.022285</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" id score_date x1 x2 x3 y\n", | |
"0 0 2019-01-01 510.355134 236.471776 906.162151 18306.384983\n", | |
"1 0 2019-01-02 503.469826 226.489534 705.821412 16837.011069\n", | |
"2 0 2019-01-03 536.412361 187.096411 875.440154 11561.595513\n", | |
"3 0 2019-01-04 471.144774 225.913422 840.266189 16742.066653\n", | |
"4 0 2019-01-05 482.795605 333.892673 835.026821 36170.022285" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"data.head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Split the data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from fklearn.preprocessing.splitting import space_time_split_dataset\n", | |
"train_start = \"2019-01-01\"\n", | |
"train_end = \"2019-06-30\"\n", | |
"holdout_end = \"2019-12-31\"\n", | |
"\n", | |
"split_fn = space_time_split_dataset(train_start_date=train_start,\n", | |
" train_end_date=train_end,\n", | |
" holdout_end_date=holdout_end,\n", | |
" split_seed=42, space_holdout_percentage=.05,\n", | |
" space_column=\"id\", time_column=\"score_date\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"((17100, 6), (900, 6), (18400, 6), (920, 6))" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"train_set, intime_outspace_hdout, outime_inspace_hdout, outime_outspace_hdout = split_fn(data)\n", | |
"train_set.shape, intime_outspace_hdout.shape, outime_inspace_hdout.shape, outime_outspace_hdout.shape" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Pre-process the data and define the learning function" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"FEATURES = [\"x1\", \"x2\", \"x3\"]\n", | |
"TARGET = [\"y\"]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from fklearn.training.imputation import imputer\n", | |
"my_imputer = imputer(columns_to_impute=FEATURES, impute_strategy=\"median\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from fklearn.training.transformation import standard_scaler\n", | |
"my_scaler = standard_scaler(columns_to_scale=FEATURES)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from fklearn.training.regression import catboost_regressor_learner\n", | |
"# Reuse the FEATURES / TARGET constants so the model is configured with exactly the\n", | |
"# columns the imputer and scaler were given, instead of repeating the literals.\n", | |
"my_model = catboost_regressor_learner(\n", | |
"    features=FEATURES,\n", | |
"    target=TARGET[0],  # TARGET is a one-element list; a single column name is passed here\n", | |
"    prediction_column='prediction',\n", | |
"    extra_params={'random_seed': 139, 'thread_count': 8},\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"reload_all('fklearn')\n", | |
"# Learns an Empirical Cumulative Distribution Function from the specified column in the input DataFrame.\n", | |
"from fklearn.training.transformation import ecdfer \n", | |
"my_ecdefer = ecdfer(prediction_column=\"prediction\", ecdf_column=\"prediction_ecdf\")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Building a pipeline" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"my_learner = build_pipeline(my_imputer, my_scaler, my_model, my_ecdefer)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Train" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"(prediction_function, _, logs) = my_learner(train_set)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Evaluate" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"my_evaluator = combined_evaluators(evaluators=[\n", | |
" mean_prediction_evaluator(prediction_column='prediction'),\n", | |
" r2_evaluator(prediction_column='prediction', target_column='y'),\n", | |
" mse_evaluator(prediction_column='prediction', target_column='y'),\n", | |
"])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"test_predictions = prediction_function(outime_outspace_hdout)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'mean_evaluator__prediction': 19093.91479387743,\n", | |
" 'r2_evaluator__y': 0.9958645292645391,\n", | |
" 'mse_evaluator__y': 335024.9617668662}" | |
] | |
}, | |
"execution_count": 23, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"my_evaluator(test_predictions)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.8" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment