Created
November 25, 2019 07:27
-
-
Save lewtun/269f4dc908449660d8a5f05064081423 to your computer and use it in GitHub Desktop.
Mapper Pipeline Example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
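| "## Mapper pipeline\n", | |
| "\n", | |
| "This notebook shows how `MapperPipeline` caches fitted steps with `joblib.Memory`: after changing a parameter of the second step only, refitting loads the first step's result from the cache instead of recomputing it." | |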
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "%load_ext autoreload\n", | |
| "%autoreload 2" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from tempfile import mkdtemp\n", | |
| "from shutil import rmtree\n", | |
| "from joblib import Memory\n", | |
| "\n", | |
| "from giotto.mapper.lens import Eccentricity\n", | |
| "from giotto.mapper.mapper import MapperPipeline\n", | |
| "\n", | |
| "import numpy as np\n", | |
| "from sklearn.decomposition import PCA" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 13, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# create a reproducible random 2D integer array of shape (n_rows, 2):\n", | |
| "# column 0 is drawn from [1, 10], column 1 from [11, 20]\n", | |
| "n_rows = 20000\n", | |
| "rng = np.random.RandomState(42)  # fixed seed so Restart & Run All regenerates the same X\n", | |
| "X = np.c_[rng.randint(1, 11, n_rows), rng.randint(11, 21, n_rows)]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# initialise lenses\n", | |
| "lens_1 = Eccentricity()\n", | |
| "lens_2 = Eccentricity()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 15, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# initialise a joblib cache backed by a temporary directory\n", | |
| "cachedir = mkdtemp()\n", | |
| "memory = Memory(location=cachedir, verbose=10)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 16, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# initialise pipeline: two named lens steps, with step results cached via `memory`\n", | |
| "mapper_pipeline = MapperPipeline([('ecc_1', lens_1), ('ecc_2', lens_2)], memory=memory, verbose=True)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 17, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "________________________________________________________________________________\n", | |
| "[Memory] Calling giotto.mapper.mapper._fit_transform_one...\n", | |
| "_fit_transform_one(Eccentricity(exponent=inf, metric='euclidean', metric_params={}), array([[ 1, 13],\n", | |
| " ...,\n", | |
| " [ 3, 15]]), None, None, message_clsname='Pipeline', message='(step 1 of 2) Processing ecc_1')\n", | |
| "[Pipeline] ............. (step 1 of 2) Processing ecc_1, total= 7.1s\n", | |
| "________________________________________________fit_transform_one - 7.1s, 0.1min\n", | |
| "[Pipeline] ............. (step 2 of 2) Processing ecc_2, total= 6.6s\n", | |
| "CPU times: user 9.44 s, sys: 4.18 s, total: 13.6 s\n", | |
| "Wall time: 13.7 s\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%%time\n", | |
| "Xt = mapper_pipeline.fit_transform(X)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 18, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "MapperPipeline(memory=Memory(location=/var/folders/5s/w9kkkqrx1c503clfqpn_zk280000gn/T/tmpg88e4_zk/joblib),\n", | |
| " steps=[('ecc_1',\n", | |
| " Eccentricity(exponent=inf, metric='euclidean',\n", | |
| " metric_params={})),\n", | |
| " ('ecc_2',\n", | |
| " Eccentricity(exponent=2, metric='euclidean',\n", | |
| " metric_params={}))],\n", | |
| " verbose=True)" | |
| ] | |
| }, | |
| "execution_count": 18, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# change a parameter of the second step only, so the cached first step can be reused on refit\n", | |
| "mapper_pipeline.set_params(ecc_2__exponent=2)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 19, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "[Memory]29.0s, 0.5min : Loading _fit_transform_one from /var/folders/5s/w9kkkqrx1c503clfqpn_zk280000gn/T/tmpg88e4_zk/joblib/giotto/mapper/mapper/_fit_transform_one/699a4fdefea0e7672f924df8dc94c4e5\n", | |
| "___________________________________fit_transform_one cache loaded - 0.0s, 0.0min\n", | |
| "[Pipeline] ............. (step 2 of 2) Processing ecc_2, total= 6.2s\n", | |
| "CPU times: user 4.29 s, sys: 1.95 s, total: 6.24 s\n", | |
| "Wall time: 6.24 s\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%%time\n", | |
| "Xt = mapper_pipeline.fit_transform(X)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 20, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "rmtree(cachedir)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.7.5" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 4 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment