Skip to content

Instantly share code, notes, and snippets.

@lewtun
Created November 25, 2019 07:27
Show Gist options
  • Select an option

  • Save lewtun/269f4dc908449660d8a5f05064081423 to your computer and use it in GitHub Desktop.

Select an option

Save lewtun/269f4dc908449660d8a5f05064081423 to your computer and use it in GitHub Desktop.
Mapper Pipeline Example
Display the source blob
Display the rendered blob
Raw
{
"cells": [
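{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Mapper pipeline\n",
"\n",
"This notebook shows how `MapperPipeline` reuses cached steps via `joblib.Memory`: after changing a parameter of the second step only, the first step is loaded from the cache and only the second step is recomputed."
]
},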
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from tempfile import mkdtemp\n",
"from shutil import rmtree\n",
"from joblib import Memory\n",
"\n",
"from giotto.mapper.lens import Eccentricity\n",
"from giotto.mapper.mapper import MapperPipeline\n",
"\n",
"import numpy as np\n",
"from sklearn.decomposition import PCA"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# create a random 2D integer array: column 0 holds values in [1, 10],\n",
"# column 1 holds values in [11, 20]\n",
"n_rows = 20000\n",
"# a locally seeded generator makes Restart & Run All regenerate the same data\n",
"rng = np.random.RandomState(42)\n",
"X = np.c_[rng.randint(1, 11, n_rows), rng.randint(11, 21, n_rows)]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# two separate Eccentricity lenses, both built with their default parameters\n",
"lens_1, lens_2 = Eccentricity(), Eccentricity()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# joblib cache backed by a throw-away temporary directory;\n",
"# verbose=10 makes joblib report cache calls and loads\n",
"cachedir = mkdtemp()\n",
"memory = Memory(\n",
"    location=cachedir,\n",
"    verbose=10,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# chain the two lenses into one pipeline; `memory` enables the joblib cache\n",
"# and `verbose=True` prints the time spent in each step\n",
"mapper_pipeline = MapperPipeline(\n",
"    [('ecc_1', lens_1), ('ecc_2', lens_2)],\n",
"    memory=memory,\n",
"    verbose=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"________________________________________________________________________________\n",
"[Memory] Calling giotto.mapper.mapper._fit_transform_one...\n",
"_fit_transform_one(Eccentricity(exponent=inf, metric='euclidean', metric_params={}), array([[ 1, 13],\n",
" ...,\n",
" [ 3, 15]]), None, None, message_clsname='Pipeline', message='(step 1 of 2) Processing ecc_1')\n",
"[Pipeline] ............. (step 1 of 2) Processing ecc_1, total= 7.1s\n",
"________________________________________________fit_transform_one - 7.1s, 0.1min\n",
"[Pipeline] ............. (step 2 of 2) Processing ecc_2, total= 6.6s\n",
"CPU times: user 9.44 s, sys: 4.18 s, total: 13.6 s\n",
"Wall time: 13.7 s\n"
]
}
],
"source": [
"%%time\n",
"Xt = mapper_pipeline.fit_transform(X)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"MapperPipeline(memory=Memory(location=/var/folders/5s/w9kkkqrx1c503clfqpn_zk280000gn/T/tmpg88e4_zk/joblib),\n",
" steps=[('ecc_1',\n",
" Eccentricity(exponent=inf, metric='euclidean',\n",
" metric_params={})),\n",
" ('ecc_2',\n",
" Eccentricity(exponent=2, metric='euclidean',\n",
" metric_params={}))],\n",
" verbose=True)"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# change a parameter of the second step only; ecc_1 is left untouched\n",
"mapper_pipeline.set_params(ecc_2__exponent=2)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[Memory]29.0s, 0.5min : Loading _fit_transform_one from /var/folders/5s/w9kkkqrx1c503clfqpn_zk280000gn/T/tmpg88e4_zk/joblib/giotto/mapper/mapper/_fit_transform_one/699a4fdefea0e7672f924df8dc94c4e5\n",
"___________________________________fit_transform_one cache loaded - 0.0s, 0.0min\n",
"[Pipeline] ............. (step 2 of 2) Processing ecc_2, total= 6.2s\n",
"CPU times: user 4.29 s, sys: 1.95 s, total: 6.24 s\n",
"Wall time: 6.24 s\n"
]
}
],
"source": [
"%%time\n",
"Xt = mapper_pipeline.fit_transform(X)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"rmtree(cachedir)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment