Last active
December 28, 2016 14:27
-
-
Save benjaminwilson/b25a321f292f98d74269b83d4ed2b9a8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# LCD digits dataset shows NMF parts-based decomposition\n", | |
"\n", | |
"This synthetic image dataset shows very nicely how NMF decomposes images as the sums of their parts. The collection of images is encoded as a 2d array of non-negative values. Each row corresponds to an image, and each column corresponds to a pixel. The non-negative entries represent the whiteness of the pixel, encoded here as a value between 0 and 1.\n", | |
" \n", | |
"See also the accompanying blog post http://building-babylon.net/2016/12/28/an-lcd-digit-dataset-for-illustrating-the-parts-based-representation-of-nmf/ .\n", | |
"\n", | |
"Released under Apache Licence v2.0. I hope you find it useful." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Layout constants, read as globals by the stroke-drawing and image-display functions.\n", | |
"length = 4 # length of any LCD cell (\"stroke\")\n", | |
"# (rows, columns) of each image; evaluates to (13, 8) for length = 4\n", | |
"shape = (2 * length + 5, length + 4) # shape of the images" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Code for displaying a vector as an image" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 50, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"from matplotlib import pyplot as plt\n", | |
"\n", | |
"def show_as_image(vector):\n", | |
" \"\"\"\n", | |
" Given a 1d vector representing an image, display that image in \n", | |
" black and white. If there are negative values, then use red for \n", | |
" that pixel.\n", | |
" (displaying negative pixel values in red allows e.g. visual contrasting\n", | |
" of PCA and NMF components)\n", | |
" \"\"\"\n", | |
" bitmap = vector.copy().reshape(shape) # make a square array\n", | |
" bitmap /= np.abs(vector).max() # normalise (a copy!)\n", | |
" bitmap = bitmap[:,:,np.newaxis]\n", | |
" rgb_layers = [np.abs(bitmap)] + [bitmap.clip(0)] * 2\n", | |
" rgb_bitmap = np.concatenate(rgb_layers, axis=-1)\n", | |
" plt.figure(figsize=(2, 1))\n", | |
" plt.imshow(rgb_bitmap, interpolation='nearest')\n", | |
" plt.xticks([])\n", | |
" plt.yticks([])\n", | |
" plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Code for generating the images" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[[ 0. 0. 0. 0. 0. 0. 0. 0.]\n", | |
" [ 0. 0. 1. 1. 1. 1. 0. 0.]\n", | |
" [ 0. 0. 0. 0. 0. 0. 0. 0.]\n", | |
" [ 0. 0. 0. 0. 0. 0. 0. 0.]\n", | |
" [ 0. 0. 0. 0. 0. 0. 0. 0.]\n", | |
" [ 0. 0. 0. 0. 0. 0. 0. 0.]\n", | |
" [ 0. 0. 0. 0. 0. 0. 0. 0.]\n", | |
" [ 0. 0. 0. 0. 0. 0. 0. 0.]\n", | |
" [ 0. 0. 0. 0. 0. 0. 0. 0.]\n", | |
" [ 0. 0. 0. 0. 0. 0. 0. 0.]\n", | |
" [ 0. 0. 0. 0. 0. 0. 0. 0.]\n", | |
" [ 0. 0. 0. 0. 0. 0. 0. 0.]\n", | |
" [ 0. 0. 0. 0. 0. 0. 0. 0.]]\n" | |
] | |
} | |
], | |
"source": [ | |
"import numpy as np\n", | |
"\n", | |
"def vertical_stroke(rightness, downness):\n", | |
" \"\"\"\n", | |
" Return a 2d numpy array representing an image with a single vertical stroke in it.\n", | |
" `rightness` and `downness` are values from [0, 1] and define the position of the vertical stroke.\n", | |
" \"\"\"\n", | |
" i = (downness * (length + 1)) + 2\n", | |
" j = rightness * (length + 1) + 1\n", | |
" x = np.zeros(shape=shape, dtype=np.float64)\n", | |
" for delta in range(length):\n", | |
" x[i + delta, j] = 1.\n", | |
" return x\n", | |
"\n", | |
"def horizontal_stroke(downness):\n", | |
" \"\"\"\n", | |
" Analogue to vertical_stroke, but it returns horizontal strokes.\n", | |
" `downness` is here a value in [0, 1, 2].\n", | |
" \"\"\"\n", | |
" i = (downness * (length + 1)) + 1\n", | |
" x = np.zeros(shape=shape, dtype=np.float64)\n", | |
" for j in range(length):\n", | |
" x[i, 2 + j] = 1.\n", | |
" return x\n", | |
"\n", | |
"print(horizontal_stroke(0))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"strokes = [horizontal_stroke(k) for k in range(3)] + [vertical_stroke(k, l) for k in range(2) for l in range(2)]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"import random\n", | |
"\n", | |
"def random_composition():\n", | |
" \"\"\"\n", | |
" Return a random composition of 2, 3, 4, or 5 strokes as a single 2d numpy array.\n", | |
" (So not guaranteed to look like a real digit!)\n", | |
" \"\"\"\n", | |
" x = np.zeros(shape=shape, dtype=np.float64)\n", | |
" num_strokes = random.choice([2, 3, 4, 5])\n", | |
" sample = random.sample(strokes, num_strokes) # without replacement\n", | |
" for atom in sample:\n", | |
" x += atom\n", | |
" return x" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 156, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[[ 0. 0. 0. 0. 0. 0. 0. 0.]\n", | |
" [ 0. 0. 1. 1. 1. 1. 0. 0.]\n", | |
" [ 0. 1. 0. 0. 0. 0. 0. 0.]\n", | |
" [ 0. 1. 0. 0. 0. 0. 0. 0.]\n", | |
" [ 0. 1. 0. 0. 0. 0. 0. 0.]\n", | |
" [ 0. 1. 0. 0. 0. 0. 0. 0.]\n", | |
" [ 0. 0. 1. 1. 1. 1. 0. 0.]\n", | |
" [ 0. 1. 0. 0. 0. 0. 1. 0.]\n", | |
" [ 0. 1. 0. 0. 0. 0. 1. 0.]\n", | |
" [ 0. 1. 0. 0. 0. 0. 1. 0.]\n", | |
" [ 0. 1. 0. 0. 0. 0. 1. 0.]\n", | |
" [ 0. 0. 0. 0. 0. 0. 0. 0.]\n", | |
" [ 0. 0. 0. 0. 0. 0. 0. 0.]]\n" | |
] | |
} | |
], | |
"source": [ | |
"bitmap = random_composition()\n", | |
"print(bitmap)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 157, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAEMAAABhCAYAAACNrhxOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAAYhJREFUeJzt3LFNw1AUQNH3EEWUlgVcUiJaBmAI5ssQGQBmoMwC9HSf\nIhS3sfXQtx1b3NOksOU8X1nfimwlW2uhq7tbD7AlxgBjgDHAGGAMMAaUYmTmMTOfM/O49EBLqM5/\nXzzeU0S8R8RbZn52T7e+x4g4RcRLRHyM7VSNMfx+nvpmurkhJmJU14zLHJNswGVqYzXGd/8cmzB5\nHt5NoLpm/Nkav4Yzc9bjeWWAMcAYYAwwBnTfTcbuGnOv9GvwygBjgDHAGGAMMAYYA4wBxgBjgDHA\nGGAMMAYYA4wBxgBjgDHAGGAMMAYYA7qfm4w9H/Ep/M4ZA4wBxgBjgDFg168kzP3dXhlgDDAGGAOM\nAcYAY4AxwBhgDDAGGAOMAcYAY4AxwBhgDDAGGAOMAcYAY8BirySswVcSFmQMMAYYA4wBxgBjgDHA\nGGAMqMY4LDrFeibPoxpj6J9jE4apjVl5xzszHyLiNa5/FLjH//k7xDXEubX2NbZTKcZ/4QIKxgBj\ngDHAGGAMMAb8AJimKhAh4kfnAAAAAElFTkSuQmCC\n", | |
"text/plain": [ | |
"<matplotlib.figure.Figure at 0x10d3e0128>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"show_as_image(bitmap.flatten())" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Generate the dataset" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(200, 104)\n" | |
] | |
} | |
], | |
"source": [ | |
"N = 200\n", | |
"samples = [random_composition().flatten() for _ in range(N)]\n", | |
"X = np.vstack(samples)\n", | |
"print(X.shape)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# NMF learns the individual LCD cells" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 158, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"from sklearn.decomposition import NMF\n", | |
"\n", | |
"model = NMF(n_components=7)\n", | |
"model.fit(X)\n", | |
"X_transformed = model.transform(X)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 159, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAEMAAABhCAYAAACNrhxOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAAXRJREFUeJzt3LFNw1AURuH/IoooLQu4pES0DMAQzJchGABmoMwC9HSP\nwhSnsfUUx4kjzteksGVdH1lOYftVay0a3V17gC0xBhgDjAHGAGOAMaArRlXtq+q5qvZrD7SG3vnv\nO4/3lOQjyVtVfS2e7vIekxySvCT5nNqpN8bw93tYNtPVDZmJ0XvPOJ5jkg04zm3sjfGzfI5NmD0P\n/03AGGAMMAYYA4wBxgBjgDHAGGAMMAYYA4wBxgBjgDHAGGAMMAYYA4wBxgBjgDHAGGAMMAYYA4wB\nxgBjgDHAGGAMMAYYA4wBvS/FbtLUJ2VVddLxvDLAGGAMMAYYA4wBxgBjgDHAGGAMMAYYA4wBxgBj\ngDHAGGAMMAYYA4wBxoCbfm5y6vORKV4ZYAwwBhgDjAHGAGOAMcAYYAzojbFbdYrLmT2P3hjD8jk2\nYZjbWD3rjlfVQ5LXjAsF3uI6f7uMId5ba99TO3XF+C+8gYIxwBhgDDAGGAOMAb/0Ox4ERg6wHgAA\nAABJRU5ErkJggg==\n", | |
"text/plain": [ | |
"<matplotlib.figure.Figure at 0x10d3e0160>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAEMAAABhCAYAAACNrhxOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAAXRJREFUeJzt3LFNw1AURuH/IoooLQu4pES0DMAQzJchGABmoMwC9HSP\nwhSnsfUUx4kjzteksGVdH1mOZUuvWmvR6O7aA2yJMcAYYAwwBhgDjAFdMapqX1XPVbVfe6A19M5/\n33m8pyQfSd6q6mvxdJf3mOSQ5CXJ59ROvTGGv9/DspmubshMjN57xvEck2zAcW5jb4yf5XNswux5\n+G8CxgBjgDHAGGAMMAb0PoFu0tT726o66XheGWAMMAYYA4wBxgBjgDHAGGAMMAYYA4wBxgBjgDHA\nGGAMMAYYA4wBxgBjwE1/Nzn1+8gUrwwwBhgDjAHGAGOAMcAYYAwwBhgDjAHGAGOAMcAYYAwwBhgD\njAHGAGOAMcAYYAwwBhgDjAHGAGOAMcAYYAwwBhgDjAHGAGOAMcAYYAzojbFbdYrLmT2P3hjD8jk2\nYZjbWD3rjlfVQ5LXjAsF3uI6f7uMId5ba99TO3XF+C+8gYIxwBhgDDAGGAOMAb+XDR4EFHrWqQAA\nAABJRU5ErkJggg==\n", | |
"text/plain": [ | |
"<matplotlib.figure.Figure at 0x1048477f0>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAEMAAABhCAYAAACNrhxOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAAXNJREFUeJzt3L1NxEAUReH7EMFqUxpwSIhIKYAiqG+LoACogXAbICcb\nAhOcZK1B/lkjzpc4sGU9H1meZORqrUWjm2sPsCfGAGOAMcAYYAwwBnTFqKpjVT1W1XHtgdbQO/9t\n5/0ekrwleamqj9nTbe8+ySnJU5L3Sxf1xhh+jqd5M13dkIkYvd+M8xKT7MB56mRvjK/5c+zC5HO4\nmoAxwBhgDDAGGAOMAcYAY4AxwBhgDDAGGAOMAcYAY4AxwBhgDDAGGAOMAcYAY4AxwBhgDDAGGAOM\nAb07d35tiz3pVbXo/XwzwBhgDDAGGAOMAastrUsve1vwzQBjgDHAGGAMMAYYA4wBxgBjgDHAGGAM\nMAYYA4wBxgBjgDHAGGAMMAYYA4wBxgBjgDHAGGAMMAYYA4wBxgBjQG+Mw6pTbGfyOXpjDPPn2IVh\n6mT17PGuqrskzxl/FPgX//N3yBjitbX2eemirhj/hR9QMAYYA4wBxgBjgDHgG7+5HgRvcxjuAAAA\nAElFTkSuQmCC\n", | |
"text/plain": [ | |
"<matplotlib.figure.Figure at 0x10d46a048>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAEMAAABhCAYAAACNrhxOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAAXNJREFUeJzt3L1NxEAUReH7EMFqUxpwSIhIKYAiqG+LoACogXAbICcb\nAhOcZK1B/lkjzpc4sGU9H1ljOZlqrUWjm2sPsCfGAGOAMcAYYAwwBnTFqKpjVT1W1XHtgdbQO/9t\n5/0ekrwleamqj9nTbe8+ySnJU5L3Sxf1xhh+jqd5M13dkIkYvWvGeYlJduA8dbI3xtf8OXZh8jn8\nmoAxwBhgDDAGGAOMAcYAY4AxwBhgDDAGGAOMAcYAY4AxwBhgDDAGGAOMAcYAY4AxwBhgDDAGGAOM\nAcYAY4AxwBhgDDAGGAOMAcYAY4AxwBhgDDAGGAOMAcYAY4AxwBhgDDAGGAOMAcYAY4AxwBhgDDAG\nGAN6NxP5tS22yauqRe/nmwHGAGOAMcAYsNrXZOmVfgu+GWAMMAYYA3pjHFadYjuTz9EbY5g/xy4M\nUyer54eqqu6SPGfcKPAv7vN3yBjitbX2eemirhj/hQsoGAOMAcYAY4AxwBjwDSI3HgSezP/jAAAA\nAElFTkSuQmCC\n", | |
"text/plain": [ | |
"<matplotlib.figure.Figure at 0x10d31efd0>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAEMAAABhCAYAAACNrhxOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAAXVJREFUeJzt3DFOw0AQQNEZRBGl5QIuKREtB+AQnC+H4ABwBspcgJ5u\nKUzxm7VG2thxxH9NClvW+MtyCtubrbXQ7O7aA+yJMcAYYAwwBhgDjAGlGJl5zMznzDyuPdAaqvPf\nF4/3FBEfEfGWmV/D023vMSJOEfESEZ+9naoxpr/f09hMVzfFQozqPeN8iUl24Ly0sRrjZ3yOXVg8\nD/9NwBhgDDAGGAOMAcYAY4AxwBhgDDAGGAOMAcYAY4AxwBhgDDAGGAOMAcYAY4AxwBhgDDAGGAOM\nAcYAY4AxwBhgDDAGGAOMAdWXYrt6n3Vl5uihN+eVAcYAY4AxwBhgDDAGGAOMAcYAY4AxwBhgDDAG\nGAOMAcYAY4AxwBhgDDAGDD83ucXnIz1eGWAMMAYYA4wBxgBjgDHAGGAMqMY4rDrFdhbPoxpjGp9j\nF6aljVlZdzwzHyLiNeaFAm9xnb9DzCHeW2vfvZ1KMf4Lb6BgDDAGGAOMAcYAY8Avac0eBNFJxq8A\nAAAASUVORK5CYII=\n", | |
"text/plain": [ | |
"<matplotlib.figure.Figure at 0x10ff8dfd0>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAEMAAABhCAYAAACNrhxOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAAXJJREFUeJzt3L1NxEAUReH7EMFqUxpwSIhIKYAiqG+LoACogXAbICcb\nAhOcZK1B/lkjzpc4sGU9H1ljOZlqrUWjm2sPsCfGAGOAMcAYYAwwBnTFqKpjVT1W1XHtgdbQO/9t\n5/0ekrwleamqj9nTbe8+ySnJU5L3Sxf1xhh+jqd5M13dkIkYvWvGeYlJduA8dbI3xtf8OXZh8jn8\nmkDvmvFrW/wNV9Wi9/PNAGOAMcAYYAxY7Wuy9Eq/Bd8MMAYYA4wBxgBjgDHAGGAMMAYYA4wBxgBj\ngDHAGGAMMAYYA4wBxgBjgDHAGGAMMAYYA4wBxgBjgDHAGGAMMAYYA4wBxgBjgDHAGGAMMAYYA4wB\nxgBjgDHAGGAMMAYYA4wBxgBjgDHAGGAMMAYYA4wBxgBjgDHAGGAM6I1xWHWK7Uw+R2+MYf4cuzBM\nnayendaq6i7Jc8aNAv/iPn+HjCFeW2ufly7qivFfuICCMcAYYAwwBhgDjAHfOUQeBFs6qIkAAAAA\nSUVORK5CYII=\n", | |
"text/plain": [ | |
"<matplotlib.figure.Figure at 0x111621f60>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAEMAAABhCAYAAACNrhxOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAAXVJREFUeJzt3DFOw0AQQNEZRBGl5QIuKREtB+AQnC+H4ABwBspcgJ5u\nKUzxm7VG2thxxH9NClvW+MtyLFvabK2FZnfXHmBPjAHGAGOAMcAYYAwoxcjMY2Y+Z+Zx7YHWUJ3/\nvni8p4j4iIi3zPwanm57jxFxioiXiPjs7VSNMf39nsZmuropFmJU7xnnS0yyA+eljdUYP+Nz7MLi\nefhvAsYAY4AxwBhgDDAGVJ9Au3rvUDNz9NCb88oAY4AxwBhgDDAGGAOMAcYAY4AxwBhgDDAGGAOM\nAcYAY4AxwBhgDDAGGAOGv5vc4veRHq8MMAYYA4wBxgBjgDHAGGAMMAYYA4wBxgBjgDHAGGAMMAYY\nA4wBxgBjgDHAGGAMMAYYA4wBxgBjgDHAGGAMMAYYA4wBxgBjgDHAGGAMqMY4rDrFdhbPoxpjGp9j\nF6aljVlZdzwzHyLiNeaFAm9xnb9DzCHeW2vfvZ1KMf4Lb6BgDDAGGAOMAcYAY8AvDJ8eBEUOVQQA\nAAAASUVORK5CYII=\n", | |
"text/plain": [ | |
"<matplotlib.figure.Figure at 0x10fae0048>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"for component in model.components_:\n", | |
" show_as_image(component)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 164, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAEMAAABhCAYAAACNrhxOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAAYZJREFUeJzt3LFNw0AUgOH3EEWUlgVcUiJaBmAI5ssQDAAzUGYBerqj\nMMVf4NMF+xxb/F+Twpbz/Ms6K3LkLKWERjfXHmBLjAHGAGOAMcAYYAxoipGZx8x8zMxj74F6aJ3/\ntvF4DxHxFhEvmfkxe7r13UfEKSKeIuJ9aqfWGMPP52neTFc3RCVG65pxXmKSDTjXNrbG+Jo/xyZU\nz8O7CbSuGRdb49dwZi56PK8MMAYYA4wBxoBud5OlV/rfTN2x/vrdXhlgDDAGGAOMAcYAY4AxwBhg\nDDAGGAOMAcYAY4AxwBhgDDAGGAOMAcYAY0C35yZr8Cl8R8YAY4AxwBhgDNj1rdW/JHRkDDAGGAOM\nAcYAY4AxwBhgDDAGGAOMAcYAY4AxwBhgDDAGGAOMAcYAY8Cun5v4l4SOjAHGAGOAMcAYYAwwBhgD\njAGtMQ5dp1hP9TxaYwzz59iEobYxW960lpl3EfEc44sC9/iev0OMIV5LKZ9TOzXF+C9cQMEYYAww\nBhgDjAHGgG9+hiQQGsUb9QAAAABJRU5ErkJggg==\n", | |
"text/plain": [ | |
"<matplotlib.figure.Figure at 0x10d2c6390>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"digit7 = horizontal_stroke(0) + vertical_stroke(1, 0) + vertical_stroke(1, 1)\n", | |
"show_as_image(digit7)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 165, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([[ 0.4930625 , 0.54888887, 0. , 0. , 0. ,\n", | |
" 0.39639133, 0. ]])" | |
] | |
}, | |
"execution_count": 165, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"model.transform([digit7.flatten()])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"collapsed": true | |
}, | |
"source": [ | |
"# Demonstration that PCA does not!\n", | |
"\n", | |
"The components that we obtain from PCA are dense and cannot be interpreted." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 160, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"PCA(copy=True, iterated_power='auto', n_components=7, random_state=None,\n", | |
" svd_solver='auto', tol=0.0, whiten=False)" | |
] | |
}, | |
"execution_count": 160, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"from sklearn.decomposition import PCA\n", | |
"\n", | |
"pca = PCA(n_components=7)\n", | |
"pca.fit(X)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 161, | |
"metadata": { | |
"collapsed": false, | |
"scrolled": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAEMAAABhCAYAAACNrhxOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAAd9JREFUeJzt3D1uGlEUQOF7IxeINhugjKCJHDrXURaRFbAiVkDBEqLU\ncUXSgVyyAffpXgos6xQwfjPDAFbO11iagafL0dMgzE+WUkIHH649wC0xBhgDjAHGAGOAMaAqRmaO\nM/M+M8dDDzSE2vnvKtf7HBG/IuJ7Zj71nu7yPkXEKiIeIuLx1I1qY0xe/q76zXR1k2iIUXvN2J9j\nkhuwbzpZG+Nv/zluQuPj8NkEaq8ZrW2326GWfjWbzc66njsDjAHGAGOAMaD3s8l6vT56/NxX+mNO\n/fc2O67nzgBjgDHAGGAMGOy1ySWc+71AdwYYA4wBxgBjgDHAGGAMMAYYA4wBxgBjgDHAGGAMMAZk\nzWfHM/M+In63WXg6nXadqdput2t7ly+llD+nTrozwBhgDDAGGAOMAb3fRJrP50ePbzabvku/abFY\nHD2+XC47refOAGOAMcAYYAwwBhgDjAHGAGOAMcAYYAwwBhgDjAHGAGOAMcAYYAwwBhgDBvtIwteu\nE7Xws/1d/EhCLWOAMcAYYAwY7KucHa70V+fOAGOAMcAYUBtjNOgUl9P4OGpjTPrPcRMmTSdrX6h9\njIhvcfihwPf4O3+jOIT4UUp5PnWjqhj/Cy+gYAwwBhgDjAHGAGPAPw7SPTXqg1X7AAAAAElFTkSu\nQmCC\n", | |
"text/plain": [ | |
"<matplotlib.figure.Figure at 0x10fb0ef60>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAEMAAABhCAYAAACNrhxOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAAeJJREFUeJzt3DFOKkEcgPH/37wEY2e8ABWxNFi+A3gBOlvOYcE5aD3E\nO4B2WhsqL/BiK9VY8PLyFewyOIAYv19CSHZhMvtlshRMNkspoZWTr57AMTEGGAOMAcYAY4AxoCpG\nZp5l5jgzz/Y9oX2onf+vyvGuIuIhIm4z86V5dod3GRH3EfE7Ih67PlQbY/jv/b5tTl9uGD0xau8Z\nr7uYyRF47TtZG+O9fR5Hofc6/DWB2nvG1qbT6b6G/m8+n+90PFcGGAOMAcYAY0Dzr8lkMll7fNd3\n+nUGg8Ha48vl8lPjuTLAGGAMMAYYA4wBxgBjgDHAGGAMMAYYA4wBxgBjgDHAGGAMMAYYA4wBxoCs\n2TuemeOIeNpm4EPsSM/tv3JdSnnuOunKAGOAMcAYYAwwBjRvSVgsFmuP52jUOvRGbx3Hzz85nisD\njAHGAGOAMcAYYAwwBhgDjAHGAGOAMcAYYAwwBhgDjAHGAGOAMcAYYAxo/t9k1PH/yGw2ax16o/O7\nu52O58oAY4AxwBhgDCqlbHxFxDhWe9a++2vcd52uDDAGGAOMAbUxTvc6i8PpvY7aGMP2eRyFYd/J\n2r3jFxFxE6sHBX7H5/ydxirEn1LK364PVcX4KbyBgjHAGGAMMAYYA4wBHz1mlLK8+h9yAAAAAElF\nTkSuQmCC\n", | |
"text/plain": [ | |
"<matplotlib.figure.Figure at 0x111567e80>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAEMAAABhCAYAAACNrhxOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAAdNJREFUeJzt3DFOG0EUgOH3UAqLlgu4TIlMSY04gSu4g48QcQTfIVQ+\nAaKGDmpKLkBPNxRG0V8wm0lm1xjxf42lXXv0/Gu0LmxvllJCWwefPcA+MQYYA4wBxgBjgDGgKUZm\nHmbmIjMPpx5oCq3z/2hc7zgi7iLiIjOfuqfbvZ8RcR0RpxFxX3tSa4z5++N130yfbh4DMVqvGc9j\nTLIHnodOtsZ47Z9jLwy+Dz9NoPWa8c82m81US/+xXC5HXc+dAcYAY4AxwBjQ/Wnyu3J87Cv9R64q\nx3/953ruDDAGGAOMAcYAY4AxwBhgDDAGGAOMAcYAY4AxwBhgDDAGGAOMAcYAY4AxoPt7k8vK8fV6\n3bv0X61Wq1HXc2eAMcAYYAwwBhgDsuVvWZm5iIiHj87dVV5z2jFUq9vK8bP6S05KKY+1k+4MMAYY\nA4wBxgBjgDHAGGAMMAYYA4wBxgBjgDHAGGAMMAYYA4wBxgBjgDGg+ycJte9HdnGbvMwcdT13BhgD\njAHGAGPAZLeZGftKvwvuDDAGGAOMAa0xZpNOsTuD76M1xrx/jr0wHzrZ+gO3o4g4j+2NAr/iff5m\nsQ1xU0p5qT2pKcZ34QUUjAHGAGOAMcAYYAx4A65JMUDNCXevAAAAAElFTkSuQmCC\n", | |
"text/plain": [ | |
"<matplotlib.figure.Figure at 0x10d30f4e0>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAEMAAABhCAYAAACNrhxOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAAclJREFUeJzt3DFKA1EUQNH3RIjYiRtIaSm2LsAmWYHrcwVJ4wJ0DZZu\nQGwNBMciIrfIjD/5M8mI9zSBSfi8XD4zkCGTTdOENk6OPcCYGAOMAcYAY4AxwBhQFCMzzzPzJjPP\nhx5oCKXznxaudx0RTxFxn5kv1dMd3lVEPETEbUQ8t32oNMb0+/Whbqajm0ZHjNJzxmsfk4zAa9eb\npTE+6ucYhc7v4dUESs8ZO7sYamF473k9dwYYA4wBxgBjQPXVZL1eb1/4dLAL1Y/ZbLb1+HK53Gs9\ndwYYA4wBxgBjgDHAGGAMMAYYA4wBxgBjgDHAGGAMMAYYA4wBxgBjgDHAGFB9c6Pt/sh+dy52M9vz\n/kgbdwYYA4wBxgBjgDGg+tL62XL8EJUXi8XW4/P5fK/13BlgDDAGGAOMAdVXk+xjipFwZ4AxwBhg\nDDAGGAOMAcYAY4AxwBhgDDAGGAOMAcYAY8Bgv3StVqvapX81mUx6Xc+dAcYAY4AxwBgw2L/3+z7T\nH4I7A4wBxgBjQGmMs0GnOJzO71EaY1o/xyhMu97MkueOZ+ZlRNzF5kGBf/E5f2exCfHYNM1b24eK\nYvwXnkDBGGAMMAYYA4wBxoAvz68rluodCCEAAAAASUVORK5CYII=\n", | |
"text/plain": [ | |
"<matplotlib.figure.Figure at 0x10453b8d0>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAEMAAABhCAYAAACNrhxOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAAcVJREFUeJzt3DFOAkEUgOH3iAXYegFK7YwdsbSwtjPxfCR21h5AOYKW\nXMAaK9YCi9+E3YzMLmD8v8ZkIOPjz2S32GSzaZrQxujQAxwTY4AxwBhgDDAGGAOKYmTmaWZeZebp\n0AMNoXT+k8L9LiPiJSIeMvO9err9O4+IeURcR8Rr25dKY0y//87rZjq4aXTEKL1mLPuY5Agsuz4s\njfFZP8dR6Pwd3k2g9Jrxa+v1eut6Zvb2P/rcK8KT8YMxwBhgDDAGVN9NJpPJ1vXRaPjOi8Vi6/ps\nNttpP08GGAOMAcYAY4AxwBhgDDAGGAOMAcYAY4AxwBhgDDAGGAOMAcYAY4AxwBhQ/dxktVptXb+p\n3bjArs9H2ngywBhgDDAGGAOMAdW31seW9fvajQvctaw/7bifJwOMAcYAY4AxwBhgDDAGGAOMAcYA\nY4AxwBhgDDAGGAOMAcYAY4AxwBhgDKh+btL2fOStduMCFz3v58kAY4AxwBhgDBjsNTN9X+n3wZMB\nxgBjgDGgNMZ40Cn2p/N3lMaY1s9xFKZdH2bJe8cz8ywibmPzosC/+J6/cWxCPDdN89H2paIY/4UX\nUDAGGAOMAcYAY4Ax4AvNyCh+jsKVcwAAAABJRU5ErkJggg==\n", | |
"text/plain": [ | |
"<matplotlib.figure.Figure at 0x10fea8080>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAEMAAABhCAYAAACNrhxOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAAc9JREFUeJzt3LFKw1AUgOFzioPUSXyBjo5S6eQD+BBufbBuPoQPoEvR\n2bEv4Cg4eR0q8g9NuG0SG/H/FiGpl9OfS4ISkqWU0Nbk2AOMiTHAGGAMMAYYA4wBVTEyc5qZ88yc\nDj3QEGrnP6lc7yoiHiPiLjNfO0/3+y4j4j4ibiLiqelDtTFm3z/vu810dLNoiVF7zdj0MckIbNpO\n1sb46D7HKLR+D+8mUHvN2NtisRhq6R/r9brX9dwZYAwwBhgDjAGd7yafDccnPV/pd1k2HF8duJ47\nA4wBxgBjgDGg890k+5hiJNwZYAwwBhgDjAHGAGOAMcAYYAwwBhgDjAHGAGOAMcAYMNh/ut66Llzh\nouf13BlgDDAGGAOMAcaAzrfW84bjfd/2dlmtdj98sFw2PazQzp0BxgBjgDHAGGAMMAYYA4wBxgBj\ngDHAGGAMMAYYA4wBxgBjgDHAGGAMMAZkzevsMnMeEc/7LHx26ER7eN//V65LKS9NJ90ZYAwwBhgD\njAGDvWbmgCv90bkzwBhgDDAG1MY4HXSK39P6PWpjzLrPMQqztpO1f6hdRMRtbF8U+Bff83ca2xAP\npZTGx9qrYvwXXkDBGGAMMAYYA4wBxoAvlGEvtjIt9mYAAAAASUVORK5CYII=\n", | |
"text/plain": [ | |
"<matplotlib.figure.Figure at 0x10fb0abe0>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAEMAAABhCAYAAACNrhxOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAAcJJREFUeJzt3LFKw1AUgOFzxKF0ddWS0VFcXQX7Dr6Xb9B3qOCqz+BY\n1NXdLQ51+AcTr71JG/H/FiEth9vfS4INJtu2DW0dHXoBU2IMMAYYA4wBxgBjQFGMzJxn5mVmzsde\n0BhK139cOO8iIh4j4jYzn6tXt3/nEbGKiKuIeOp6U2mM5uvnqm5NB9dET4zSc8ZmiJVMwKbvxdIY\nH/XrmITez+HVBErPGb/2MtZgWAw8z50BxgBjgDHAGFB9NVkul98eX6zXtaN/1PXtbe44z50BxgBj\ngDHAGDDa3yb7MPS9QHcGGAOMAcYAY4AxwBhgDDAGGAOMAcYAY4AxwBhgDDAGVH/Tte64P3JXO7jA\n0L9JdwYYA4wBxgBjgDGg+tL62nH8rHZwgbeO46c7znNngDHAGGAMMAYYA4wBxgBjgDHAGGAMMAYY\nA4wBxgBjgDHAGGAMMAYYA4wB1fdNuu6PPNQOLrDr/ZEu7gwwBhgDjAHGgNH+lfN6rMEjcmeAMcAY\nYAwojTEbdRX70/s5SmM09euYhKbvxSx57nhmnkTETWwfFPgXn/M3i22I+7Zt37veVBTjv/AECsYA\nY4AxwBhgDDAGfAKLeiZpqd0RvAAAAABJRU5ErkJggg==\n", | |
"text/plain": [ | |
"<matplotlib.figure.Figure at 0x10d31c128>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"for component in pca.components_:\n", | |
" show_as_image(component)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"anaconda-cloud": {}, | |
"kernelspec": { | |
"display_name": "Python [conda root]", | |
"language": "python", | |
"name": "conda-root-py" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment