Last active
May 12, 2016 08:53
-
-
Save mkolod/43643680c250ce0bd98d39465b17a381 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# Imports\n", | |
| "import numpy as np\n", | |
| "from sklearn.decomposition import PCA" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "The matrix shape is (6, 2)\n", | |
| "The matrix rank is 2\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# Toy data set: six samples (rows) described by two features (columns).\n", | |
| "# Each column sums to zero, i.e. the data is already mean-centered.\n", | |
| "X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n", | |
| "X_shape_text = repr(X.shape)\n", | |
| "X_rank = np.linalg.matrix_rank(X)\n", | |
| "print(\"The matrix shape is %s\" % X_shape_text)\n", | |
| "print(\"The matrix rank is %d\" % X_rank)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Principal components using PCA directly\n", | |
| "[[ 0.83849224 0.54491354]\n", | |
| " [ 0.54491354 -0.83849224]]\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# Fit scikit-learn's PCA on X, keeping both components.\n", | |
| "# X has rank 2, so two components is the most that can carry any variance.\n", | |
| "pca = PCA(n_components=2).fit(X)\n", | |
| "print(\"Principal components using PCA directly\")\n", | |
| "print(pca.components_)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Principal components using SVD\n", | |
| "[[ 0.83849224 0.54491354]\n", | |
| " [ 0.54491354 -0.83849224]]\n", | |
| "Eigenvalues using SVD\n", | |
| "[ 6.30061232 0.54980396]\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# Get principal components using SVD.\n", | |
| "# PCA is the SVD of the *mean-centered* data, so subtract the column means\n", | |
| "# explicitly instead of relying on X happening to be centered already.\n", | |
| "X_centered = X - X.mean(axis=0)\n", | |
| "# np.linalg.svd returns the right singular vectors as the ROWS of its third\n", | |
| "# output (i.e. V transposed), so each row of V is one principal direction.\n", | |
| "U, s, V = np.linalg.svd(X_centered, full_matrices=True)\n", | |
| "print(\"Principal components using SVD\\n%s\" % V)\n", | |
| "# s holds singular values, not eigenvalues; the eigenvalues of X^T X are s**2.\n", | |
| "print(\"Singular values using SVD\\n%s\" % s)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Variance explained by consecutive principal components (via PCA)\n", | |
| "[ 0.99244289 0.00755711]\n", | |
| "Variance explained by consecutive principal components (via SVD)\n", | |
| "[ 0.99244289 0.00755711]\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# Share of the total variance captured by each component: every squared\n", | |
| "# singular value divided by the sum of all squared singular values.\n", | |
| "ssq = np.square(s)\n", | |
| "total = ssq.sum()\n", | |
| "explained = ssq / total\n", | |
| "print(\"Variance explained by consecutive principal components (via PCA)\")\n", | |
| "print(pca.explained_variance_ratio_)\n", | |
| "print(\"Variance explained by consecutive principal components (via SVD)\")\n", | |
| "print(explained)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Principal components using U from SVD of covariance matrix\n", | |
| "U\n", | |
| "[[-0.83849224 -0.54491354]\n", | |
| " [-0.54491354 0.83849224]]\n", | |
| "s\n", | |
| "[ 6.30061232 0.54980396]\n", | |
| "V\n", | |
| "[[-0.83849224 -0.54491354]\n", | |
| " [-0.54491354 0.83849224]]\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# Repeat the decomposition on the 2x2 matrix X^T X. (For the mean-centered X\n", | |
| "# used here this is the covariance matrix up to a constant scale factor.)\n", | |
| "XX = X.T.dot(X)\n", | |
| "U2, XX_singular_values, V2 = np.linalg.svd(XX, full_matrices=True)\n", | |
| "# Take square roots so the values are comparable with the singular values of X itself.\n", | |
| "s2 = np.sqrt(XX_singular_values)\n", | |
| "print(\"Principal components using U from SVD of covariance matrix\")\n", | |
| "print(\"U\\n%s\" % U2)\n", | |
| "print(\"s\\n%s\" % s2)\n", | |
| "print(\"V\\n%s\" % V2)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Eigenvalues:\n", | |
| "[ 6.30061232 0.54980396]\n", | |
| "\n", | |
| "Eigenvetors/principal components:\n", | |
| "[[ 0.83849224 -0.54491354]\n", | |
| " [ 0.54491354 0.83849224]]\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# PCA via eigendecomposition\n", | |
| "# Note: while SVD can be performed on the data matrix,\n", | |
| "# the eigendecomposition has to be performed on the square, symmetric matrix\n", | |
| "# X^T X (for mean-centered X: the covariance matrix up to a scale factor).\n", | |
| "\n", | |
| "cov = np.dot(np.transpose(X), X)\n", | |
| "# eigh is the solver for symmetric matrices: it returns real eigenvalues in\n", | |
| "# ascending order, whereas eig makes no ordering guarantee.\n", | |
| "[s3, v3] = np.linalg.eigh(cov)\n", | |
| "# Reorder to descending so the leading component comes first, as in SVD/PCA.\n", | |
| "desc_order = np.argsort(s3)[::-1]\n", | |
| "# Clamp round-off negatives to zero before the square root to avoid NaN.\n", | |
| "s3 = np.sqrt(np.maximum(s3[desc_order], 0))\n", | |
| "# The eigenvectors are the COLUMNS of v3; reorder them to match s3.\n", | |
| "v3 = v3[:, desc_order]\n", | |
| "print(\"Square roots of eigenvalues:\\n%s\\n\" % s3)\n", | |
| "print(\"Eigenvectors/principal components:\\n%s\" % v3)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Original matrix\n", | |
| "[[ 0.37454012 0.95071431 0.73199394 0.59865848 0.15601864 0.15599452\n", | |
| " 0.05808361 0.86617615 0.60111501 0.70807258]\n", | |
| " [ 0.02058449 0.96990985 0.83244264 0.21233911 0.18182497 0.18340451\n", | |
| " 0.30424224 0.52475643 0.43194502 0.29122914]]\n", | |
| "\n", | |
| "After dimensionality reduction to n-1\n", | |
| "[[-1.68447704 -0.62064143 0.45054911 -0.17178848 0.10110122 0.10293963\n", | |
| " 0.31044351 0.00231546 0.07976426]\n", | |
| " [-1.00922335 -0.50413069 0.79493015 -0.25238636 0.1779029 0.18097257\n", | |
| " 0.52195075 0.05390896 0.16797379]]\n", | |
| "\n", | |
| "Approximate mxn matrix from reduced\n", | |
| "[[ 0.32630632 0.88549274 0.7426496 0.57043379 0.15863648 0.15873872\n", | |
| " 0.07771049 0.84249145 0.59002194 0.83481068]\n", | |
| " [-0.04741931 0.87795536 0.84746583 0.17254572 0.1855158 0.18727349\n", | |
| " 0.33191376 0.49136388 0.41630513 0.46991447]]\n", | |
| "\n", | |
| "Difference between original and approx\n", | |
| "[[ 0.0482338 0.06522157 -0.01065566 0.02822469 -0.00261784 -0.0027442\n", | |
| " -0.01962688 0.0236847 0.01109308 -0.1267381 ]\n", | |
| " [ 0.0680038 0.09195449 -0.01502319 0.03979339 -0.00369083 -0.00386898\n", | |
| " -0.02767152 0.03339255 0.01563989 -0.17868533]]\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# Dimensionality reduction\n", | |
| "np.random.seed(42)\n", | |
| "Y = np.random.rand(2, 10)\n", | |
| "print(\"Original matrix\\n%s\" % Y)\n", | |
| "# Use separate names so the U, s, V computed for X in an earlier cell are not overwritten.\n", | |
| "U_y, s_y, Vh_y = np.linalg.svd(Y, full_matrices=True)\n", | |
| "# np.linalg.svd returns V transposed: the right singular vectors are the ROWS\n", | |
| "# of Vh_y. Keep the first n-1 of them and transpose, so that every COLUMN of\n", | |
| "# `projection` is one orthonormal basis vector (shape n x (n-1)).\n", | |
| "n_keep = Vh_y.shape[0] - 1\n", | |
| "projection = Vh_y[:n_keep, :].T\n", | |
| "Y_reduce = np.dot(Y, projection)\n", | |
| "print(\"\\nAfter dimensionality reduction to n-1\\n%s\" % Y_reduce)\n", | |
| "# Y has only 2 rows, so its rank is at most 2 and the leading n-1 directions\n", | |
| "# reproduce it up to floating-point round-off.\n", | |
| "Y_approx = np.dot(Y_reduce, np.transpose(projection))\n", | |
| "print(\"\\nApproximate mxn matrix from reduced\\n%s\" % Y_approx)\n", | |
| "print(\"\\nDifference between original and approx\\n%s\" % np.subtract(Y, Y_approx))" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 2", | |
| "language": "python", | |
| "name": "python2" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 2 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython2", | |
| "version": "2.7.11" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 0 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment