Skip to content

Instantly share code, notes, and snippets.

@mkolod
Last active May 12, 2016 08:53
Show Gist options
  • Save mkolod/43643680c250ce0bd98d39465b17a381 to your computer and use it in GitHub Desktop.
Save mkolod/43643680c250ce0bd98d39465b17a381 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Imports\n",
"import numpy as np\n",
"from sklearn.decomposition import PCA"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The matrix shape is (6, 2)\n",
"The matrix rank is 2\n"
]
}
],
"source": [
"# Toy data set: six observations (rows), two features (columns).\n",
"X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n",
"\n",
"# repr() turns the shape tuple into a single string so that '%' does not\n",
"# try to unpack it as two separate format arguments.\n",
"shape_text = repr(X.shape)\n",
"rank_of_X = np.linalg.matrix_rank(X)\n",
"print(\"The matrix shape is %s\" % shape_text)\n",
"print(\"The matrix rank is %d\" % rank_of_X)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Principal components using PCA directly\n",
"[[ 0.83849224 0.54491354]\n",
" [ 0.54491354 -0.83849224]]\n"
]
}
],
"source": [
"# Fit scikit-learn's PCA on X and show the principal axes it finds.\n",
"# Two components are requested, which is the most X can have because\n",
"# its rank is 2.\n",
"pca = PCA(n_components=2).fit(X)  # fit() hands back the fitted estimator\n",
"print(\"Principal components using PCA directly\")\n",
"print(pca.components_)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Principal components using SVD\n",
"[[ 0.83849224 0.54491354]\n",
" [ 0.54491354 -0.83849224]]\n",
"Singular values using SVD\n",
"[ 6.30061232 0.54980396]\n"
]
}
],
"source": [
"# Get principal components using SVD of the data matrix itself.\n",
"# np.linalg.svd returns its third value already transposed (V^T), so the\n",
"# ROWS of the array called V here are the right singular vectors, i.e.\n",
"# the principal axes.\n",
"# No centering is applied; the columns of X already sum to zero.\n",
"U, s, V = np.linalg.svd(X, full_matrices=True)\n",
"print(\"Principal components using SVD\\n%s\" % V)\n",
"# s holds the singular values of X, not eigenvalues; the eigenvalues of\n",
"# X^T X are their squares (s * s).\n",
"print(\"Singular values using SVD\\n%s\" % s)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Variance explained by consecutive principal components (via PCA)\n",
"[ 0.99244289 0.00755711]\n",
"Variance explained by consecutive principal components (via SVD)\n",
"[ 0.99244289 0.00755711]\n"
]
}
],
"source": [
"# Share of the total variance captured by each component.\n",
"# s comes from the SVD of X; squaring its entries and dividing by the\n",
"# sum of the squares gives each component's fraction of the total.\n",
"squared_sv = s * s\n",
"explained = squared_sv / squared_sv.sum()\n",
"print(\"Variance explained by consecutive principal components (via PCA)\")\n",
"print(pca.explained_variance_ratio_)\n",
"print(\"Variance explained by consecutive principal components (via SVD)\")\n",
"print(explained)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Principal components using U from SVD of covariance matrix\n",
"U\n",
"[[-0.83849224 -0.54491354]\n",
" [-0.54491354 0.83849224]]\n",
"s\n",
"[ 6.30061232 0.54980396]\n",
"V\n",
"[[-0.83849224 -0.54491354]\n",
" [-0.54491354 0.83849224]]\n"
]
}
],
"source": [
"# Now build X^T X. The printed label calls it the covariance matrix;\n",
"# strictly it is the unnormalised version (no division by the sample count).\n",
"XX = X.T.dot(X)\n",
"U2, sv_XX, V2 = np.linalg.svd(XX, full_matrices=True)\n",
"# Square roots of the singular values of X^T X, kept under the name s2.\n",
"s2 = np.sqrt(sv_XX)\n",
"print(\"Principal components using U from SVD of covariance matrix\")\n",
"print(\"U\\n%s\" % U2)\n",
"print(\"s\\n%s\" % s2)\n",
"print(\"V\\n%s\" % V2)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Square roots of eigenvalues (singular values of X):\n",
"[ 6.30061232 0.54980396]\n",
"\n",
"Eigenvectors/principal components:\n",
"[[ 0.83849224 -0.54491354]\n",
" [ 0.54491354 0.83849224]]\n"
]
}
],
"source": [
"# PCA via eigendecomposition\n",
"# Note: while SVD can be performed on the data matrix itself, the\n",
"# eigendecomposition has to be performed on the square matrix X^T X\n",
"# (the unnormalised covariance matrix, since the columns of X sum to zero).\n",
"\n",
"cov = np.dot(np.transpose(X), X)\n",
"eigvals, v3 = np.linalg.eig(cov)\n",
"# The square roots of the eigenvalues of X^T X are the singular values\n",
"# of X, which is what gets printed (and what the label now says).\n",
"s3 = np.sqrt(eigvals)\n",
"print(\"Square roots of eigenvalues (singular values of X):\\n%s\\n\" % s3)\n",
"# eig() returns the eigenvectors as the COLUMNS of v3.\n",
"print(\"Eigenvectors/principal components:\\n%s\" % v3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Dimensionality reduction\n",
"np.random.seed(42)\n",
"Y = np.random.rand(2, 10)\n",
"print(\"Original matrix\\n%s\" % Y)\n",
"# np.linalg.svd returns its third value already transposed (V^T): each ROW\n",
"# is a right singular vector. Distinct names are used so the U, s, V that\n",
"# were computed for X earlier in the notebook are not overwritten.\n",
"U_y, s_y, Vh_y = np.linalg.svd(Y, full_matrices=True)\n",
"# Keep the leading n-1 right singular vectors (rows of Vh_y) and store them\n",
"# as the columns of an (n, n-1) projection matrix. Slicing columns of Vh_y\n",
"# instead would project onto a subspace unrelated to the data.\n",
"n_keep = Vh_y.shape[0] - 1\n",
"projection = np.transpose(Vh_y[:n_keep, :])\n",
"Y_reduce = np.dot(Y, projection)\n",
"print(\"\\nAfter dimensionality reduction to n-1\\n%s\" % Y_reduce)\n",
"Y_approx = np.dot(Y_reduce, np.transpose(projection))\n",
"print(\"\\nApproximate mxn matrix from reduced\\n%s\" % Y_approx)\n",
"print(\"\\nDifference between original and approx\\n%s\" % np.subtract(Y, Y_approx))\n",
"# Y has only 2 rows, so its rank is at most 2 and the single dropped\n",
"# direction carries no data: the reconstruction must equal Y up to rounding.\n",
"np.testing.assert_allclose(Y_approx, Y, atol=1e-12)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.11"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment