Skip to content

Instantly share code, notes, and snippets.

@JonathanReeve
Created November 8, 2014 20:39
Show Gist options
  • Save JonathanReeve/b98cda8bca6ac9084175 to your computer and use it in GitHub Desktop.
Save JonathanReeve/b98cda8bca6ac9084175 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"worksheets": [
{
"cells": [
{
"metadata": {},
"cell_type": "code",
"input": "def PCA(data, dims_rescaled_data=2):\n \"\"\"\n returns: data transformed in 2 dims/columns + regenerated original data\n pass in: data as 2D NumPy array\n \"\"\"\n import numpy as NP\n from scipy import linalg as LA\n m, n = data.shape\n # mean center the data\n data -= data.mean(axis=0)\n # calculate the covariance matrix\n R = NP.cov(data, rowvar=False)\n # calculate eigenvectors & eigenvalues of the covariance matrix\n # use 'eigh' rather than 'eig' since R is symmetric, \n # the performance gain is substantial\n evals, evecs = LA.eigh(R)\n # sort eigenvalue in decreasing order\n idx = NP.argsort(evals)[::-1]\n evecs = evecs[:,idx]\n # sort eigenvectors according to same index\n evals = evals[idx]\n # select the first n eigenvectors (n is desired dimension\n # of rescaled data array, or dims_rescaled_data)\n evecs = evecs[:, :dims_rescaled_data]\n # carry out the transformation on the data using eigenvectors\n # and return the re-scaled data, eigenvalues, and eigenvectors\n return NP.dot(evecs.T, data.T).T, eigenvalues, eigenvectors\n\ndef test_PCA(data, dims_rescaled_data=2):\n '''\n test by attempting to recover original data array from\n the eigenvectors of its covariance matrix & comparing that\n 'recovered' array with the original data\n '''\n _ , _ , eigenvectors = PCA(data, dim_rescaled_data=2)\n data_recovered = NP.dot(eigenvectors, m).T\n data_recovered += data_recovered.mean(axis=0)\n assert NP.allclose(data, data_recovered)\n\n\ndef plot_pca(data):\n from matplotlib import pyplot as MPL\n clr1 = '#2026B2'\n fig = MPL.figure()\n ax1 = fig.add_subplot(111)\n data_resc, data_orig = PCA(data)\n ax1.plot(data_resc[:, 0], data_resc[:, 1], '.', mfc=clr1, mec=clr1)\n MPL.show()",
"prompt_number": 4,
"outputs": [],
"language": "python",
"trusted": true,
"collapsed": false
},
{
"metadata": {},
"cell_type": "code",
"input": "import numpy as NP\ndf='/home/jon/Dropbox/Research/stylometry-experiments/iris.csv' \ndata = NP.loadtxt(df, delimiter=',')",
"prompt_number": 14,
"outputs": [],
"language": "python",
"trusted": true,
"collapsed": false
},
{
"metadata": {},
"cell_type": "code",
"input": "plot_pca(data)",
"prompt_number": 14,
"outputs": [
{
"output_type": "display_data",
"metadata": {},
"png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEACAYAAAC9Gb03AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAF3lJREFUeJzt3XuMXNV9wPGvjRdsh6wJgtpgu7VKSAIOpAbkkAcwLSE1\nbgVFCg1IqCGVSJQ0pFVCShyMbIRa0xSaYAVaqoaKogbSBEQdjEtIwmCKEhMCbGjAPFw7BRJewmAb\nSlkb9487Y4bxPO7MfZx773w/0mpnd+/MPbt797fn/M7vnAuSJEmSJEmSJEmSJEmSJElK2bXAs8BD\nXb5eA14GHmi8Lc+nWZKktJwALKJ3oF+TW2skSW8xNYXXuBvY2ueYKSmcR5I0hDQCfT+7gQ8CE8Bt\nwJE5nFOSlLIFdE/dvB2Y2Xh8KvBYHg2SJEWm5XCO7S2P1wFXAwcCL7YedNhhh+3etGlTDs2RpErZ\nBLyz1wF5pG5m82aOfnHj8YvtB23atIndu3cX/m3FihXB21CFNtpO21n0t7K0EzisXxBOo0d/A3AS\ncBDwJLACGGt87RrgY8BngJ3Aq8BZKZxTkhRTGoH+7D5fv6rxJkkKII/UTaXUarXQTeirDG0E25k2\n25musrQzjiLVt+9u5JskSTFNmTIF+sRye/SSVHEGekmqOAO9JFWcgV6SKs5AL0kVZ6CXpIoz0EtS\nxRnoJaniDPSSVHEGekmquDz2o5dysWz5BJu37GD6jGmsvuIYxsfH+j9JGgH26FUZm7fsYMNPX+Su\n9c+x7OKJ0M2RCsNAr8qYPiMaoB591CxWXfq+wK2RisPdK1UZ27ZNsuziCVZd+j7TNhoZcXavNNBL\nUom5TbEkyaobKQQrhJQne/RSAFYIKU8GeikAK4SUJydjpQCsEFJarLqRpIqz6kaSZKCXpKoz0EtS\nxRnoJaniDPSSVHFpBPprgWeBh3ocsxp4HJgAFqVwTklSTGkE+n8GlvT4+lLgncDhwKeAv0/hnJKk\nmNII9HcDW3t8/TTgusbjDcABwOwUzitJiiGPTc3mAk+2fPwUMI8o3SPF5kZg0nDy2r2yfdWWS2AV\nWzPAb3x0Gy9v2xl97uIJrrryuMAtk8ohj0D/NDC/5eN5jc/tZeXKlXse12o1arValu1SSTR3emxy\nIzCNsnq9Tr1eH+g5ae11swD4HnBUh68tBT7XeH888PXG+3budaOOzj1vA3etf46FR45z6KEzuHzV\nItM2UkNem5rdAJwEHESUd18BNP8Kr2m8/wZRZc4rwCeB+zu8joFeHbnTo9Sdu1dKUsXFCfTeSjAF\nVoNIKjK3QEiBt4WTVGQG+hR4WzhJRWaOPgVOFkoKxclYSao4byUoSTLQS1LVGeglqeKso5cScA2F\nysAevZSAayhUBgZ6KQHXUKgMLK+UEij7GgpTT+VnHb2kns465549e/0vXXKIN3MpITc1U2nYs8xX\n8+f92BPbAVNPVWeOXoXgpGa+mj/vrVsnmTN7Otdf+wH/uVaYgV6F4KRmvlp/3rffWjPIV5w5ehVC\nUSc1q5pSKurPW4NzMlYaQmtw37H9dX72wEuAk5UqJidjpSE089cABx+0H2BKSeVmoNdI65Saac1f\nX3Xlcaz66sMdUxyDpnWqmgZS8TkZq5HWqdpn9RXHsHTJIVx/7QeYN3cmV115XMegPGilkJVFCsUe\nvUZap2qf8fGxWLn4QSuFrCxSKE7GaqQlqT4Z9LlWuigLVt1IUsVZdSMJcCJ41DkZK40AJ4JHmz16\nKYay94jjTASfvORHPP/8a0wbm8qam05k3tyZeTZRGbJHn8Cy5ROcdc49nHveBrZtmwzdHGWo7D3i\n1pLRbv+knn/+Nbbv2MXWrZOcefY9ObdQWbJHn0DrCsplF0+4PL7C4vSIi9zrj1MyOm1sKrCLGTOm\n8p0bPpRPw5SLNHr0S4CNwOPAhR2+XgNeBh5ovC1P4ZyFYF306IjTIw7R609zVLnmphOZM3s631/7\nu3ulbRy9llvS8sp9gEeBjwBPAz8FzgYeaTmmBnwBOK3Pa5WuvNK6aLU697wN3LX+OY4+alZu+7vn\ndYco70RVXHHKK5P26BcDTwBbgEngRuD0Tm1JeJ5Cag6HDfKCeL3+tOU1qnT0Wm5JA/DHgN8Hzmt8\nfA7wfuD8lmNOAm4GniLq9V8APNzhtYL16IucW1V55XFd5TWqjHMe/47CyGPBVJzIfD8wH3gVOBW4\nBXhXpwNXrly553GtVqNWqyVsXjxxJ1W9kDWIPK6ruPvyJBXnPBYn5KNer1Ov1wd6TtJA/zRREG+a\nT9Rzb7W95fE64GrgQODF9hdrDfR5ijss9ULWIEbtujK9k4/2TvAll1zS9zlJc/T3AYcDC4B9gY8D\na9qOmc2bw4rFjcd7BfmQ4uZWvZA1iFG7rkLMUSieNCZJTwW+TlSB801gFfDpxteuAf4M+Aywkyh9\n8wXgJx1ep/BVN1bZKAteV0rC3SslqeLcvVIqkZCT/RYaVJt73UgpSGPlaMj9dMq+l496M9BLKUgj\nUIaclK3KhLA6M9BLKUgjUIasWul2bve4qQYnYzUyssxDZ105EyqH7h43xedkrNQiy4VJrStHkwTl\nbs8NtajKlE41mLoZgMPYcssraCXJ13d7bqiA6yKoajB1MwCHseWWZXqltSe+c/IN7vnxC0NtV9xt\nq+O02245ZXW4YCplcfcb94+o+tp/x5/67L17OgGnnDybsbGpQwXlvFbJ2mmpDnP0KVt9xTGx/gir\nskmVumv/HbemVi6/bNHQQTqv3SjNvY8WA/0A4v4R+kdUfZ1+x506AUUd3bV3WoraTqXD1E0G3KSq\n+tp/x90CZVlSJGVpp/Zm6iaQvIbfSscwvdn233G3dF1ZRndlaaeGY3llRizFLI8sty8oS3likdvp\n31Jypm5S1Noz3LFjkp/dvxVwKFx0caupejFdlx3TSr1ZXpmTZoDf+Nh2Xn456nEcfNC+PP/C64mC\nh/KRRZAe1cnNYb/vXs9L4x9xlcUJ9KZuUtAc+jeD/NFHzeLmfzuhsENhvVUz357m76ls2/6mlR4Z\n5vtetnyCteue7vq8IqeVysLJ2BQ087MLjxzn0ENm7Kmjbh1iNnssv3zyVeYeOoP99x8bqZ7eqCnb\n5GZaaz+G+b43b9nB9h27AJg1PrbX8yxuSM5An4I4C6la/5CeeeY1wMVUVRZ3cV0SaaZJ0vrH1Ov7\n7tbe5rlnjU/j1ltOtPOTAXP0GWm/qD//xfu5a/1z7L//NHbs2Gm+UQPpFCTjTlL22q6h+bw8JpO7\ntdeJ7GTM0QfUnqts5hnXrTnJfKMG1in3HbcX3v7cTs/LYp6iXbf25nHuUWfqJoFeQ+f2i7o1z5jG\nvuUaLZ2CZNz0UNztGpqyui7zSGepM1M3CfQaOscZjlofrLh6XU/9AvOgqRGvy3JxC4SM9Ro6N3vw\nRx+7jlde3cnUqVNYc/MJHPHuWbGeL7XqVXnSr2Km03MHGY2q/MzRJxCnvveVV3fyxhuwc+duzjjz\nPwd+vqopzWX9w5Y0Wrc+OkzdpKRbD+nwhbeyc+dupkyBtf9+4lt69BpdaaZHuqVmXG06Gqy6yVG3\nHtKam09gv/2mGuRHUK9ee5rpkW5VK/ba1WSgT0m3P9wj3j2LjT//A4P8CAodaOPMIRnkR0MagX4J\nsBF4HLiwyzGrG1+fABalcM7CsYekdqEDbZWuSbcqTiZpjn4f4FHgI8DTwE+Bs4FHWo5ZCnyu8f79\nwJXA8R1eq9Q5+rRZY19+Wa34HMVrw5LP7vLI0S8GngC2AJPAjcDpbcecBlzXeLwBOACYnfC8lVe2\n3Q+1t6x67aN4bfSb07DH31vSQD8XeLLl46can+t3zLyE5608a5nVzSheG/3SUKP4z28QSRdMxc21\ntA8rOj5v5cqVex7XajVqtdpQjaoCl4urm1G8NvptVTxK//zq9Tr1en2g5yTN0R8PrCSakAVYBrwB\n/E3LMf8A1InSOhBN3J4EPNv2WpXP0Y9iblXxeX0Mb5R3wMwjR38fcDiwANgX+Diwpu2YNcCfNB4f\nD7zE3kF+JDi8VC9Vvj6yzqFbLtpb0tTNTqKKmtuJKnC+SVRx8+nG168BbiOquHkCeAX4ZMJzltYo\nDS81uCpeH53up+wNd/LnFgg5GuXhpfqr4vXRWhYJuOVCBuKkbgz0CZlXVRHlfV12O19zT532+ykr\nPe51k4Mq51VVXnlfl93O1yyL/NZ1H+Qfr15skA/E/egTyjqv6ohBw8g739/vNoEKy9RNQknu/BOH\nS781jLzz/d3Od/KSH/H8868xbWwqa246kXlzZ2bellFjjj6wuEHafcNVVUcfexvbd+wCYM7s6fx4\n/SmBW1Q93kpwAFmkSOIOn3vdCm4UV0GqOim7aWNTgV3MmDGV79zwoa7HVeX7LSonYxuymLyKu01s\n6O1sVTxVmeRfc9OJzJk9ne+v/d2eaZuqfL9FZY++IYvJq9aJqF49FnvtaleVxVPz5s6Mla6pyvdb\nVOboG7KevHJSVYOo0uKpOGmZrIsaqszJ2AKJM6nqBa1uynxtJO3k2EnqzQVTBRInX2+eUt2U+dqI\nm5bptvGZaZ3kDPQ5iTOp6gWtbpIGy5DiFiX0W11refHwTN3krNcQvEp5WaUr7rUxaJpj2JRQFqkk\n14wMxxx9AZlvVJYGDZbDXo9ZXMd2dN4q7j9Tc/QB9Bs6m55RlgZNc8S5Hjtd01mWIxvkI2nOy9ij\nT1m/no69FuUhbm8wzvXY6Zou4nVc5sqkTuKOzkzdBGCeUUWQZmqlLNd01dKicf+ZmroJwAoBFUGa\nqZWiXdOjUoaZZirLHn3Gli2f4Id3PsNLL08yY/o+HPXeA7h6tXlIJTeqFVzdeu5V/p57MXUTUKeb\nIjdVYVipsJYtn2Dtuqf3bAFcxWuq3+0Ji55Kyoupm4CaM+btQX7hkeOVGFYqrM1bduwJ8rPGp1Xy\nmkqygGqYhWNFXGyWFnevzEgzX7jwyHEOPmg/mAJjY1O5fJU3R1Zyzetr1vgYt95yYiWvqSS3J+x1\nj4c0n1MWBvqM9Np6uGplYMpfVba2Hnb77n5/Q3HXB7S+RtUmc1uZow+gamVg0rCyWpk7zPqAVZe+\nr5T/PL2VYEENskGVPX9V2bC96H7Pi5PeaX+NOM8pK3v0AXTrbbQH9k999l57/kpNqI5DFmWgaZRS\nVqUc0/LKkmkfSr7y6i7LyJSaUClDU5XZMnVTYJ16OZ2Go+09DtM5GlaoycYqT3KWRZIe/YHAt4Hf\nArYAfwy81OG4LcA2YBcwCSzu8noj1aMfdqMoe0caVqhURVVSJEll1UnLOnXzVeCFxvsLgXcAX+5w\n3GbgWODFPq83UoF+2NV9rgpU0yiO7sr8PWfVSct6ZexpwHWNx9cBf9SrLQnOU0nDbhRVtA2mFE6Z\n7yM7rDJ/zyFTWEkC/Wzg2cbjZxsfd7Ib+AFwH3BegvNVyrA703lzBjWNYu67zN9zyE5av572HcCc\nDp+/iKgX/46Wz71IlLdvdwjwa+DgxuudD9zd4bjdK1as2PNBrVajVqv1aV55lXkIqmLIKvdd5GvT\nfD/U63Xq9fqejy+55BLIMEe/EagBzxAF8zuB9/R5zgpgB3BFh6+NRI5+z66Wj27j5W07ASdVVSxO\n+JdL1jn6NcAnGo8/AdzS4ZiZwNsbj98GfBR4KME5S2/PrpaNIF/GIaiqrczpEXWWJNBfBpwCPAb8\nXuNjgEOBtY3Hc4jSNA8CG4Bbge8nOGfpte5qecpHZjupqsJxwj+5om15XKRqmJFI3ZhjlLIXep4h\nz/SXNx4pIKtmpOyFLsMsWvrLQB9I0YZ2UpWEDrRFS3+ZusmJO1NK+RmlFKm7VxaIO1NKyoK7VxZI\nnJ0pJSkL9uhzMkpDSUn5MXVTcKFLwCSVn+WVBRe6BEzSaDBHH1DoEjCNLkeT8VXhZ2WPPqCi1dpq\ndDiajK8KPyt79AE1V8lKeXM0GV8VflZOxkojyCqw+Ir+s7LqRpIqzgVTFVGFySBJ4TgZWwJVmAyS\nFI49+hKowmSQVGRVHzXboy8ByzClbFV91GyPvgQsw5SyVfVRs1U3OVi2fIIf3vkMr0/u5r0LZ3G1\nd5iSCqXoJZS9WF5ZEK170YM3GpGUHjc1K4jmsBBg4ZHjlRwaSioue/Q52LZtkguWPQC74fLLFpVu\naCipuEzdSFLFuTK2gqpe7yspfeboS6bq9b6S0megL5mq1/tKSp85+pIpc72vpPQ5GStJFZd1Hf2Z\nwC+AXcAxPY5bAmwEHgcuTHA+SdIQkgT6h4AzgPU9jtkH+AZRsD8SOBs4IsE5JUkDSlJeuTHGMYuB\nJ4AtjY9vBE4HHklwXknSALKuupkLPNny8VONz0mSctKvR38HMKfD578CfC/G6zu7KkmB9Qv0pyR8\n/aeB+S0fzyfq1Xe0cuXKPY9rtRq1Wi3h6SWpWur1OvV6faDnpFFeeSdwAfCzDl+bBjwKnAz8CriX\naEK2U47e8kpJGlDW5ZVnEOXfjwfWAusanz+08THATuBzwO3Aw8C3cSJWknLlgilJKjFvPCJJMtBL\nUtW5H32O3EteUgj26HPkXvKSQjDQ58i95CWFYNVNjtxLXlLa3I9ekirOm4NLUgGELsQwR5+DZcsn\nOOucezj3vA1s2zYZujmScha6EMNAn4PQv2RJYYUuxDDQ5yD0L1lSWKuvOIalSw7h+ms/EKQQw8nY\nHFhtIykrVt1IUsW5qZkkyUAvSVVnoJekijPQS1LFGeglqeIM9JJUcQZ6Sao4NzUrmNCbH0mqHnv0\nBeO+OJLSZo++YNwXRyqHMo2+7dEXTOjNjyTFU6bRtz36ghkfH+OqK48L3QxJfZRp9O2mZpI0hKLs\nSuvulZJUce5eKUlKFOjPBH4B7AKO6XHcFuDnwAPAvQnOJ0kaQpJA/xBwBrC+z3G7gRqwCFic4HyF\nUK/XQzehrzK0EWxn2mxnusrSzjiSBPqNwGMxjy3SXEAiZfjll6GNYDvTZjvTVZZ2xpFHjn438APg\nPuC8HM4nSWrRr47+DmBOh89/BfhezHN8CPg1cHDj9TYCd8dtoCQpmTRSKncCXwTuj3HsCmAHcEWH\nrz0BHJZCeyRplGwC3tnrgLRWxnb7hzET2AfYDrwN+ChwSZdjezZUkpS/M4Angf8FngHWNT5/KLC2\n8fi3gQcbb/8FLMu5jZIkSZLy9kXgDeDA0A3p4lJggmiU8kNgftjmdPW3wCNEbb0ZmBW2OV3FXXgX\nyhKiAoLHgQsDt6Wba4Fnida2FNl8ojm9XxCN8D8ftjkdTQc2EP19PwysCtucvvYhWowatzimEOYD\n/wFspriB/u0tj88H/ilUQ/o4hTfLZy9rvBXRe4B3EQWAogX6fYiKBBYAY0R//EeEbFAXJxAtSCx6\noJ8D/E7j8f7AoxTz5zmz8X4a8BPgwwHb0s8XgH8F1vQ6qGh73fwd8JehG9HH9pbH+wMvhGpIH3cQ\njYwg6qHMC9iWXgZZeJe3xUSBfgswCdwInB6yQV3cDWwN3YgYniH6ZwlR9d0jRHN6RfNq4/2+RP/s\nXwzYll7mAUuJOpul2dTsdOApon1xiu6vgP8BPkFxe8qt/hS4LXQjSmguUcFB01ONzym5BUSjkA2B\n29HJVKJ/SM8SjTQfDtucrr4GfIk3O3Rd5X3jkW4LsC4iqsj5aMvnQm6b0G+h2EWNty8T/bA/mV/T\n3iLOgraLgNeBb+XVqA7SWHgXgvtmZ2N/4LvAnxP17IvmDaIU0yzgdqK9uuoB29PJHwLPEeXna2Gb\nEt97if57bm68TRINl38jYJvi+E2iSaWiOhe4h2iCqeiKmKM/nmjOqGkZxZ2QXUDxc/QQzXXcDvxF\n6IbEdDFwQehGdPDXRKPNzUQ7D7wC/EvQFg2hyJOxh7c8Ph+4PlRD+lhCVN1wUOiGxHQncGzoRrSZ\nRrTqcAFRvraok7FQjkA/hSgYfS10Q3o4CDig8XgG0e68J4drTiwnUeyRcVf/TXED/XeJ/qAeBG6i\nuKOOx4FfEg3tHgCuDtucrrotvCuKU4mqQ56guAv+bgB+Bfwf0c8yVCqxnw8TpUUe5M3rcknQFu3t\nKKLtXB4kmi/8UtjmxHISfapuJEmSJEmSJEmSJEmSJEmSJEmSJKky/h9/iNWzzLPTIwAAAABJRU5E\nrkJggg==\n",
"text": "<matplotlib.figure.Figure at 0x7f1dd01d3780>"
}
],
"language": "python",
"trusted": true,
"collapsed": false
}
],
"metadata": {}
}
],
"metadata": {
"name": "",
"signature": "sha256:d16cc6d843b854f2a4de784d6550ee2412a461cb76dba931f85ab70c70017633"
},
"nbformat": 3
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment