Skip to content

Instantly share code, notes, and snippets.

@grahamanderson
Created August 12, 2016 19:29
Show Gist options
  • Save grahamanderson/0450ef1072026559bdacbd198f58f4d0 to your computer and use it in GitHub Desktop.
Save grahamanderson/0450ef1072026559bdacbd198f58f4d0 to your computer and use it in GitHub Desktop.
projects/dnc_leaks/DNC-Modeling.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"trusted": true,
"collapsed": false
},
"cell_type": "code",
"source": "import matplotlib.pyplot as plt\n%matplotlib inline\nimport pandas as pd\nimport re\nimport numpy as np\nimport pickle\nimport nltk\n\nstopwords = nltk.corpus.stopwords.words('english')\n# load nltk's SnowballStemmer as variabled 'stemmer'\nfrom nltk.stem.snowball import SnowballStemmer\nstemmer = SnowballStemmer(\"english\")\n\n",
"execution_count": 3,
"outputs": []
},
{
"metadata": {
"trusted": true,
"collapsed": true
},
"cell_type": "code",
"source": "# Load the pickled DataFrame (`df`) that the rest of the notebook works on.\n# NOTE(review): pickle.load can execute arbitrary code; only open pickle files from a trusted source.\nwith open(\"dnc_df_no_everything.pkl\", \"rb\") as pkl_file:  # the context manager closes the file handle\n    df = pickle.load(pkl_file)",
"execution_count": 4,
"outputs": []
},
{
"metadata": {
"trusted": true,
"collapsed": true
},
"cell_type": "code",
"source": "# Helper functions: tokenizers and stemmers for the text columns.\n\n_HAS_LETTER = re.compile('[a-zA-Z]')\n\n\ndef _letter_tokens(text, lowercase=False):\n    \"\"\"Return the word tokens of `text` that contain at least one ASCII letter.\n\n    The text is split into sentences first and then into words, so punctuation ends up\n    as its own token and is dropped by the letter filter (as are purely numeric tokens).\n    When `lowercase` is true every token is lower-cased before filtering.\n    \"\"\"\n    words = (word for sent in nltk.sent_tokenize(text) for word in nltk.word_tokenize(sent))\n    if lowercase:\n        words = (word.lower() for word in words)\n    return [word for word in words if _HAS_LETTER.search(word)]\n\n\ndef getStems(text):\n    \"\"\"Return the Porter stems of the lower-cased, letter-containing tokens of `text`.\"\"\"\n    # Tokenization is delegated to tokenize_only() (defined below). The Porter stemmer is\n    # local to this function, so the notebook-level `stemmer` is left untouched.\n    porter_stemmer = nltk.stem.porter.PorterStemmer()\n    return [porter_stemmer.stem(token) for token in tokenize_only(text)]\n\n\n# TODO: add a helper that drops stopwords from a token list (only sketched so far).\n\n\ndef tokenize_and_stem(text):\n    \"\"\"Tokenize `text`, keep the tokens containing a letter, and stem each of them.\n\n    Stemming uses the notebook-level `stemmer`; tokens are not lower-cased here first.\n    \"\"\"\n    return [stemmer.stem(token) for token in _letter_tokens(text)]\n\n\ndef tokenize_only(text):\n    \"\"\"Tokenize `text`, lower-case the tokens, and keep those containing a letter.\"\"\"\n    return _letter_tokens(text, lowercase=True)",
"execution_count": 5,
"outputs": []
},
{
"metadata": {
"trusted": true,
"collapsed": false
},
"cell_type": "code",
"source": "from sklearn.feature_extraction.text import TfidfVectorizer\n\n# Vectorizer settings: built-in English stop words, 1- to 3-word n-grams, and only terms\n# whose document frequency lies between 20% (min_df) and 80% (max_df) of the documents.\n# No custom tokenizer is passed; add tokenizer=tokenize_and_stem to stem the terms as well.\ntfidf_vectorizer = TfidfVectorizer(\n    stop_words='english',\n    max_df=0.8,\n    max_features=200000,\n    min_df=0.2,\n    use_idf=True,\n    ngram_range=(1, 3),\n)\n\n# Learn the vocabulary from the `body` column and build the document-term TF-IDF matrix.\ntfidf_matrix = tfidf_vectorizer.fit_transform(df.body)\n\nprint(tfidf_matrix.shape)",
"execution_count": 11,
"outputs": [
{
"output_type": "stream",
"text": "(12286, 24)\n",
"name": "stdout"
}
]
},
{
"metadata": {
"trusted": true,
"collapsed": true
},
"cell_type": "code",
"source": "X3 = tfidf_matrix",
"execution_count": 13,
"outputs": []
},
{
"metadata": {
"trusted": true,
"collapsed": false
},
"cell_type": "code",
"source": "from sklearn.cluster import KMeans",
"execution_count": 12,
"outputs": []
},
{
"metadata": {
"trusted": true,
"collapsed": true
},
"cell_type": "code",
"source": "# Cluster the rows of the TF-IDF matrix X3 with k-means.\nc = 5  # number of clusters\n# A fixed seed makes the cluster assignments identical on every Restart & Run All.\nmodelKM = KMeans(n_clusters=c, random_state=42)\nmodelKM.fit(X3)\n\n# One integer cluster label per row of X3.\nclusters = modelKM.labels_\n",
"execution_count": 15,
"outputs": []
},
{
"metadata": {
"trusted": true,
"collapsed": false
},
"cell_type": "code",
"source": "df.head()",
"execution_count": 23,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": "<div>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>date</th>\n <th>from_name</th>\n <th>from_email</th>\n <th>to</th>\n <th>subject</th>\n <th>body</th>\n <th>cluster</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Tue, 17 May 2016 19:51:22 -0700</td>\n <td>Maureen Garde</td>\n <td>[email protected]</td>\n <td>\"Davis, Marilyn\" &lt;[email protected]&gt;</td>\n <td>Re: CT To Automatically Register 400,000 Voters</td>\n <td>how many more states can we get to follow conn...</td>\n <td>4</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Mon, 2 May 2016 22:19:09 -0400</td>\n <td>Contribution</td>\n <td>[email protected]</td>\n <td>&lt;[email protected]&gt;, &lt;[email protected]&gt;, &lt;olszewsk...</td>\n <td>Contribution: DE008 - DWS WLF Reception / Shek...</td>\n <td>contribution data page de...</td>\n <td>2</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Wed, 27 Apr 2016 03:48:42 -0600</td>\n <td>POLITICO</td>\n <td>[email protected]</td>\n <td>&lt;[email protected]&gt;</td>\n <td>POLITICO's Daily Congress Digest for Wednesday...</td>\n <td>politicos daily congress digest for wednesday ...</td>\n <td>4</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Thu, 19 May 2016 08:42:10 -0700</td>\n <td>Corinne Matti</td>\n <td>[email protected]</td>\n <td>\"Miranda, Luis\" &lt;[email protected]&gt;</td>\n <td>Re: America's Newsroom (FNC) - Luis Miranda (S...</td>\n <td>yup they also have iq media and snapstreamon m...</td>\n <td>4</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Fri, 20 May 2016 11:30:27 -0700</td>\n <td>Brinster Jeremy</td>\n <td>[email protected]</td>\n <td>\"Walker, Eric\" &lt;[email protected]&gt;, \"Wei, Shu-Ye...</td>\n <td>RE: For Comms Approval: Round up of Trump-Chri...</td>\n <td>looks good please dont forgot to change the su...</td>\n <td>3</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " date from_name \\\n0 Tue, 17 May 2016 19:51:22 -0700 Maureen Garde \n1 Mon, 2 May 2016 22:19:09 -0400 Contribution \n2 Wed, 27 Apr 2016 03:48:42 -0600 POLITICO \n3 Thu, 19 May 2016 08:42:10 -0700 Corinne Matti \n4 Fri, 20 May 2016 11:30:27 -0700 Brinster Jeremy \n\n from_email \\\n0 [email protected] \n1 [email protected] \n2 [email protected] \n3 [email protected] \n4 [email protected] \n\n to \\\n0 \"Davis, Marilyn\" <[email protected]> \n1 <[email protected]>, <[email protected]>, <olszewsk... \n2 <[email protected]> \n3 \"Miranda, Luis\" <[email protected]> \n4 \"Walker, Eric\" <[email protected]>, \"Wei, Shu-Ye... \n\n subject \\\n0 Re: CT To Automatically Register 400,000 Voters \n1 Contribution: DE008 - DWS WLF Reception / Shek... \n2 POLITICO's Daily Congress Digest for Wednesday... \n3 Re: America's Newsroom (FNC) - Luis Miranda (S... \n4 RE: For Comms Approval: Round up of Trump-Chri... \n\n body cluster \n0 how many more states can we get to follow conn... 4 \n1 contribution data page de... 2 \n2 politicos daily congress digest for wednesday ... 4 \n3 yup they also have iq media and snapstreamon m... 4 \n4 looks good please dont forgot to change the su... 3 "
},
"metadata": {},
"execution_count": 23
}
]
},
{
"metadata": {
"trusted": true,
"collapsed": false
},
"cell_type": "code",
"source": "# Keep the learned vocabulary in its own variable: it has one entry per TF-IDF feature,\n# not one per row of df, so it cannot be assigned as a DataFrame column.\n# (scikit-learn >= 1.0 offers get_feature_names_out() as the replacement for this method.)\nfeature_names = tfidf_vectorizer.get_feature_names()\n\n# Attach each row's k-means cluster label.\ndf['cluster'] = clusters",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true,
"collapsed": false
},
"cell_type": "code",
"source": "df.head()",
"execution_count": 19,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": "<div>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>date</th>\n <th>from_name</th>\n <th>from_email</th>\n <th>to</th>\n <th>subject</th>\n <th>body</th>\n <th>cluster</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Tue, 17 May 2016 19:51:22 -0700</td>\n <td>Maureen Garde</td>\n <td>[email protected]</td>\n <td>\"Davis, Marilyn\" &lt;[email protected]&gt;</td>\n <td>Re: CT To Automatically Register 400,000 Voters</td>\n <td>how many more states can we get to follow conn...</td>\n <td>4</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Mon, 2 May 2016 22:19:09 -0400</td>\n <td>Contribution</td>\n <td>[email protected]</td>\n <td>&lt;[email protected]&gt;, &lt;[email protected]&gt;, &lt;olszewsk...</td>\n <td>Contribution: DE008 - DWS WLF Reception / Shek...</td>\n <td>contribution data page de...</td>\n <td>2</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Wed, 27 Apr 2016 03:48:42 -0600</td>\n <td>POLITICO</td>\n <td>[email protected]</td>\n <td>&lt;[email protected]&gt;</td>\n <td>POLITICO's Daily Congress Digest for Wednesday...</td>\n <td>politicos daily congress digest for wednesday ...</td>\n <td>4</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Thu, 19 May 2016 08:42:10 -0700</td>\n <td>Corinne Matti</td>\n <td>[email protected]</td>\n <td>\"Miranda, Luis\" &lt;[email protected]&gt;</td>\n <td>Re: America's Newsroom (FNC) - Luis Miranda (S...</td>\n <td>yup they also have iq media and snapstreamon m...</td>\n <td>4</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Fri, 20 May 2016 11:30:27 -0700</td>\n <td>Brinster Jeremy</td>\n <td>[email protected]</td>\n <td>\"Walker, Eric\" &lt;[email protected]&gt;, \"Wei, Shu-Ye...</td>\n <td>RE: For Comms Approval: Round up of Trump-Chri...</td>\n <td>looks good please dont forgot to change the su...</td>\n <td>3</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": " date from_name \\\n0 Tue, 17 May 2016 19:51:22 -0700 Maureen Garde \n1 Mon, 2 May 2016 22:19:09 -0400 Contribution \n2 Wed, 27 Apr 2016 03:48:42 -0600 POLITICO \n3 Thu, 19 May 2016 08:42:10 -0700 Corinne Matti \n4 Fri, 20 May 2016 11:30:27 -0700 Brinster Jeremy \n\n from_email \\\n0 [email protected] \n1 [email protected] \n2 [email protected] \n3 [email protected] \n4 [email protected] \n\n to \\\n0 \"Davis, Marilyn\" <[email protected]> \n1 <[email protected]>, <[email protected]>, <olszewsk... \n2 <[email protected]> \n3 \"Miranda, Luis\" <[email protected]> \n4 \"Walker, Eric\" <[email protected]>, \"Wei, Shu-Ye... \n\n subject \\\n0 Re: CT To Automatically Register 400,000 Voters \n1 Contribution: DE008 - DWS WLF Reception / Shek... \n2 POLITICO's Daily Congress Digest for Wednesday... \n3 Re: America's Newsroom (FNC) - Luis Miranda (S... \n4 RE: For Comms Approval: Round up of Trump-Chri... \n\n body cluster \n0 how many more states can we get to follow conn... 4 \n1 contribution data page de... 2 \n2 politicos daily congress digest for wednesday ... 4 \n3 yup they also have iq media and snapstreamon m... 4 \n4 looks good please dont forgot to change the su... 3 "
},
"metadata": {},
"execution_count": 19
}
]
},
{
"metadata": {
"trusted": true,
"collapsed": false
},
"cell_type": "code",
"source": "pd.Series(df.body[0].split(\" \")).value_counts()",
"execution_count": 21,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": "the 36\nto 23\n 21\na 15\nof 14\nand 9\nconnecticut 9\nstate 8\nin 8\nis 7\nvoters 7\nwere 6\ndmv 5\nstates 5\nthat 5\nregister 5\nautomatic 5\nmay 5\nvoter 4\nwill 4\nover 4\nat 4\non 4\nthis 4\nit 4\nthey 4\nby 4\nfrom 4\nregistration 4\nautomatically 4\n ..\nvisit 1\nfollow 1\nauthorized 1\nhas 1\nshould 1\nth 1\ngo 1\nwhile 1\nregarding 1\nexpansiondemocratic 1\ncloser 1\nwroteconnecticut 1\nbefore 1\nbig 1\ndesignated 1\nunderstanding 1\ncollect 1\npratt 1\nprimaryhttpthinkprogressorgpoliticsoregonclosedprimary 1\nmajority 1\npmon 1\nfuture 1\nreally 1\noffering 1\nmarilyn 1\niphoneon 1\nhoping 1\ntwitter 1\nawesomesent 1\nproblem 1\ndtype: int64"
},
"metadata": {},
"execution_count": 21
}
]
},
{
"metadata": {
"trusted": true,
"collapsed": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true,
"collapsed": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true,
"collapsed": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true,
"collapsed": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true,
"collapsed": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true,
"collapsed": true
},
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true,
"collapsed": false
},
"cell_type": "code",
"source": "from sklearn.decomposition import PCA\nfrom sklearn.decomposition import TruncatedSVD\n\n# Reduce the TF-IDF matrix to 10 components with truncated SVD.\n# NOTE(review): X3 is the only feature matrix this notebook defines; confirm it is the intended input.\ntsvd = TruncatedSVD(n_components=10)\ntsvd_data = tsvd.fit_transform(X3)",
"execution_count": 32,
"outputs": []
},
{
"metadata": {
"trusted": true,
"collapsed": false
},
"cell_type": "code",
"source": "tsvd_data",
"execution_count": 15,
"outputs": [
{
"metadata": {},
"execution_count": 15,
"data": {
"text/plain": "array([[ 4.81792362e+01, -2.08029647e+00],\n [ 1.10853647e+00, 3.25939466e-02],\n [ 1.34308897e+01, 2.30990310e-01],\n ..., \n [ 2.07420533e+01, 1.15874045e-01],\n [ 3.26191232e+00, 1.11195556e+00],\n [ 5.41718397e+00, 9.67399758e-01]])"
},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": true,
"collapsed": false
},
"cell_type": "code",
"source": "tsvd.explained_variance_ratio_",
"execution_count": 23,
"outputs": [
{
"metadata": {},
"execution_count": 23,
"data": {
"text/plain": "array([ 0.75159268, 0.06964022, 0.02216457, 0.01992867, 0.01345506,\n 0.01144528])"
},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"trusted": true,
"collapsed": false
},
"cell_type": "code",
"source": "pd.DataFrame(np.cumsum(tsvd.explained_variance_ratio_)).plot()\nplt.legend(['Variance'])",
"execution_count": 34,
"outputs": [
{
"metadata": {},
"execution_count": 34,
"data": {
"text/plain": "<matplotlib.legend.Legend at 0x13d79e668>"
},
"output_type": "execute_result"
},
{
"metadata": {},
"data": {
"text/plain": "<matplotlib.figure.Figure at 0x13d79e5c0>",
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEACAYAAAC9Gb03AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3Xt4VOW59/HvHQEVKYhA1QJGeSl4qH3FKqJ4GMRKsC2g\nrRWrgFoRq+5u6wnr1hLdrZco8vpad3dLtYCipd1YCLQgAWRQWoGoKKCJ8QQiYAsFqhGBkNz7jzUh\nQ0iYCZlkzeH3ua65MrNOcyeE36w863meZe6OiIhkr7ywCxARkealoBcRyXIKehGRLKegFxHJcgp6\nEZEsp6AXEclySQW9mRWYWZmZlZvZ2HrW32FmK83sdTNbbWZ7zOzIZPYVEZHmZYn60ZtZHlAODAQ2\nAiXAcHcva2D7bwO3uvtFjd1XRERSL5kz+r7Au+6+zt0rgenA0ANsfyXw+4PcV0REUiyZoO8KrI97\n/XFs2X7M7HCgAHi+sfuKiEjzSPXF2O8AS919e4qPKyIiB6lVEttsAI6Le90ttqw+w6lttmnUvmam\nSXdERBrJ3S3RNsmc0ZcAPc0s38zaEIT57LobmVkH4AKgqLH7xhWcVo9x48aFXoNqyp6a0rUu1ZS5\nNSUr4Rm9u1eZ2S1AMcEHw1PuXmpmY4LVPim26TBgvrt/kWjfpKsTEZEmS6bpBnd/AehdZ9lv6rye\nCkxNZl8REWk5Ghl7AJFIJOwS9qOakpOONUF61qWakpOONSUr4YCplmJmni61iIhkAjPDk7gYm1TT\njYjktuOPP55169aFXUbOys/PZ+3atQe9v87oRSSh2Jlj2GXkrIZ+/sme0auNXkQkyynoRUSynIJe\nRCTLKehFROKsX7+e9u3bZ9U1CQW9iGS0wYMHU1hYuN/yoqIijj32WKqrqxt1vO7du/Ppp59ilvAa\nZ8ZQ0ItIRhs1ahTTpk3bb/m0adMYMWIEeXnJx1xVVVUqS0sbCnoRyWjDhg3jn//8J0uXLt27bPv2\n7fz5z39mxIgRzJ07l9NPP50OHTqQn5/P/fffv3e7devWkZeXx+9+9zvy8/MZOHDg3mU1fwlMmTKF\nk08+mfbt29OzZ08mTZq0d/8lS5bQvXt3Jk6cyNFHH03Xrl2ZMmXK3vU7d+7k9ttv5/jjj6djx46c\nf/757Nq1C4Bly5bRv39/OnbsSJ8+fViyZEnz/ZDCnn0tbhY2F5H0lO7/P0ePHu2jR4/e+/rXv/61\n9+nTx93do9Gor1mzxt3dV69e7cccc4wXFRW5u/vatWvdzHzUqFG+Y8cO37lzp69du9bz8vK8qqrK\n3d3nzp3rH374obu7v/TSS962bVtfuXLl3mO3atXKCwsLfc+ePT537lxv27atb9++3d3db7rpJh8w\nYIBv2rTJq6ur/ZVXXvHdu3f7hg0bvFOnTv7CCy+4u/vChQu9U6dOvmXLlnq/v4Z+/rHlifM1mY1a\n4pHuv0giuSzR/09IzeNgLV261I888kjftWuXu7v379/fH3vssXq3vfXWW/22225zd98b6mvXrt27\nvm7Q1zVs2DB//PHH3T0I+rZt2+6z7Ze//GVfvny5V1dX++GHH+6rV6/e7xjjx4/3kSNH7rNs0KBB\n/vTTT9f7nk0Nek2BICJNFnYHlf79+9OlSxdmzZrFGWecQUlJCTNnzgRg+fLl/PSnP2XNmjXs3r2b\n3bt3c/nll++zf7du3Ro89rx583jggQcoLy+nurqaL774gq9//et713fq1Gmf6wBt27aloqKCLVu2\nsGvXLnr06LHfMdetW8cf//hH5syZAwQn3Hv27OHCCy9ssI5Fi2D9+n0fyVIbvYhkhREjRjB16lSm\nTZvGoEGD6NKlCwBXXXUVw4YNY8OGDWzfvp0xY8bs13WyoR42u3fv5nvf+x533XUXmzdvZtu2bQwe\nPDiprpedO3fmsMMO4/33399vXffu3Rk5ciRb
t25l69atbNu2jc8++4y77rqrweP9538GYf/FF9Cn\nD9x8c8IS9lLQi0hWGDlyJAsXLuTJJ59k1KhRe5dXVFTQsWNHWrduzYoVK3juuef22a++0K5ZVvMX\nQOfOncnLy2PevHkUFxcnVY+Zce2113LbbbexadMmqqurWbZsGZWVlVx99dXMmTOH4uJiqqur2blz\nJ0uWLGHjxo0NHi8ahWeegQcfhB/9CL797aTKABT0IpIl8vPzOeecc9ixYwdDhgzZu/xXv/oV9913\nHx06dODnP/85V1xxxT771Xc2X7OsXbt2PP7441x++eUcddRRTJ8+naFDhx6wjvjjTZgwgVNPPZUz\nzzyTTp06cffdd1NdXU23bt0oKiriwQcfpEuXLuTn5zNhwoRG9/lPlmavFJGENHtluDR7pYiIHJCC\nXkQkyynoRUSynIJeRCTLKehFRLJcUkFvZgVmVmZm5WY2toFtIma20szWmNniuOVrzezN2LoVqSpc\nRESSk3AKBDPLA54ABgIbgRIzK3L3srhtOgD/BVzs7hvMrHPcIaqBiLtvS23pItJS8vPzs2p+9kyT\nn5/fpP2TmeumL/Cuu68DMLPpwFCgLG6bHwDPu/sGAHffErfOUBORSEZbu3YtABUVUF4OZWX7Pt57\nDzp3hhNP3P9x7LGgz4hwJRP0XYH46XM+Jgj/eL2A1rEmm3bA4+7+TGydAwvMrAqY5O6/bWLNItKM\n3GHTpv3DvKwMtmyBr361NsSHDQu+9uoF7dqFXbk0JFWzV7YCTgcuBI4AXjGzV9z9PaC/u28ysy4E\ngV/q7kvrO0j87cAikQiRSCRF5YlIXe7w7ruwZs3+gX744fuelQ8eHHw97jg45JCwK89d0WiUaDTa\n6P0SToFgZv2AQncviL2+m2AO5PFx24wFDnP3+2OvnwTmufvzdY41DvjM3SfW8z6aAkGkGVVXB6G+\nZAm89FLwOPRQOP30IMR79679etRRYVcryUh2CoRkgv4Q4B2Ci7GbgBXAle5eGrfNicAvgQLgUGA5\ncAWwFshz9wozOwIoBu539/2mf1PQi6RWZSWsXFkb6kuXQpcucP75tY8mXuOTkCUb9Ambbty9ysxu\nIQjpPOApdy81szHBap/k7mVmNh9YBdS0xb9tZicAM83MY+/1bH0hLyJNt3MnrFhRG+zLlsEJJwSB\nPmIETJoExxwTdpUSBs1eKZKhKirglVdqg/211+CUU2rP1vv3VxNMtktZ001LUdCLHNi2bUHzS02w\nv/VW0L5eE+xnnw1f+lLYVUpLUtCLZLhPPoGXX64N9g8/hH79aoO9b1847LCwq5QwKehFMsxHH9WG\n+pIlsHkznHtubbD36QOtW4ddpaQTBb1IGqvpw14T7C+9FNz0Ob5HzNe+pj7rcmAKepE0Ul0dtKnH\n92Fv0wYuuKA22Hv10lQB0jgKepEQucMHH8CiRcFj8WJo3x4ikdpwVx92aSoFvUgL27QJXnyxNtwr\nK2HgwOBx4YXB9AEiqaSgF2lm27YFTTE1wf7JJ8EZe0249+6tphhpXgp6kRTbsQP++tfaYC8rg3PO\nqQ32007TxVNpWQp6kSaqrISSktpgf/XVIMxrgv2ss4JJwUTCoqAXaaTqali1qradfelS6NGjNtjP\nO09zrkt6UdCLJOAe3Blp0aIg3Bcvho4da4M9EgnumiSSrhT0IvXYuLE22BctgqqqfXvGdO8edoUi\nyVPQiwBbt0I0Whvs//gHDBgQhPrAgRqkJJlNQS85yR3eeAOmTw+Cvbw8mK63JthPOw3ydKt6yRIK\neskp//gHPPssTJkC//oXXH01DBoU9Ixp0ybs6kSah4Jest7u3TB3LkyeHAxcGjoUrrkmmGJAZ+2S\nC1J2K0GRdPPGG8GZ+3PPBTezvvZamDZNN90QaYiCXjLC5s1BsE+ZElxgHTUK/vY36Nkz7MpE0p+a\nbiRtVVbC
vHlB08zixTBkSNA0E4moaUYE1EYvGWzVquDM/dlng+6P11wDl18eTPMrIrXURi8ZZcsW\n+P3vg4DfvBlGjgymIPjqV8OuTCTz6YxeQlNZCS+8EIT7okXw7W8HZ+8DBmgWSJFkJHtGn1RLp5kV\nmFmZmZWb2dgGtomY2UozW2Nmixuzr+SWNWvgjjuC6QYeeggKCmDduqDnzEUXKeRFUi3hGb2Z5QHl\nwEBgI1ACDHf3srhtOgB/Ay529w1m1tndtySzb9wxdEafxbZuDZpmJk8ObtAxalTw6NUr7MpEMlcq\n2+j7Au+6+7rYgacDQ4H4sP4B8Ly7bwBw9y2N2Fey1J49MH9+0DSzYAFccgk8+GAwFYHO2kVaTjJB\n3xVYH/f6Y4IAj9cLaB1rsmkHPO7uzyS5r2SZt98Owv2ZZ+D444N299/+Fo48MuTCRHJUqnrdtAJO\nBy4EjgBeMbNXGnuQwsLCvc8jkQiRSCRF5Ulz27YtmEhs8mTYsCHoNbN4cTByVURSIxqNEo1GG71f\nMm30/YBCdy+Ivb4bcHcfH7fNWOAwd78/9vpJYB6wIdG+ccdQG30GevVVeOSRoImmoCA4e//mN9U0\nI9ISUtnrpgToaWb5ZtYGGA7MrrNNEXCumR1iZm2Bs4DSJPeVDLR+PYwYEYxWPfdc+PDD4Iy+oEAh\nL5JuEjbduHuVmd0CFBN8MDzl7qVmNiZY7ZPcvczM5gOrgCpgkru/DVDfvs31zUjzq6iA8ePhV7+C\nm26Cd97RZGIi6U4DpiQpVVXBBdb77gt6zTz4oG67JxI2TYEgKbNwIdx+ezDXTFERnHlm2BWJSGMo\n6KVBZWVw551Bd8mHH4bLLtP9VUUykSZ7lf1s2QK33ALnnRfMO/P22/Dd7yrkRTKVgl722rULJkyA\nk04K5nsvLYXbboNDDw27MhFpCjXdCO7w/PMwdiyccgq8/LIGOolkEwV9jluxIjhrr6iASZOCHjUi\nkl3UdJOjPvoIrr4ahg2D666D115TyItkKwV9jvnsM7j3XujTB3r0gPLyIOg1mlUkeynoc0RVVTCD\nZO/ewdn8G2/AAw9Au3ZhVyYizU1t9DlgwYJgwNORR8Ls2XDGGWFXJCItSUGfxUpLg1v2vfNOMODp\n0kvVF14kF6npJgtt3gw33wznnx9cYH3rLY1qFcllCvossmtXMDf8SSdBq1bBFAYa8CQiarrJAu4w\nY0Yw4OnUU+Gvfw0uuoqIgII+4y1fHpy1f/45PPkkXHhh2BWJSLpR002G+ugjuOqqoO39+uuDAU8K\neRGpj4I+w3z6KdxzTzDgqWfPoEfNtddqwJOINExNNxmkqAhuvBEuvhjefBO6dQu7IhHJBAr6DDF/\nPoweHYT92WeHXY2IZBLdMzYDLFsG3/kOzJwJ554bdjUiki6SvWes2ujT3FtvwdChMHWqQl5EDo6C\nPo2tXQsFBTBxIlxySdjViEimUtCnqb//PbjoeuedQTdKEZGDlVTQm1mBmZWZWbmZja1n/QVmtt3M\nXo897o1bt9bM3jSzlWa2IpXFZ6t//QsGD4Yrr4Qf/zjsakQk0yXsdWNmecATwEBgI1BiZkXuXlZn\n05fcfUg9h6gGIu6+rcnV5oAvvoAhQ4KeNYWFYVcjItkgmTP6vsC77r7O3SuB6cDQerZr6MqvJfk+\nOW/PHhg+HL7yFfjlLzXbpIikRjIB3BVYH/f649iyus42szfM7C9mdnLccgcWmFmJmY1uQq1Zrbo6\nmMpg166gh02ePhpFJEVSNWDqNeA4d99hZoOBWUCv2Lr+7r7JzLoQBH6puy+t7yCFcW0VkUiESCSS\novLSm3tw0fWdd2DhQmjTJuyKRCQdRaNRotFoo/dLOGDKzPoBhe5eEHt9N+DuPv4A+3wIfMPdt9ZZ\nPg74zN0n1rNPzg6YeughmDYNXnoJjjoq7GpEJFOkcsBUCdDTzPLNrA0wHJ
hd582Ojnvel+ADZKuZ\ntTWzdrHlRwAXA2sa8X1kvd/+Fn7zm2CKA4W8iDSHhE037l5lZrcAxQQfDE+5e6mZjQlW+yTge2b2\nI6AS+AK4Irb70cBMM/PYez3r7sXN8Y1kohkzYNw4WLIEutZ31UNEJAU0101IFi6EH/wgOJPv0yfs\nakQkEyXbdKPZK0OwYkUwGOr55xXyItL81ImvhZWWBgOinnoKzj8/7GpEJBco6FvQRx/BoEHw8MNB\n2IuItAQFfQvZvDmYpOwnP4GRI8OuRkRyiS7GtoDPPoMBA4Kz+V/8IuxqRCRbJHsxVkHfzHbuhG99\nK7iR969/rflrRCR1FPRpYM8e+P734ZBDYPr04KuISKqoe2XI3OHGG4Nmmz//WSEvIuFR0DeTe+6B\n1ath0SI49NCwqxGRXKagbwYTJkBRUTBJWbt2YVcjIrlOQZ9ikyfDE0/A0qXQuXPY1YiIKOhTatas\noMkmGoVu3cKuRkQkoKBPkWgUbrgB5s2D3r3DrkZEpJZGxqbA668H3Sj/8Af4xjfCrkZEZF8K+iYq\nLw8GRP3mN8HoVxGRdKOgb4KPPw7mr/nFL+DSS8OuRkSkfgr6g/TPfwZz19x8M1x3XdjViIg0TFMg\nHISKCrjoIrjgAhjf4C3SRUSal+a6aSa7dsF3vgPHHRfc2FuTlIlIWBT0zaCqKrjPa2Ul/PGP0Eqd\nU0UkRJrULMXcg/b4zZth7lyFvIhkDsVVkn72M3j1VXjxRTjssLCrERFJnoI+CY89FjTVLF0K7duH\nXY2ISOMk1b3SzArMrMzMys1sbD3rLzCz7Wb2euxxb7L7prtnnoGJE2HBAujSJexqREQaL+HFWDPL\nA8qBgcBGoAQY7u5lcdtcANzu7kMau2/ctml3MXbOHBg9GhYvhpNOCrsaEZF9JXsxNpkz+r7Au+6+\nzt0rgenA0Preswn7pp2XX4Yf/hBmz1bIi0hmSybouwLr415/HFtW19lm9oaZ/cXMTm7kvmllx46g\nG+XTT0PfvmFXIyLSNKm6GPsacJy77zCzwcAsoFdjD1JYWLj3eSQSIRKJpKi8xpk4Ec45BwoKQnl7\nEZF6RaNRotFoo/dLpo2+H1Do7gWx13cD7u4NDv43sw+BbxCEfVL7pksb/SefwCmnQEkJ9OgRdjUi\nIg1LZRt9CdDTzPLNrA0wHJhd582Ojnvel+ADZGsy+6abwkK45hqFvIhkj4RNN+5eZWa3AMUEHwxP\nuXupmY0JVvsk4Htm9iOgEvgCuOJA+zbT99Jkb70Ff/oTvPNO2JWIiKSO5rqJ861vwTe/CbfeGmoZ\nIiJJ0Vw3jbRwYXAmP3Nm2JWIiKSWbjxCMCvlHXcEc8u3aRN2NSIiqaWgJ5jm4Igj4LLLwq5ERCT1\ncr6NfscO6NULZsyAfv1a/O1FRA5aKrtXZrVHH4Vzz1XIi0j2yukz+prBUa++Ciec0KJvLSLSZLqV\nYBLGjIEvfQkmTGjRtxURSQl1r0xgzZqgK6UGR4lItsvZNvq77oL/+A/o2DHsSkREmldOntEvWADl\n5TBrVtiViIg0v5w7o9fgKBHJNTkX9E8/HVyA1eAoEckVOdXr5vPPoXdveP55OOusZn0rEZFmpwFT\n9Zg4MRgcpZAXkVySM2f0mzbB176mwVEikj00YKqOG26ADh3gkUea7S1ERFqUBkzFWbMGioqgrCzs\nSkREWl5OtNHfeacGR4lI7sr6oC8uhvfegxtvDLsSEZFwZHXQV1UFZ/MaHCUiuSyrg37qVGjfHi69\nNOxKRETCk7W9bj7/PLhz1J/+pH7zIpKdcn7A1KOPwvnnK+RFRJIKejMrMLMyMys3s7EH2O5MM6s0\ns8vilq01szfNbKWZrUhF0Yls2gSPPw4PPtgS7yYikt4S9qM3szzgCWAgsBEoMbMidy+rZ7uHgPl1\nDlENRNx9W2pKTuxnP4PrrtMIWBERSG
7AVF/gXXdfB2Bm04GhQN3hR/8GzADOrLPcaMEmotWrYfZs\n3TlKRKRGMgHcFVgf9/rj2LK9zOwrwDB3/2+CYI/nwAIzKzGz0U0pNhk1d4468sjmficRkcyQqikQ\nHgPi2+7jw76/u28ysy4EgV/q7kvrO0hhYeHe55FIhEgk0qgiiovh/fc1OEpEslM0GiUajTZ6v4Td\nK82sH1Do7gWx13cD7u7j47b5oOYp0Bn4HLjB3WfXOdY44DN3n1jP+zSpe2VVFfTpA/ffr37zIpIb\nUtm9sgToaWb5ZtYGGA7sE+Du3iP2OIGgnf4md59tZm3NrF2soCOAi4E1jf1mkjF1atBcM2xYcxxd\nRCRzJWy6cfcqM7sFKCb4YHjK3UvNbEyw2ifV3SXu+dHATDPz2Hs96+7FKap9r4oKuO8+mDkTLOFn\nm4hIbsmKkbH33x/0snnuuRQXJSKSxnLmxiM1d4567TU4/vjU1yUikq5yJuivvx6OOgoefrgZihIR\nSWM5cYepVatgzhwNjhIROZCMntTsrrvg3ns1OEpE5EAyNujnz4cPPoAxY8KuREQkvWVk0FdVwR13\nBO3yunOUiMiBZWTQT5kS3Oh76NCwKxERSX8Z1+umogJ694ZZs+DMuvNkiojkkKy9w9SECRCJKORF\nRJKVUWf0GzfCqadqcJSICGTpgKnrr4dOnWD8+ANuJiKSE7JuwJQGR4mIHJyMaaO/885ghkoNjhIR\naZyMCPr58+HDDzU4SkTkYKR90McPjmrdOuxqREQyT9oH/eTJweyUGhwlInJw0rrXTUUF9OoFRUXq\nNy8iUldWDJh65BEYMEAhLyLSFGl7Rr9hA3z96/D665CfH2JhIiJpKuMHTP3wh9ClCzz0UIhFiYik\nsYweMLVqFfzlLxocJSKSCmnXRu8edKe87z7o0CHsakREMl/aBf38+bBuHdxwQ9iViIhkh6SC3swK\nzKzMzMrNbOwBtjvTzCrN7LLG7guwZ48GR4mIpFrCoDezPOAJYBBwCnClmZ3YwHYPAfMbu2+NyZOD\n2SmHDGnstyEiIg1J5oy+L/Cuu69z90pgOlDfONV/A2YA/ziIfQEYNw4efRQs4TVkERFJVjJB3xVY\nH/f649iyvczsK8Awd/9vwBqzb7wLL4QzzkiiIhERSVqqulc+Bhyw/T0ZnTsXUlgYPI9EIkQikaYe\nUkQka0SjUaLRaKP3Szhgysz6AYXuXhB7fTfg7j4+bpsPap4CnYHPgRsImnEOuG/cMZK6ObiIiARS\nOWCqBOhpZvnAJmA4cGX8Bu7eI+6NJwNz3H22mR2SaF8REWleCYPe3avM7BagmKBN/yl3LzWzMcFq\nn1R3l0T7pq58ERFJJG3nuhERkQPLimmKRUSk6RT0IiJZTkEvIpLlFPQiIllOQS8ikuUU9CIiWU5B\nLyKS5RT0IiJZTkEvIpLlFPQiIllOQS8ikuUU9CIiWU5BLyKS5RT0IiJZTkEvIpLlFPQiIllOQS8i\nkuUU9CIiWU5BLyKS5RT0IiJZTkEvIpLlFPQiIlkuqaA3swIzKzOzcjMbW8/6IWb2ppmtNLMVZtY/\nbt3a+HWpLF5ERBJLGPRmlgc8AQwCTgGuNLMT62y20N3/r7v3AX4IPBm3rhqIuHsfd++borpbRDQa\nDbuE/aim5KRjTZCedamm5KRjTclK5oy+L/Cuu69z90pgOjA0fgN33xH3sh1BuNewJN8n7aTjP6xq\nSk461gTpWZdqSk461pSsZAK4K7A+7vXHsWX7MLNhZlYKzAGui1vlwAIzKzGz0U0pVkREGi9lZ9ru\nPsvdTwKGAT+PW9Xf3U8HLgFuNrNzU/WeIiKSmLn7gTcw6wcUuntB7PXdgLv7+APs8z5wprtvrbN8\nHPCZu0+sZ58DFyIiIvtxd0u0TaskjlMC9DSzfGATMBy4Mn4DM/s/7v5+7PnpQBt332pmbYE8d68w\nsy
OAi4H7D7ZYERFpvIRB7+5VZnYLUEzQ1POUu5ea2ZhgtU8CvmtmI4HdwBfA92O7Hw3MjJ2ttwKe\ndffi5vhGRESkfgmbbkREJLOF3u0x0WCsMJjZU2b2dzNbFXYtNcysm5m9aGZvmdlqM/txGtR0qJkt\njw2GWx27BpMWzCzPzF43s9lh1wLpOXDQzDqY2f+YWWns9+qsNKipV+xn9Hrs67/S5Hf9J2a2xsxW\nmdmzZtYmDWr699j/u8R54O6hPQg+aN4D8oHWwBvAiWHWFKvrXOA0YFXYtcTVdAxwWux5O+CdNPlZ\ntY19PQRYBvQNu6ZYPT8BpgGzw64lVs8HQMew66hT0xTg2tjzVkD7sGuqU18esBHoHnIdX4n9+7WJ\nvf4DMDLkmk4BVgGHxv7vFQM9Gto+7DP6hIOxwuDuS4FtYdcRz90/cfc3Ys8rgFLqGc/Q0rx2sNyh\nBGERelugmXUj6M77ZKJtW1BaDRw0s/bAee4+GcDd97j7pyGXVddFwPvuvj7hls3vEOAIM2sFtCX4\nAArTScByd9/l7lXAS8BlDW0c9i9eUoOxZF9mdjzBXxzLw61kbxPJSuATYIG7l4RdE/D/gDtJgw+d\nOOk2cPAEYIuZTY41k0wys8PDLqqOK4Dfh12Eu28EHgU+AjYA2919YbhVsQY4z8w6xno3XgJ0b2jj\nsINeGsnM2gEzgH+PndmHyt2rPZjjqBtwlpmdHGY9ZvYt4O+xv34s9kgH6TZwsBVwOvBfsbp2AHeH\nW1ItM2sNDAH+Jw1qOZKgpSGfoBmnnZn9IMya3L0MGA8sAOYCK4GqhrYPO+g3AMfFve4WWyb1iP3Z\nOAN4xt2Lwq4nXuzP/sVAQcil9AeGmNkHBGeDA8zs6ZBrwt03xb5uBmYSNFuG6WNgvbu/Gns9gyD4\n08Vg4LXYzytsFwEfuPvWWDPJn4BzQq4Jd5/s7me4ewTYDpQ3tG3YQb93MFbsKvZwIC16SZBeZ4M1\nfge87e7/P+xCAMyss5l1iD0/HPgmUBZmTe5+j7sf5+49CH6fXnT3kWHWZGZtY3+JETdwcE2YNbn7\n34H1ZtYrtmgg8HaIJdV1JWnQbBPzEdDPzA4zMyP4WZWGXBNm1iX29TjgUuC5hrZNZmRss/EGBmOF\nWROAmT0HRIBOZvYRMK7molWINfUHrgJWx9rEHbjH3V8IsaxjgamxqazzgD+4+9wQ60lX6Tpw8MfA\ns7Fmkg+Aa0OuBwg+GAnOom8IuxYAd19hZjMImkcqY18nhVsVAM+b2VEENd10oIvpGjAlIpLlwm66\nERGRZqae746mAAAALElEQVSgFxHJcgp6EZEsp6AXEclyCnoRkSynoBcRyXIKehGRLKegFxHJcv8L\npmvhyrL681sAAAAASUVORK5CYII=\n"
},
"output_type": "display_data"
}
]
},
{
"metadata": {
"scrolled": true,
"trusted": true,
"collapsed": false
},
"cell_type": "code",
"source": "# Use a separate name for the SVD coordinates so the email DataFrame `df` is not overwritten\n# and the earlier cells can still be re-run.\ntsvd_df = pd.DataFrame(tsvd_data)\n# Scatter of the first two SVD components.\nplt.scatter(x=tsvd_df[0], y=tsvd_df[1])",
"execution_count": 33,
"outputs": [
{
"metadata": {},
"execution_count": 33,
"data": {
"text/plain": "<matplotlib.collections.PathCollection at 0x13d7767f0>"
},
"output_type": "execute_result"
},
{
"metadata": {},
"data": {
"text/plain": "<matplotlib.figure.Figure at 0x12b500f60>",
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYkAAAEACAYAAABGYoqtAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAH+hJREFUeJzt3X901fWd5/HnO4RIlB+FNoJCTLSChJ26VdcMM7ZjmEpq\nna6wxx2m7fyoLbprHdC2zow6PXuks91RPDvb6dRxOm2ZSmvkx9RpZTlMDWxN2+EUwygW2kRI1Rv5\nGW5RsSCYAO/94/ONXGIukNzvvd/vja/HOTn3+/3k/njne5LvO5/f5u6IiIgMpiLpAEREJL2UJERE\nJC8lCRERyUtJQkRE8lKSEBGRvJQkREQkr1iShJllzOxnZrbFzNqjsolm1mpm283sSTObkPP8e82s\ny8w6zaw5jhhERCR+cdUkTgBN7n6FuzdGZfcAG9z9MuCHwL0AZjYLWAA0AB8BHjYziykOERGJUVxJ\nwgZ5r3nA8uh4OTA/Or4RWOnux9w9A3QBjYiISOrElSQcWG9mm83slqhssrv3ALj7PuD8qHwqsDPn\ntbujMhERSZnKmN7nGnffa2Y1QKuZbSckjlxa/0NEpMzEkiTcfW/0mDWz7xOaj3rMbLK795jZFGB/\n9PTdQG3Oy6dFZW9jZkosIiLD4O6x9PUW3NxkZuea2djo+DygGdgGrAFujp72SeCJ6HgN8DEzqzKz\ni4FLgfZ87+/uqf+67777Eo9hJMSoOBVn2r/KJc44xVGTmAx8L/qvvxJocfdWM/t3YLWZfRroJoxo\nwt07zGw10AH0Abd73D+ViIjEouAk4e4vAe8fpPwV4Lo8r7kfuL/QzxYRkeLSjOsYNDU1JR3CGZVD\njKA446Y441UuccbJ0tzSY2ZqiRIRGSIzw9PScS0iIiOXkoSIiOSlJCEiInkpSYiISF5KEiIikpeS\nhIiI5KUkISIieSlJiIhIXkoSIiKSl5JEArLZLJs3byabzSYdiojIaSlJlNiKFauoq5vJ3Lm3UVc3\nkxUrViUdkohIXlq7qYSy2Sx1dTM5cuQp4HJgK9XVc+jufp6ampqkwxOREUJrN5WpTCZDVVU9IUEA\nXM7o0XVkMpnkghIROQ0liRKqr6+ntzcDbI1KttLX1019fX1yQYmInIaSRAnV1NSwbNnDVFfPYfz4\nK6munsOyZQ+rqUlEUkt9EgnIZrNkMhnq6+uVIEQkdnH2SShJiIiMMOq4FhGRklCSKDFNpBORcqIk\nUUL9E+nmzLlVE+lEpCyoT6JEstks06ZNp7f3x/RPpKuq+h127epS57WIxEp9EmVoy5Yt9PbWkDuR\nrrf3PWzZsiXJsERETktJoqT2kDuRDvYmGIuIyJlVJh3AO0VtbS1wDGgC6oEMcCwqFxFJJ9UkSuTQ\noUNUV08FHDgMOGPGXMihQ4cSjkxEJD/VJEokrM90EPgecB5wGLObtG6TiKRabDUJM6sws2fNbE10\nPtHMWs1su5k9aWYTcp57r5l1mVmnmTXHFUOanVy36SbGj//vVFffpHWbRCT1YhsCa2afA64Cxrv7\njWa2FDjg7g+a2d3ARHe/x8xmAS3A1cA0YAMwfbCxriNpCGw/rdskIsWWuiGwZjYNuAH4Zk7xPGB5\ndLwcmB8d3wisdPdj7p4BuoDGOOIoBzU1NVx99dVKECJSFuJqbvoy8OeEXtl+k929B8Dd9wHnR+VT\ngZ05z9sdlYmISMoUnCTM7PeAHnd/Djhd9WZktRsNkdZsEpFyFMfopmuAG83sBqAaGGdm3wH2mdlk\nd+8xsynA/uj5u4HcyQHTorJBLVmy5K3jpqYmmpqaYgi5tFasWMXChbdTVRV2plu27GE+/vE/SDos\nERkh2traaGtrK8p7x7p2k5ldC9wVdVw/SOi4Xpqn4/o3Cc1M6xnBHdfZbJa6upkcOfIU/Ws2VVfP\nobv7efVLiEhRpK7jOo8HgLlmth34UHSOu3cA
q4EOYB1we9lngtPIZDJUVdWTu2bT6NF1ZDKZ5IIS\nETlLWgW2yFSTEJFSK5eahJA7iW4O48dfSXX1HE2iE5GyoZpEiWgSnYiUSpw1CSUJEZERRs1NIiJS\nEkoSJaLJdCJSjpQkSmDFilXU1c1k7tzbqKubyYoVq5IOSUTkrKhPosg0BFZESk19EmVEk+lEpJwp\nSRRZfX1Yrwm2RiVb6evr1o50IlIWlCSKrKamhoUL/wiYDcwAZrNw4R+pqUlEyoL6JIrsZJ/E4/Tv\nbV1dfZP6JESkaOLsk4hjqXA5jf4+iSNHmt4q6++TUJIQkbRTc1ORqU9CRMqZkkSRaYE/ESln6pMo\nES3wJyKlogX+REQkL02mExGRklCSKAEt7ici5UpJosi0uJ+IlDP1SRSRFvcTkSSoT6JMaHE/ESl3\nShJFpIl0IlLulCSKSBPpRKTcqU+iBDSRTkRKSZPpREQkL3Vci4hISShJiIhIXkoSIiKSl5KEiIjk\nVXCSMLNzzOxpM9tiZtvM7L6ofKKZtZrZdjN70swm5LzmXjPrMrNOM2suNAYRESmOWEY3mdm57v6G\nmY0CNgJ3ADcBB9z9QTO7G5jo7veY2SygBbgamAZsAKYPNoxJo5tERIYudaOb3P2N6PAcwr7ZDswD\nlkfly4H50fGNwEp3P+buGaALaIwjDhERiVcsScLMKsxsC7APWO/um4HJ7t4D4O77gPOjp08Fdua8\nfHdUJiIiKVMZx5u4+wngCjMbD3zPzP4DoTZxytOG895Llix567ipqYmmpqZhRikiMjK1tbXR1tZW\nlPeOfca1mf0P4A3gFqDJ3XvMbArwlLs3mNk9gLv70uj5PwDuc/enB3mvsu+T6OzspL29ncbGRhoa\nGpIOR0TeAVLVJ2Fm7+kfuWRm1cBcoBNYA9wcPe2TwBPR8RrgY2ZWZWYXA5cC7YXGkUaLF3+WWbOu\n4uab/5pZs65i8eI7kw5JRGRICq5JmNn7CB3TFdHXKnf/X2Y2CVgN1ALdwAJ3fy16zb3AQqAPuNPd\nW/O8d9nWJDo7O5k16ypgE/0bDsFsOjqeUY1CRIoqzppEwX0S7r4NuHKQ8leA6/K85n7g/kI/O83a\n29sJ+fHkhkMwjfb2diUJESkbmnFdJI2NjYRBXCc3HIJdUbmISHlQkiiShoYGFi26FZgNzABms2jR\nrapFiEhZ0X4SRabRTSJSatp0SERE8krVEFg5vZaWFubNm0dLS0vSoYiIDJlqEkVUW3sJu3btI6xj\nuIva2sm8/PJLSYclIiOcahJloKWlJUoQm4AdwCZ27uxRjUJEyoqSRJGsXr2aUIPInScxNSoXESkP\nShJF8uKL3cAuTp0nsZsFCxYkF5SIyBCpT6IINm7cyAc+MBdYAiwl1Ci6qKmZwP79exONTURGPvVJ\npFxrayshMfwF8DzwTWAKn/nMf0s0LhGRoVKSKILm5mZONjXVEDbs2xeVi4iUDzU3FcmHP3wDra1t\nhE33dtPc3MSTT65LOCoReSdQc1MZmDFjBmEzvjcBZ8aM6QlHJCIydKpJFMHJvSTWAecBh4EbtJeE\niJSEahIpF/aSeBdwE3Bb9DghKhcRKR9KEkVQWVkJvAY8BTwTPR6MykVEyoeSRBE8/vj3CB3WFwCb\no8ep7NixI9G4RESGSv/axiybzbJu3XpCh/VlwMXAS8BhDYEVkbKjmkTMMpkMFRXnEy5tG6G5qQ0Y\nRSaTSS4wEZFhUJKIWX19PW+++TJhxvWpzU1a3E9Eyo2am2JWU1PDmDHn8MYb3cB04BIgAxxmwYIl\nSYYmIjJkqknErKWlhTfeeAMYBfwYeJb+5iYRkXKjJBGzr33ta4QK2gy0l4SIlDsliZjV1NQAFwI7\n0V4SIlLutCxHzLSXhIgkLc5lOZQkiuDkCrBTgD38xm/MZNu25xKOSkTeKZQkysDGjRtpbW2lubmZ\na665Julw
ROQdJFVJwsymAd8GJgMngG+4+9+Z2URgFVBHGAO6wN0PRq+5F/g0cAy4091b87x32SYJ\nEZGkpC1JTAGmuPtzZjaWMMV4HvAp4IC7P2hmdwMT3f0eM5sFtABXExrsNwDTB8sGShIiIkOXqqXC\n3X2fuz8XHR8COgk3/3nA8uhpy4H50fGNwEp3P+buGaALaCw0DhERiV+sQ2DNrB54P7AJmOzuPRAS\nCXB+9LSphPGh/XZHZSIikjKxLcsRNTV9l9DHcMjMBrYTDavdaMmSJW8dNzU10dTUNNwQRURGpLa2\nNtra2ory3rGMbjKzSmAt8K/u/pWorBNocveeqN/iKXdvMLN7AHf3pdHzfgDc5+5PD/K+6pMQERmi\nVPVJRP4J6OhPEJE1wM3R8SeBJ3LKP2ZmVWZ2MXApoH09RURSKI7RTdcQVrLbRmhScuAvCTf+1UAt\n0E0YAvta9Jp7gYVAHyN0CGxnZyft7e00NjbS0NCQdDgi8g6SqiGwxVSuSWLx4s/y0ENfJ+THnSxa\ndCtf/epXzvQyEZFYKEmkWGdnJ7NmXUUY4HU5YXG/2XR0PKMahYiURBr7JCSyYcMGwjSRU5cJD+Ui\nIuVFSSJmkydPBnYxcJnwUC4iUl7U3BSzbDbLlCkXceKEEWoUu6iocPbteznaa0JEpLjU3JRiNTU1\n3HbbLcCbVFYe4JxzRvPoo48oQYhIWVJNIma1tZewa9c++msRF1zwHvbseTnpsETkHUQ1iZRqaWmJ\nEsQmYAewib17f0VLS0vCkYmIDI+SRIxWr17NYCObQrmISPlRkojRtGkXMdjIpgULFiQXlIhIAdQn\nEZNsNktd3UyOHKkEfk1Y/Xy3+iREpOTUJ5FCmUyGY8dGA68DY4GXgAqeeOLxZAMTESlAbPtJvNP1\n9vbS1/cacC5hzaZjwGF6e3uTDUxEpACqScTkRz/6ETAK+Bfga9FjZVQuIlKeVJOI1XjgJqAeyADj\nkgxGRKRgqknE5PLLLwcOAk8Bz0SPr0flIiLlSUkiJgcOHCCMaDp1jkQoFxEpT0oSMXnppW5gNwPn\nSFx66aXJBSUiUiAliRhks1m+9KUHgOPA7wCXRY/HaW/X9t0iUr6UJGKQyWQ4fvwE8G7CCKfq6HES\n69atSzQ2EZFCKEnEIMyFeBN4jdBh/Vz0eJATJ04kGZqISEGUJGLwy1/+kjD8dSpwAbA5epzKRRdd\nlGRoIiIF0TyJGDQ2NgJHgZ2E/oiLCctyHOaWW25JMjQRkYKoJhGDhoYGrr32A4TL2UaYJ9GGWSUz\nZsxINDYRkUIoScQgm83y059uBmaQO0/C/QK2bNmSYGQiIoVRkojBP/7jN+jtfTehuSl3nsQeXn5Z\ny4SLSPlSn0SBwhyJpUAf8GfABwm7070EjOPIkSNJhiciUhDVJAqUyWRwH0dYGvx/A5MJCeIG4Ndc\nd911SYYnIlIQ1SQKNHbsWHp7DxA2Gmoj9ElsBWbz8Y/fRENDQ5LhiYgUJJaahJktM7MeM9uaUzbR\nzFrNbLuZPWlmE3K+d6+ZdZlZp5k1xxFDUnbu3Am8hzDs9eQciVGjavnc5+5INDYRkULF1dz0LeDD\nA8ruATa4+2XAD4F7AcxsFrAAaAA+AjxsZrHsxZqcA8AOYDrwx8B0jh/PMHbs2GTDEhEpUCxJwt3/\nDXh1QPE8YHl0vByYHx3fCKx092PungG6gMY44kjCFVdcQUWFEzquDRgTPcK2bdsSjExEpHDF7Lg+\n3917ANx9H3B+VD6VMFa03+6orCzV1NTw+78/n3Apf0RYtylsZRqW6xARKV+l7Lj24bxoyZIlbx03\nNTXR1NQUUzjxaWy8mlWrNnPqhkMXMmbMmASjEpF3ira2Ntra2ory3sVMEj1mNtnde8xsCrA/Kt8N\n1OY8b1pUNqjcJJFWP/7xj4G9hFFN/aOb9mpXOhEpiYH/QH/xi1+M7b3jbG
4y+hvjgzXAzdHxJ4En\ncso/ZmZVZnYxcClQtjvzZLNZnnhiHWGeRBNwZfR4TLOtRaTsxVKTMLPHCHfGd5vZy8B9wAPAP5vZ\np4Fuwogm3L3DzFYDHYTe3tvdfVhNUWmQyWQ4mR+PEipMx4BJ7Nu3L8HIREQKF0uScPdP5PnWoNON\n3f1+4P44PjtpYcOhUcAmwjyJ9cDtwGtcfPHFSYYmIlIwLctRoDCCaRqhL6IG+ARhcl2fluQQkbKn\nJFGgsOHQLgau/gqjmDNnTmJxiYjEQWs3FaihoYGJE8fy6quzCdM9dgNv8thjj1FTU5NwdCIihVGS\nKNDatWt59dVDwDrgPOAwcAPjxp2XbGAiIjGwNA8sMrPUD3xqbm5m/foXgZ8CGaAe+C3e975z2bp1\n6+leKiJSFGaGu8eyJp5qEgXauXM3oU9iOjAF2AccYdu2MIdCTU4iUs7UcV2AbDbL9u0vRmcVhOam\nCsKQ2Artby0iZU81iQKEXelGA8eBfwIOAhOAP4weRUTKm5JEAcJ+EUeAcYT5EbWEBW6rgFe54oor\nEoxORKRwam4qQNiVbhRhOY5NwPbosZcPfeiDZ9Uf0dnZyfLly+ns7CxqrCIiw6EkUYCwgN8Y4EJO\nXSZ8Kq+8cuYVYBcv/iyzZl3FzTf/NbNmXcXixXcWL1gRkWFQkijAz3/+c+BN4BVOnXG9m6NHj572\ntZ2dnTz00NfJrYE89NA3VKMQkVRRkihAd3c3YZb1PwBzCMuE/xbQy/Tp00/72vb2dkIfRm4NZFpU\nLiKSDkoSBXjhhRcI6zQ1AM8DfwacAN5FRUX+S7tx40a+//3vAy8A349KtwK7orWgRETSQaObCtDR\n0UEY2dRIGPJ6kNBH8eu8zU3NzTewfn0boR/jHOAPgHOBN1m06FYaGhpKELmIyNlRTaIAx48fJ6zV\nVAGMjx4PA5VMmPD2eRIbN26MEsQm4JeEpTzOA47y6KPf4Ktf/UqJIhcROTtKEgWrJNz0u6LH0UAv\n48aNe9szW1tbObn3BNFjPfAeduzYUYpgRUSGRElimLLZbHQ0cPjrhYDxzDPPvO35tbW1vH3viQzw\nK5qbm4sdsojIkKlPYphO7m29l3Czvzx63Av08frrr7/13BUrVrFw4e1UVdVjBu6zCclkL3CM5uYP\ncc0115T6RxAROSMliWHq6ekBHDgGXAtcBLwcnUNVVRUQahALF97OkSNPceRISCTnnPM7zJ//n6iu\nruaWW25RghCR1FKSGKbPf/7z0ZERluV4NXoMS7jv3/8rINQ4qqrqowQBcDnnnHMJd911F1dffXVp\ngxYRGSL1SQxTV1cX4fIZsAx4Bnia/kt64MAh1q5dy9ixYzl69EVy+yH6+rqpr68vfdAiIkOkJFGQ\nsG8E/A0wE+gkzMAeDUzlgQeWctVVH6CiYiIwm9GjG6io+G3e+95pPP3008mFLSJylrR96TCZGWES\n3E852WndRFg6/ChhuXCA/wnUAX9C6MOoJHRa7+F977uMrVu1MZGIxCvO7UtVkyjIBZw6/HUS0Ee4\nrBXAYuB+4JbofCwnJ9JtYtu27axdu7bUQYuInDUliYLsAdqAzdHjHuASTo4HWENYJdaAGuBiBi4p\nHtZwEhFJJ41uKkgfcANhFvUuoJeQKC4AFgK3EeZCzAb2A4c4dU7FbubPn1/6sEVEzpL6JIYp9ElU\nA+sI6y8dJiSMBkIHdj3QAzwMfAF4iZCTKwlJRH0SIlIccfZJJJYkzOx64G8JTV7L3H3pIM9JeZK4\ngNCcVE9YXqOKUGM4D/h/hFVem+jvyDY7wmWXXUJtbS133HEHH/3oRxOIXERGujiTRCLNTWZWATwE\nfIjQPrPZzJ5w9+eTiGf4XgMeIywRPgH4BGE/iV7gOeBWQmd2N3Ccn/zkh5pdLSJlJak+iUagy927\nAcxsJTCPsHNPGRlDSAy1wE5CTeIo8M
fAPcB0Qg48waJFtytBiEjZSSpJTCXcVfvtIiSOMnOUMKS1\nvyN6NmEk0zFCDeJ6LrmklrVr12gzIREpSxrdVJCBe0NMJYxmugjYDbzJCy90JRSbiEjhkkoSuwl3\n0n7TorK3WbJkyVvHTU1NNDU1FTOuIerfG+LkkNYwLHYp8CZp7XQXkZGlra2Ntra2orx3IqObzGwU\nsJ3Qcb0XaAc+7u6dA56X8tFNRuiXmEp/zQFOcO654zh8+PXTvVxEpGjKflkOdz8OLAJagV8AKwcm\niLQ7mbz6CDWKkCAeffRRJQgRGTE0ma5AoUYRpD1WEXlnKPt5EiOJEoOIjGRa4E9ERPJSkhARkbyU\nJEREJC8lCRERyUtJQkRE8lKSEBGRvJQkREQkLyUJERHJS0lCRETyUpIQEZG8lCRERIBsNsvmzZvJ\nZrNJh5IqShIikoiBN+VCz4f6eZ2dnSxfvpzOzk5WrFhFXd1M5s69jbq6maxYseqM75f7+uHEUzbc\nPbVfITwRGWkee2ylV1dP8gkTrvTq6km+aNGdA87vGNL5Y4+t9I6ODn/kkUe8o6PD3d3379/v7e3t\nvn///rd93ty5H3GodpjhUO2jRp3r8DMHd/iZV1dP8v379+eNf9GiO095fXPzR94WT5Kie2c89+G4\n3qgYX0oSIiPP/v37vbp6Us5N+anohnvyJh3Onzrr88rKcQ5jBr1pjxnzLq+qmnCGzzvXYX907j5+\n/BXe3t4+aPwdHR1njPdMSabY4kwSam4SkZLKZDJUVdVzcn/484BaTt0vflpUfnbnx46dDywjbHi5\nidbWNo4ceZyDB5/h6NG/p7e3ZsDnDdyf/gJgfXS+lb6+burr6weNv729fZB4p54Sz+jRdWQymbO6\nHmmnJCEiJVVfX09vb4awLzzAYWBnzvlWwm6Ph4dwvheYG50PvGnPBfYM+LxdnPp5exgz5k8ZP/5K\nqqvnsGzZw9TU1Awaf2Nj4yDx7j4lntMlmbITV5WkGF+ouUlkROrvIxg//opT+hiGcz5mzESHqtM2\n/4wePfaU1zc39/dJTHeo9kWL7jilD+NMFi2645TX9zdv9b//SOqT0PalIpKIbDZLJpOhvr6empqa\ngs7/6q++xEMPfYPQjLSL5uYmfvKTpxk9uo6+vm6WLXuY66773VNe39nZSXt7O42NjTQ0NAw5/oGv\nHxhfkuLcvlRJQkRGhDTftEtNSUJERPKKM0mo41pERPJSkhARkbyUJEREJC8lCRERyUtJQkRE8lKS\nEBGRvJQkREQkr4KShJn9VzP7uZkdN7MrB3zvXjPrMrNOM2vOKb/SzLaa2Q4z+9tCPl9ERIqr0JrE\nNuC/AD/KLTSzBmAB0AB8BHjYzPondvwDsNDdZwAzzOzDBcaQuLa2tqRDOKNyiBEUZ9wUZ7zKJc44\nFZQk3H27u3cBA2f2zQNWuvsxd88AXUCjmU0Bxrn75uh53wbmFxJDGpTDL045xAiKM26KM17lEmec\nitUnMZWwlm6/3VHZVMIavf12RWUiIpJClWd6gpmtBybnFgEOfMHd/2+xAhMRkeTFssCfmT0F3OXu\nz0bn9xDWM18anf8AuA/oBp5y94ao/GPAte7+mTzvq9X9RESGIa4F/s5YkxiC3IDWAC1m9mVCc9Kl\nQLu7u5kdNLNGYDPwJ8Df5XvDuH5IEREZnkKHwM43s53AbGCtmf0rgLt3AKuBDmAdcHvOmt9/StiM\ndgfQ5e4/KCQGEREpnlTvJyEiIslKxYxrM3swmnT3nJk9bmbjc76X2kl5Zna9mT0fxXB3EjHkxDLN\nzH5oZr8ws21mdkdUPtHMWs1su5k9aWYTcl4z6LUtQawVZvasma1JcYwTzOyfo8/9hZn9Zkrj/Fw0\noXWrmbWYWVUa4jSzZWbWY2Zbc8qGHFex/87zxJm6+9FgceZ87y4zO2Fmk4oSZ1ybZRfyBVwHVETH\nDw
D3R8ezgC2EvpN64JecrP08DVwdHa8DPlzimCuieOqA0cBzwMwEr+EU4P3R8VhgOzATWAr8RVR+\nN/DAma5tCWL9HPAosCY6T2OMjwCfio4rgQlpixO4EHgRqIrOVwGfTEOcwAeA9wNbc8qGHFex/87z\nxJm6+9FgcUbl04AfAC8Bk6KyhjjjTEVNwt03uPuJ6HQT4QcHuJH0TsprJPSpdLt7H7CSMIkwEe6+\nz92fi44PAZ2E6zgPWB49bTknr9Og17bYcZrZNOAG4Js5xWmLcTzwQXf/FkD0+QfTFmdkFHCemVUC\n1YQ5SYnH6e7/Brw6oHhIcZXi73ywONN4P8pzPQG+DPz5gLJYJzOnIkkM8GlChoN0T8obGFtqJgaa\nWT3hv45NwGR374GQSIDzo6flu7bF1v9LndsZlrYYLwZ+ZWbfiprFvm5m56YtTnffA/wN8HL0mQfd\nfUPa4sxx/hDjSsPfeWrvR2Z2I7DT3bcN+FascZYsSZjZ+qgtrP9rW/T4n3Oe8wWgz91XlCqukcbM\nxgLfBe6MahQDRyYkNlLBzH4P6IlqPKcb3pz0aIpK4Erg7939SuAwcA8pupYAZvYuwn+NdYSmp/PM\n7A8HiSvp65lPWuMC0n0/MrNq4C8J88+KKs55Eqfl7nNP930zu5nQDPG7OcW7gdqc82lRWb7yUtoN\nXJRwDKeImhy+C3zH3Z+IinvMbLK790TVzf1ReRLX8BrgRjO7gdA0Ms7MvgPsS1GMEP7D2unu/x6d\nP05IEmm6lhDazl9091cAzOx7wG+nMM5+Q40rsXjL4H70XkJ/w8/MzKLPfNbCHLR896bhxRln50oB\nnTLXA78A3j2gvL+jqIrQBJDbAbOJ0J5qhOrg9SWOeRQnO66rCB3XDQlfx28D/2dA2VLg7uh4sM7C\nt13bEsV6LSc7rh9MW4yElY1nRMf3RdcxVdcy+v3fBoyJ/g4eIcxDSkWchJvYtkJ+F0vxdz5InKm8\nHw2Mc8D3XgImFiPOov+xneUP30VYsuPZ6OvhnO/dG/2QnUBzTvlV0R9IF/CVhOK+njCKqAu4J+Fr\neA1wnJCstkTX8XpgErAhirMVeNeZrm2J4s1NEqmLEfiPhFUBngP+hTC6KY1x3hd95lZCZ/DoNMQJ\nPAbsAd4k9Jl8Cpg41LiK/XeeJ87U3Y8Gi3PA918kGt0Ud5yaTCciInmlcXSTiIikhJKEiIjkpSQh\nIiJ5KUmIiEheShIiIpKXkoSIiOSlJCEiInkpSYiISF7/H8AbScQ7L378AAAAAElFTkSuQmCC\n"
},
"output_type": "display_data"
}
]
},
{
"metadata": {
"trusted": true,
"collapsed": true
},
"cell_type": "code",
"source": "#kmeans cluster\n# groupby of the original dataframe\n# brandonrose.org/clustering\n",
"execution_count": null,
"outputs": []
}
],
"metadata": {
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
},
"language_info": {
"pygments_lexer": "ipython3",
"version": "3.5.2",
"codemirror_mode": {
"version": 3,
"name": "ipython"
},
"mimetype": "text/x-python",
"file_extension": ".py",
"nbconvert_exporter": "python",
"name": "python"
},
"gist": {
"id": "",
"data": {
"description": "projects/dnc_leaks/DNC-Modeling.ipynb",
"public": true
}
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment