Created
August 24, 2018 09:04
-
-
Save pilipolio/7eb7129b033de9e8b15e6007db22b13a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\"Duration\",\"Start date\",\"End date\",\"Start station number\",\"Start station\",\"End station number\",\"End station\",\"Bike number\",\"Member type\"\r\n", | |
"267,\"2018-07-01 00:00:58\",\"2018-07-01 00:05:26\",31608,\"8th & Eye St SE / Barracks Row\",31610,\"Eastern Market / 7th & North Carolina Ave SE\",\"W23296\",\"Member\"\r\n", | |
"530,\"2018-07-01 00:01:00\",\"2018-07-01 00:09:50\",31230,\"Metro Center / 12th & G St NW\",31266,\"11th & M St NW\",\"W20838\",\"Member\"\r\n", | |
"3223,\"2018-07-01 00:01:07\",\"2018-07-01 00:54:50\",31620,\"5th & F St NW\",31289,\"Henry Bacon Dr & Lincoln Memorial Circle NW\",\"W22733\",\"Member\"\r\n", | |
"2153,\"2018-07-01 00:01:13\",\"2018-07-01 00:37:06\",31258,\"Lincoln Memorial\",31208,\"M St & New Jersey Ave SE\",\"W00196\",\"Casual\"\r\n", | |
"3214,\"2018-07-01 00:01:13\",\"2018-07-01 00:54:47\",31620,\"5th & F St NW\",31289,\"Henry Bacon Dr & Lincoln Memorial Circle NW\",\"W21070\",\"Member\"\r\n", | |
"3826,\"2018-07-01 00:01:14\",\"2018-07-01 01:05:00\",31217,\"USDA / 12th & C St SW\",31222,\"New York Ave & 15th St NW\",\"W21845\",\"Casual\"\r\n", | |
"1585,\"2018-07-01 00:01:16\",\"2018-07-01 00:27:41\",31265,\"5th St & Massachusetts Ave NW\",31520,\"24th & R St NE / National Arboretum\",\"W22025\",\"Member\"\r\n", | |
"2285,\"2018-07-01 00:01:25\",\"2018-07-01 00:39:30\",31258,\"Lincoln Memorial\",31209,\"1st & N St SE\",\"W22145\",\"Casual\"\r\n", | |
"742,\"2018-07-01 00:01:45\",\"2018-07-01 00:14:08\",31609,\"Maine Ave & 7th St SW\",31228,\"8th & H St NW\",\"W21379\",\"Member\"\r\n" | |
] | |
} | |
], | |
"source": [ | |
"# Print the first lines of the CSV inside the archive (unzip -p streams the member to stdout, nothing is extracted to disk).\n", | |
"! unzip -p ~/Downloads/201807-capitalbikeshare-tripdata.zip 201807-capitalbikeshare-tripdata.csv | head" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 98, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Duration</th>\n", | |
" <th>start_time</th>\n", | |
" <th>end_time</th>\n", | |
" <th>from_station</th>\n", | |
" <th>Start station</th>\n", | |
" <th>to_station</th>\n", | |
" <th>End station</th>\n", | |
" <th>Bike number</th>\n", | |
" <th>Member type</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>267</td>\n", | |
" <td>2018-07-01 00:00:58</td>\n", | |
" <td>2018-07-01 00:05:26</td>\n", | |
" <td>31608</td>\n", | |
" <td>8th & Eye St SE / Barracks Row</td>\n", | |
" <td>31610</td>\n", | |
" <td>Eastern Market / 7th & North Carolina Ave SE</td>\n", | |
" <td>W23296</td>\n", | |
" <td>Member</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>530</td>\n", | |
" <td>2018-07-01 00:01:00</td>\n", | |
" <td>2018-07-01 00:09:50</td>\n", | |
" <td>31230</td>\n", | |
" <td>Metro Center / 12th & G St NW</td>\n", | |
" <td>31266</td>\n", | |
" <td>11th & M St NW</td>\n", | |
" <td>W20838</td>\n", | |
" <td>Member</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>3223</td>\n", | |
" <td>2018-07-01 00:01:07</td>\n", | |
" <td>2018-07-01 00:54:50</td>\n", | |
" <td>31620</td>\n", | |
" <td>5th & F St NW</td>\n", | |
" <td>31289</td>\n", | |
" <td>Henry Bacon Dr & Lincoln Memorial Circle NW</td>\n", | |
" <td>W22733</td>\n", | |
" <td>Member</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>2153</td>\n", | |
" <td>2018-07-01 00:01:13</td>\n", | |
" <td>2018-07-01 00:37:06</td>\n", | |
" <td>31258</td>\n", | |
" <td>Lincoln Memorial</td>\n", | |
" <td>31208</td>\n", | |
" <td>M St & New Jersey Ave SE</td>\n", | |
" <td>W00196</td>\n", | |
" <td>Casual</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>3214</td>\n", | |
" <td>2018-07-01 00:01:13</td>\n", | |
" <td>2018-07-01 00:54:47</td>\n", | |
" <td>31620</td>\n", | |
" <td>5th & F St NW</td>\n", | |
" <td>31289</td>\n", | |
" <td>Henry Bacon Dr & Lincoln Memorial Circle NW</td>\n", | |
" <td>W21070</td>\n", | |
" <td>Member</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Duration start_time end_time from_station \\\n", | |
"0 267 2018-07-01 00:00:58 2018-07-01 00:05:26 31608 \n", | |
"1 530 2018-07-01 00:01:00 2018-07-01 00:09:50 31230 \n", | |
"2 3223 2018-07-01 00:01:07 2018-07-01 00:54:50 31620 \n", | |
"3 2153 2018-07-01 00:01:13 2018-07-01 00:37:06 31258 \n", | |
"4 3214 2018-07-01 00:01:13 2018-07-01 00:54:47 31620 \n", | |
"\n", | |
" Start station to_station \\\n", | |
"0 8th & Eye St SE / Barracks Row 31610 \n", | |
"1 Metro Center / 12th & G St NW 31266 \n", | |
"2 5th & F St NW 31289 \n", | |
"3 Lincoln Memorial 31208 \n", | |
"4 5th & F St NW 31289 \n", | |
"\n", | |
" End station Bike number Member type \n", | |
"0 Eastern Market / 7th & North Carolina Ave SE W23296 Member \n", | |
"1 11th & M St NW W20838 Member \n", | |
"2 Henry Bacon Dr & Lincoln Memorial Circle NW W22733 Member \n", | |
"3 M St & New Jersey Ave SE W00196 Casual \n", | |
"4 Henry Bacon Dr & Lincoln Memorial Circle NW W21070 Member " | |
] | |
}, | |
"execution_count": 98, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"from pathlib import Path\n", | |
"\n", | |
"import pandas as pd\n", | |
"\n", | |
"# Resolve the archive relative to the current user's home directory instead of one machine's absolute path.\n", | |
"TRIPS_ZIP = Path.home() / 'Downloads' / '201807-capitalbikeshare-tripdata.zip'\n", | |
"\n", | |
"# The zip is handed to read_csv directly; both timestamp columns are parsed as datetimes and the\n", | |
"# station-number / date columns are renamed to the short names used throughout the notebook.\n", | |
"trip_data = (\n", | |
"    pd.read_csv(TRIPS_ZIP, parse_dates=['Start date', 'End date'])\n", | |
"    .rename(columns={\n", | |
"        'Start station number': 'from_station',\n", | |
"        'End station number': 'to_station',\n", | |
"        'Start date': 'start_time',\n", | |
"        'End date': 'end_time',\n", | |
"    })\n", | |
")\n", | |
"\n", | |
"trip_data.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 103, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>from_station</th>\n", | |
" <th>Start station</th>\n", | |
" <th>to_station</th>\n", | |
" <th>End station</th>\n", | |
" <th>Bike number</th>\n", | |
" <th>Member type</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>count</th>\n", | |
" <td>404761</td>\n", | |
" <td>404761</td>\n", | |
" <td>404761</td>\n", | |
" <td>404761</td>\n", | |
" <td>404761</td>\n", | |
" <td>404761</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>unique</th>\n", | |
" <td>506</td>\n", | |
" <td>506</td>\n", | |
" <td>506</td>\n", | |
" <td>506</td>\n", | |
" <td>4260</td>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>top</th>\n", | |
" <td>31258</td>\n", | |
" <td>Lincoln Memorial</td>\n", | |
" <td>31258</td>\n", | |
" <td>Lincoln Memorial</td>\n", | |
" <td>W22174</td>\n", | |
" <td>Member</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>freq</th>\n", | |
" <td>8169</td>\n", | |
" <td>8169</td>\n", | |
" <td>8223</td>\n", | |
" <td>8223</td>\n", | |
" <td>266</td>\n", | |
" <td>293289</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" from_station Start station to_station End station \\\n", | |
"count 404761 404761 404761 404761 \n", | |
"unique 506 506 506 506 \n", | |
"top 31258 Lincoln Memorial 31258 Lincoln Memorial \n", | |
"freq 8169 8169 8223 8223 \n", | |
"\n", | |
" Bike number Member type \n", | |
"count 404761 404761 \n", | |
"unique 4260 2 \n", | |
"top W22174 Member \n", | |
"freq 266 293289 " | |
] | |
}, | |
"execution_count": 103, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Summarise the non-numeric columns. The id-like columns are cast to category first so that\n", | |
"# describe() reports count / unique / top / freq for them instead of numeric statistics.\n", | |
"(\n", | |
"    trip_data\n", | |
"    .astype({\n", | |
"        'from_station': 'category',\n", | |
"        'to_station': 'category',\n", | |
"        'Bike number': 'category',\n", | |
"        'Member type': 'category',\n", | |
"    })\n", | |
"    .loc[:, ['from_station', 'Start station', 'to_station', 'End station', 'Bike number', 'Member type']]\n", | |
"    .describe()\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Duration</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>count</th>\n", | |
" <td>404761.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mean</th>\n", | |
" <td>1354.635708</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>std</th>\n", | |
" <td>2436.873717</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>min</th>\n", | |
" <td>60.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25%</th>\n", | |
" <td>450.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>50%</th>\n", | |
" <td>802.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>75%</th>\n", | |
" <td>1401.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>max</th>\n", | |
" <td>86288.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Duration\n", | |
"count 404761.000000\n", | |
"mean 1354.635708\n", | |
"std 2436.873717\n", | |
"min 60.000000\n", | |
"25% 450.000000\n", | |
"50% 802.000000\n", | |
"75% 1401.000000\n", | |
"max 86288.000000" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYcAAAEMCAYAAAAvaXplAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAE/BJREFUeJzt3X2QXfV93/H3x6jGGIzMQ2ZLBY3wwDjBaNLYKqZ1k9mO\n0qLWbmA8GGuCbSmjoElN3DSjTgJJJ2n/oIGZYmrGtTOqcXmwa5BVT6zUxYkHZyfxTAELP4wMlKAa\nMJJ5MA+GyDXYS7794551L/tbae9d7e690r5fM3f2nN85v3N/56t797PnnHuPUlVIktTvNaMegCRp\n/BgOkqSG4SBJahgOkqSG4SBJahgOkqSG4SAtsyR3Jtk86nFIhxO/56CVJMmjwAQwDbwCPADcCuyo\nqr9Zguf7d8A5VfW+xd62tJQ8ctBK9C+q6g3ATwPXAr8D3DTsRpKsWuyBSePCcNCKVVUvVNVu4L3A\n5iTnJ5lK8msz6yTZkuQrffOV5MokDwMPd20fSfJ4kheT3JfkF7r2jcDvAu9NcjDJN7v2nzxHktck\n+bdJHkvydJJbk6zulq3tnm9zku8keSbJ7y1TebTCGQ5a8arqXmA/8AsDdrkEeDtwXjf/VeDvAacC\n/w34bJLXVdUXgf8A3FFVJ1XVz82xrS3d4x8DbwJOAj46a51/BLwZ2AD8fpKfHXCc0oIZDlLPd+n9\nch/EH1bVc1X1Q4Cq+lRVPVtV01V1PXA8vV/mg7gc+HBVfbuqDgJXA5tmnbL691X1w6r6JvBNYK6Q\nkRaV4SD1rAGeG3Ddx/tnkvybJA8meSHJ94HVwOkDbuvvAI/1zT8GrKJ30XzGk33T/5fe0YW0pAwH\nrXhJ/j69cPgK8APg9X2L//YcXX7yEb/u+sJvA5cBp1TVG4EXgMxe9xC+S+/C+Iy/S++TVE8NsQvS\nojMctGIlOTnJu4DbgU9V1V7gG8C7k7w+yTnA1nk28wZ6v8y/B6xK8vvAyX3LnwLWJjnUe+0zwG8l\nOTvJSfz/axTTC98z6cgZDlqJ/iTJX9M7PfR7wIeBX+2W3QD8iN4v9VuAT8+zrT8Fvgj8Fb1TQi/x\n6tNOn+1+Ppvka3P0/yRwG/AXwCNd/w8NuT/SovNLcJKkhkcOkqSG4SBJahgOkqSG4SBJahgOkqTG\nUXtXydNPP73Wrl27oL4/+MEPOPHEExd3QMcw6zUc6zU8azacI6nXfffd90xV/dR86x214bB27Vr2\n7NmzoL5TU1NMTk4u7oCOYdZrONZreNZsOEdSrySPzb+Wp5UkSXMwHCRJDcNBktQwHCRJDcNBktQw\nHCRJDcNBktQwHCRJjaP2S3DSqKy96guHXb593TRbDrPOo9e+c7GHJC06jxwkSQ3DQZLUMBwkSQ3D\nQZLUMBwkSQ3DQZLUMBwkSQ2/56AVZ77vKUjyyEGSNAfDQZLUMBwkSQ3DQZLUMBwkSQ3DQZLUMBwk\nSQ3DQZLUMBwkSY2BwiHJbyW5P8m3knwmyeuSnJrkS0ke7n6e0rf+1Un2JXkoyUV97W9LsrdbdmOS\ndO3HJ7mja78nydrF3lFJ0uDmDYcka4B/BayvqvOB44BNwFXAXVV1LnBXN0+S87rlbwE2Ah9Lcly3\nuY8DVwDndo+NXftW4PmqOge4AbhuUfZOkrQgg55WWgWckGQV8Hrgu8DFwC3d8luAS7rpi4Hbq+rl\nqnoE2AdckOQM4OSquruqCrh1Vp+Zbe0CNswcVUiSlt+8N96rqgNJ/iPwHeCHwJ9V1Z8lmaiqJ7rV\nngQmuuk1wN19m9jftf24m57dPtPn8e75ppO8AJwGPNM/liTbgG0AExMTTE1NDbibr3bw4MEF912J\njrV6bV83vaTbnzjh8M9xLNVysRxrr7Glthz1mjccumsJFwNnA98HPpvkff3rVFUlqaUZ4queZwew\nA2D9+vU1OTm5oO1MTU2x0L4r0bFWry1LfFfW
7eumuX7vod9aj14+uaTPfzQ61l5jS2056jXIaaVf\nAh6pqu9V1Y+BzwH/EHiqO1VE9/Ppbv0DwFl9/c/s2g5007PbX9WnO3W1Gnh2ITskSTpyg4TDd4AL\nk7y+uw6wAXgQ2A1s7tbZDHy+m94NbOo+gXQ2vQvP93anoF5McmG3nQ/M6jOzrUuBL3fXJSRJIzDI\nNYd7kuwCvgZMA1+nd2rnJGBnkq3AY8Bl3fr3J9kJPNCtf2VVvdJt7oPAzcAJwJ3dA+Am4LYk+4Dn\n6H3aSZI0IgP9T3BV9QfAH8xqfpneUcRc618DXDNH+x7g/DnaXwLeM8hYJElLz29IS5IahoMkqWE4\nSJIahoMkqWE4SJIahoMkqWE4SJIahoMkqWE4SJIahoMkqWE4SJIahoMkqWE4SJIahoMkqWE4SJIa\nhoMkqWE4SJIahoMkqWE4SJIahoMkqWE4SJIahoMkqWE4SJIahoMkqWE4SJIahoMkqWE4SJIahoMk\nqWE4SJIahoMkqbFq1AOQhrX2qi+MeghHZDHG/+i171yEkUiH5pGDJKlhOEiSGoaDJKlhOEiSGoaD\nJKlhOEiSGgOFQ5I3JtmV5H8neTDJP0hyapIvJXm4+3lK3/pXJ9mX5KEkF/W1vy3J3m7ZjUnStR+f\n5I6u/Z4kaxd7RyVJgxv0yOEjwBer6meAnwMeBK4C7qqqc4G7unmSnAdsAt4CbAQ+luS4bjsfB64A\nzu0eG7v2rcDzVXUOcANw3RHulyTpCMwbDklWA78I3ARQVT+qqu8DFwO3dKvdAlzSTV8M3F5VL1fV\nI8A+4IIkZwAnV9XdVVXArbP6zGxrF7Bh5qhCkrT8BjlyOBv4HvBfk3w9ySeSnAhMVNUT3TpPAhPd\n9Brg8b7++7u2Nd307PZX9amqaeAF4LThd0eStBgGuX3GKuCtwIeq6p4kH6E7hTSjqipJLcUA+yXZ\nBmwDmJiYYGpqakHbOXjw4IL7rkTjVq/t66ZHPYTDmjhh6cc4Tv8ei2HcXmPjbjnqNUg47Af2V9U9\n3fwueuHwVJIzquqJ7pTR093yA8BZff3P7NoOdNOz2/v77E+yClgNPDt7IFW1A9gBsH79+pqcnBxg\n+K2pqSkW2nclGrd6bRnzeyttXzfN9XuX9rZlj14+uaTbX27j9hobd8tRr3lPK1XVk8DjSd7cNW0A\nHgB2A5u7ts3A57vp3cCm7hNIZ9O78HxvdwrqxSQXdtcTPjCrz8y2LgW+3F2XkCSNwKB/3nwI+HSS\n1wLfBn6VXrDsTLIVeAy4DKCq7k+yk16ATANXVtUr3XY+CNwMnADc2T2gd7H7tiT7gOfofdpJkjQi\nA4VDVX0DWD/Hog2HWP8a4Jo52vcA58/R/hLwnkHGIklaen5DWpLUMBwkSQ3DQZLUMBwkSQ3DQZLU\nMBwkSQ3DQZLUMBwkSQ3DQZLUMBwkSQ3DQZLUMBwkSQ3DQZLUMBwkSQ3DQZLUMBwkSQ3DQZLUMBwk\nSQ3DQZLUMBwkSQ3DQZLUMBwkSQ3DQZLUMBwkSQ3DQZLUMBwkSQ3DQZLUMBwkSQ3DQZLUMBwkSQ3D\nQZLUMBwkSQ3DQZLUMBwkSQ3DQZLUMBwkSQ3DQZLUMBwkSY2BwyHJcUm+nuR/dPOnJvlSkoe7n6f0\nrXt1kn1JHkpyUV/725Ls7ZbdmCRd+/FJ7uja70mydvF2UZI0rGGOHH4TeLBv/irgrqo6F7irmyfJ\necAm4C3ARuBjSY7r+nwcuAI4t3ts7Nq3As9X1TnADcB1C9obSdKiGCgckpwJvBP4RF/zxcAt3fQt\nwCV97bdX1ctV9QiwD7ggyRnAyVV1d1UVcOusPjPb2gVsmDmqkCQtv1UDrvefgN8G3tDXNlFVT3TT\nTwIT3fQa4O6+9fZ3bT/upme3z/R5HKCqppO8AJwGPNM/iCTbgG0AExMTTE1NDTj8Vzt48OCC+65E\n41av7eum
Rz2Ew5o4YenHOE7/Hoth3F5j42456jVvOCR5F/B0Vd2XZHKudaqqktRiD26O59kB7ABY\nv359TU7OOZx5TU1NsdC+K9G41WvLVV8Y9RAOa/u6aa7fO+jfXQvz6OWTS7r95TZur7Fxtxz1GuQV\n/A7gl5P8c+B1wMlJPgU8leSMqnqiO2X0dLf+AeCsvv5ndm0HuunZ7f199idZBawGnl3gPkmSjtC8\n1xyq6uqqOrOq1tK70PzlqnofsBvY3K22Gfh8N70b2NR9Aulsehee7+1OQb2Y5MLuesIHZvWZ2dal\n3XMs+ZGIJGluR3Lsey2wM8lW4DHgMoCquj/JTuABYBq4sqpe6fp8ELgZOAG4s3sA3ATclmQf8By9\nEJIkjchQ4VBVU8BUN/0ssOEQ610DXDNH+x7g/DnaXwLeM8xYJElLx29IS5IahoMkqWE4SJIahoMk\nqWE4SJIahoMkqWE4SJIahoMkqbG0dweT5rB2zG+cJ8kjB0nSHAwHSVLDcJAkNQwHSVLDcJAkNQwH\nSVLDcJAkNQwHSVLDcJAkNQwHSVLDcJAkNQwHSVLDcJAkNQwHSVLDcJAkNQwHSVLDcJAkNQwHSVLD\ncJAkNQwHSVLDcJAkNQwHSVLDcJAkNQwHSVLDcJAkNVaNegCShrf2qi8cUf9Hr33nIo1ExyqPHCRJ\nDcNBktSYNxySnJXkz5M8kOT+JL/ZtZ+a5EtJHu5+ntLX5+ok+5I8lOSivva3JdnbLbsxSbr245Pc\n0bXfk2Tt4u+qJGlQgxw5TAPbq+o84ELgyiTnAVcBd1XVucBd3Tzdsk3AW4CNwMeSHNdt6+PAFcC5\n3WNj174VeL6qzgFuAK5bhH2TJC3QvOFQVU9U1de66b8GHgTWABcDt3Sr3QJc0k1fDNxeVS9X1SPA\nPuCCJGcAJ1fV3VVVwK2z+sxsaxewYeaoQpK0/Ia65tCd7vl54B5goqqe6BY9CUx002uAx/u67e/a\n1nTTs9tf1aeqpoEXgNOGGZskafEM/FHWJCcB/x3411X1Yv8f9lVVSWoJxjd7DNuAbQATExNMTU0t\naDsHDx5ccN+VaLHrtX3d9KJtaxxNnDD++zhur3/fk8NZjnoNFA5J/ha9YPh0VX2ua34qyRlV9UR3\nyujprv0AcFZf9zO7tgPd9Oz2/j77k6wCVgPPzh5HVe0AdgCsX7++JicnBxl+Y2pqioX2XYkWu15b\njvAz+uNu+7pprt873l8hevTyyVEP4VV8Tw5nOeo1yKeVAtwEPFhVH+5btBvY3E1vBj7f176p+wTS\n2fQuPN/bnYJ6McmF3TY/MKvPzLYuBb7cXZeQJI3AIH/evAN4P7A3yTe6tt8FrgV2JtkKPAZcBlBV\n9yfZCTxA75NOV1bVK12/DwI3AycAd3YP6IXPbUn2Ac/R+7STJGlE5g2HqvoKcKhPDm04RJ9rgGvm\naN8DnD9H+0vAe+YbiyRpefgNaUlSw3CQJDUMB0lSw3CQJDUMB0lSw3CQJDXG+2uckpaE/5Oc5uOR\ngySpYThIkhqGgySpYThIkhqGgySpYThIkhqGgySpYThIkhqGgySp4TekNbQj/XatpPHnkYMkqWE4\nSJIanlaSNDRv3Hfs88hBktQwHCRJDcNBktQwHCRJDcNBktQwHCRJDcNBktQwHCRJDcNBktQwHCRJ\nDcNBktQwHCRJDcNBktTwrqySlt3su7puXzfNliHu9OpdXZeeRw6SpIZHDivQsPfiH/avOklHP48c\nJEkNw0GS1BibcEiyMclDSfYluWrU45GklWwsrjkkOQ74z8A/AfYDX02yu6oeGO3IJI0j/w/rpTcW\n4QBcAOyrqm8DJLkduBgwHOZwpG8MaaUzXOaXqhr1GEhyKbCxqn6tm38/8Paq+o1Z620DtnWzbwYe\n6lu8GnhhwPnTgWcWbQdebfbzLma/w60z7LKVXq/DLbdewy0/0nrB0tXMer
V+uqp+at61qmrkD+BS\n4BN98+8HPjrkNnYMOg/sWcJ92bFU/Q63zrDLVnq9Drfcei1vvZayZtZr4Y9xuSB9ADirb/7Mrm0Y\nfzLk/FJZ6PMM0u9w6wy7bKXX63DLrddwy63XcMvHuV4/MS6nlVYBfwVsoBcKXwV+paruX6Ln21NV\n65di28ci6zUc6zU8azac5ajXWFyQrqrpJL8B/ClwHPDJpQqGzo4l3PaxyHoNx3oNz5oNZ8nrNRZH\nDpKk8TIu1xwkSWPEcJAkNQwHSVLDcACSvCnJTUl2jXosR4MklyT5L0nuSPJPRz2ecZfkZ5P8UZJd\nSf7lqMdzNEhyYpI9Sd416rGMuySTSf6ye41NLtZ2j9lwSPLJJE8n+das9uYGf1X17araOpqRjoch\n6/XHVXUF8OvAe0cx3lEbsl4PVtWvA5cB7xjFeEdtmHp1fgfYubyjHB9D1quAg8Dr6N2bbnEs9bfs\nRvUAfhF4K/CtvrbjgP8DvAl4LfBN4Ly+5btGPe6jrF7XA28d9diPhnoBvwzcSe/7OyMf/zjXi94N\nODcBW4B3jXrsR0G9XtMtnwA+vVhjOGaPHKrqL4DnZjX/5AZ/VfUjYOYGfyveMPVKz3XAnVX1teUe\n6zgY9vVVVbur6p8Bly/vSMfDkPWaBC4EfgW4Iskx+3vqUIapV1X9Tbf8eeD4xRrDWHwJbhmtAR7v\nm98PvD3JacA1wM8nubqq/nAkoxs/c9YL+BDwS8DqJOdU1R+NYnBj6FCvr0ng3fTeuP9zBOMaV3PW\nq7obbibZAjzT98tvpTvU6+vdwEXAG4GPLtaTrbRwmFNVPUvv/LkGUFU3AjeOehxHi6qaAqZGPIyj\nTlXdPOoxHA2q6nPA5xZ7uyvtcG0xbvC3kliv4Viv4Viv4SxrvVZaOHwVODfJ2UleS++i1+4Rj2mc\nWa/hWK/hWK/hLGu9jtlwSPIZ4H8Bb06yP8nWqpoGZm7w9yCws5b2Bn9HDes1HOs1HOs1nHGolzfe\nkyQ1jtkjB0nSwhkOkqSG4SBJahgOkqSG4SBJahgOkqSG4SBJahgOkqSG4SBJavw/4ptek/96okkA\nAAAASUVORK5CYII=\n", | |
"text/plain": [ | |
"<matplotlib.figure.Figure at 0x116b451d0>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"import matplotlib.pyplot as plt\n", | |
"%matplotlib inline\n", | |
"import numpy as np\n", | |
"\n", | |
"# Durations are in seconds and heavily right-skewed, so bin them on 20 log-spaced\n", | |
"# bin edges between 10 s and 100,000 s and show the x-axis on a log scale.\n", | |
"trip_data[['Duration']].hist(bins=np.logspace(1, 5, 20))\n", | |
"plt.xscale('log')\n", | |
"plt.xlabel('Trip duration (seconds, log scale)')\n", | |
"plt.ylabel('Number of trips')\n", | |
"\n", | |
"trip_data[['Duration']].describe()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Stations" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 104, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(506, 2) (506, 2) (506, 1)\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>station_name</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>station_id</th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>31608</th>\n", | |
" <td>8th & Eye St SE / Barracks Row</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>31230</th>\n", | |
" <td>Metro Center / 12th & G St NW</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>31620</th>\n", | |
" <td>5th & F St NW</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>31258</th>\n", | |
" <td>Lincoln Memorial</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>31217</th>\n", | |
" <td>USDA / 12th & C St SW</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" station_name\n", | |
"station_id \n", | |
"31608 8th & Eye St SE / Barracks Row\n", | |
"31230 Metro Center / 12th & G St NW\n", | |
"31620 5th & F St NW\n", | |
"31258 Lincoln Memorial\n", | |
"31217 USDA / 12th & C St SW" | |
] | |
}, | |
"execution_count": 104, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Every trip row names its start and end station; collect the distinct (name, id) pairs from each end.\n", | |
"start_stations = (\n", | |
"    trip_data[['Start station', 'from_station']]\n", | |
"    .drop_duplicates()\n", | |
"    .rename(columns={'Start station': 'station_name', 'from_station': 'station_id'})\n", | |
")\n", | |
"\n", | |
"stop_stations = (\n", | |
"    trip_data[['End station', 'to_station']]\n", | |
"    .drop_duplicates()\n", | |
"    .rename(columns={'End station': 'station_name', 'to_station': 'station_id'})\n", | |
")\n", | |
"\n", | |
"# Union of both ends, keyed by station id.\n", | |
"stations = (\n", | |
"    pd.concat([start_stations, stop_stations])\n", | |
"    .drop_duplicates()\n", | |
"    .set_index('station_id')\n", | |
")\n", | |
"\n", | |
"# Duplicates are dropped on the (name, id) pair, so an id that appears under two different names\n", | |
"# would survive twice and multiply rows in any later join on station_id. Fail loudly instead.\n", | |
"assert stations.index.is_unique, 'a station_id maps to more than one station_name'\n", | |
"\n", | |
"print(start_stations.shape, stop_stations.shape, stations.shape)\n", | |
"\n", | |
"stations.head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Station locations\n", | |
"\n", | |
"The trip data does not include station coordinates; annoyingly, they have to be downloaded separately from http://opendata.dc.gov/datasets/capital-bike-share-locations" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 105, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>objectid</th>\n", | |
" <th>id</th>\n", | |
" <th>address</th>\n", | |
" <th>terminal_number</th>\n", | |
" <th>latitude</th>\n", | |
" <th>longitude</th>\n", | |
" <th>installed</th>\n", | |
" <th>locked</th>\n", | |
" <th>install_date</th>\n", | |
" <th>removal_date</th>\n", | |
" <th>temporary_install</th>\n", | |
" <th>number_of_bikes</th>\n", | |
" <th>number_of_empty_docks</th>\n", | |
" <th>x</th>\n", | |
" <th>y</th>\n", | |
" <th>se_anno_cad_data</th>\n", | |
" <th>owner</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>216255287</td>\n", | |
" <td>26</td>\n", | |
" <td>20th & E St NW</td>\n", | |
" <td>31204</td>\n", | |
" <td>38.896300</td>\n", | |
" <td>-77.045000</td>\n", | |
" <td>YES</td>\n", | |
" <td>NO</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NO</td>\n", | |
" <td>2</td>\n", | |
" <td>13</td>\n", | |
" <td>396096.323780</td>\n", | |
" <td>136495.534104</td>\n", | |
" <td>NaN</td>\n", | |
" <td>DC</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>216255288</td>\n", | |
" <td>27</td>\n", | |
" <td>21st & I St NW</td>\n", | |
" <td>31205</td>\n", | |
" <td>38.900800</td>\n", | |
" <td>-77.047000</td>\n", | |
" <td>YES</td>\n", | |
" <td>NO</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NO</td>\n", | |
" <td>0</td>\n", | |
" <td>33</td>\n", | |
" <td>395923.084260</td>\n", | |
" <td>136995.157405</td>\n", | |
" <td>NaN</td>\n", | |
" <td>DC</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>216255289</td>\n", | |
" <td>91</td>\n", | |
" <td>13th St & New York Ave NW</td>\n", | |
" <td>31227</td>\n", | |
" <td>38.900283</td>\n", | |
" <td>-77.029822</td>\n", | |
" <td>YES</td>\n", | |
" <td>NO</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NO</td>\n", | |
" <td>2</td>\n", | |
" <td>21</td>\n", | |
" <td>397413.134775</td>\n", | |
" <td>136937.139289</td>\n", | |
" <td>NaN</td>\n", | |
" <td>DC</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>216255290</td>\n", | |
" <td>105</td>\n", | |
" <td>7th & F St NW / National Portrait Gallery</td>\n", | |
" <td>31232</td>\n", | |
" <td>38.897296</td>\n", | |
" <td>-77.022201</td>\n", | |
" <td>YES</td>\n", | |
" <td>NO</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NO</td>\n", | |
" <td>2</td>\n", | |
" <td>22</td>\n", | |
" <td>398074.126479</td>\n", | |
" <td>136605.370063</td>\n", | |
" <td>NaN</td>\n", | |
" <td>DC</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>216255291</td>\n", | |
" <td>106</td>\n", | |
" <td>17th & K St NW / Farragut Square</td>\n", | |
" <td>31233</td>\n", | |
" <td>38.902061</td>\n", | |
" <td>-77.038322</td>\n", | |
" <td>YES</td>\n", | |
" <td>NO</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NO</td>\n", | |
" <td>1</td>\n", | |
" <td>33</td>\n", | |
" <td>396675.897757</td>\n", | |
" <td>137134.786759</td>\n", | |
" <td>NaN</td>\n", | |
" <td>DC</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" objectid id address terminal_number \\\n", | |
"0 216255287 26 20th & E St NW 31204 \n", | |
"1 216255288 27 21st & I St NW 31205 \n", | |
"2 216255289 91 13th St & New York Ave NW 31227 \n", | |
"3 216255290 105 7th & F St NW / National Portrait Gallery 31232 \n", | |
"4 216255291 106 17th & K St NW / Farragut Square 31233 \n", | |
"\n", | |
" latitude longitude installed locked install_date removal_date \\\n", | |
"0 38.896300 -77.045000 YES NO NaN NaN \n", | |
"1 38.900800 -77.047000 YES NO NaN NaN \n", | |
"2 38.900283 -77.029822 YES NO NaN NaN \n", | |
"3 38.897296 -77.022201 YES NO NaN NaN \n", | |
"4 38.902061 -77.038322 YES NO NaN NaN \n", | |
"\n", | |
" temporary_install number_of_bikes number_of_empty_docks x \\\n", | |
"0 NO 2 13 396096.323780 \n", | |
"1 NO 0 33 395923.084260 \n", | |
"2 NO 2 21 397413.134775 \n", | |
"3 NO 2 22 398074.126479 \n", | |
"4 NO 1 33 396675.897757 \n", | |
"\n", | |
" y se_anno_cad_data owner \n", | |
"0 136495.534104 NaN DC \n", | |
"1 136995.157405 NaN DC \n", | |
"2 136937.139289 NaN DC \n", | |
"3 136605.370063 NaN DC \n", | |
"4 137134.786759 NaN DC " | |
] | |
}, | |
"execution_count": 105, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Resolve the locations export relative to the current user's home directory instead of one machine's absolute path.\n", | |
"LOCATIONS_CSV = Path.home() / 'Downloads' / 'Capital_Bike_Share_Locations.csv'\n", | |
"\n", | |
"# Lower-case the headers while loading (without mutating the frame afterwards); later cells\n", | |
"# address columns by lower-case names such as 'terminal_number', 'latitude' and 'longitude'.\n", | |
"station_locations = (\n", | |
"    pd.read_csv(LOCATIONS_CSV)\n", | |
"    .rename(columns=str.lower)\n", | |
")\n", | |
"\n", | |
"station_locations.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 106, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "1085a3c4833e41aaa85d33ea1acc6fb2", | |
"version_major": 2, | |
"version_minor": 0 | |
}, | |
"text/plain": [ | |
"Map(basemap={'url': 'https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', 'max_zoom': 19, 'attribution': 'Map …" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"from ipyleaflet import Map, Marker, MarkerCluster\n", | |
"\n", | |
"# One marker per row of the locations table, placed at (latitude, longitude) and titled with the terminal number.\n", | |
"station_markers = [\n", | |
"    Marker(location=(row['latitude'], row['longitude']), title=str(row['terminal_number']))\n", | |
"    for _, row in station_locations.iterrows()\n", | |
"]\n", | |
"marker_cluster = MarkerCluster(markers=station_markers)\n", | |
"\n", | |
"# Map centred on (38.9, -77) with all station markers added as a single cluster layer.\n", | |
"m = Map(center=(38.9, -77), zoom=10)\n", | |
"m.add_layer(marker_cluster);\n", | |
"\n", | |
"m" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 109, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(508, 17)\n", | |
"(506, 1)\n", | |
"(506, 3)\n" | |
] | |
} | |
], | |
"source": [ | |
"# Attach coordinates to the stations seen in the trip data: the locations table is re-keyed by\n", | |
"# terminal_number so it can be joined against the station_id index.\n", | |
"station_coordinates = station_locations.set_index('terminal_number')[['latitude', 'longitude']]\n", | |
"merged_stations = stations.join(station_coordinates, how='inner')\n", | |
"\n", | |
"# Row counts before and after; the inner join keeps only stations present in both tables.\n", | |
"print(station_locations.shape, stations.shape, merged_stations.shape, sep='\\n')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 110, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>station_name</th>\n", | |
" <th>latitude</th>\n", | |
" <th>longitude</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>31608</th>\n", | |
" <td>8th & Eye St SE / Barracks Row</td>\n", | |
" <td>38.879200</td>\n", | |
" <td>-76.995300</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>31230</th>\n", | |
" <td>Metro Center / 12th & G St NW</td>\n", | |
" <td>38.898364</td>\n", | |
" <td>-77.027869</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>31620</th>\n", | |
" <td>5th & F St NW</td>\n", | |
" <td>38.897222</td>\n", | |
" <td>-77.019347</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>31258</th>\n", | |
" <td>Lincoln Memorial</td>\n", | |
" <td>38.888251</td>\n", | |
" <td>-77.049426</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>31217</th>\n", | |
" <td>USDA / 12th & C St SW</td>\n", | |
" <td>38.886277</td>\n", | |
" <td>-77.028242</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" station_name latitude longitude\n", | |
"31608 8th & Eye St SE / Barracks Row 38.879200 -76.995300\n", | |
"31230 Metro Center / 12th & G St NW 38.898364 -77.027869\n", | |
"31620 5th & F St NW 38.897222 -77.019347\n", | |
"31258 Lincoln Memorial 38.888251 -77.049426\n", | |
"31217 USDA / 12th & C St SW 38.886277 -77.028242" | |
] | |
}, | |
"execution_count": 110, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Preview the first five stations with their coordinates.\n", | |
"merged_stations.head(5)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Flows" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 111, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th>count</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>from_station</th>\n", | |
" <th>to_station</th>\n", | |
" <th>start_time</th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th rowspan=\"5\" valign=\"top\">31000</th>\n", | |
" <th rowspan=\"5\" valign=\"top\">31000</th>\n", | |
" <th>2018-07-04 02:00:00</th>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2018-07-04 20:00:00</th>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2018-07-06 11:00:00</th>\n", | |
" <td>2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2018-07-07 11:00:00</th>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2018-07-08 10:00:00</th>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" count\n", | |
"from_station to_station start_time \n", | |
"31000 31000 2018-07-04 02:00:00 1\n", | |
" 2018-07-04 20:00:00 1\n", | |
" 2018-07-06 11:00:00 2\n", | |
" 2018-07-07 11:00:00 4\n", | |
" 2018-07-08 10:00:00 1" | |
] | |
}, | |
"execution_count": 111, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Count trips per (origin, destination) pair within each 60-minute window of the trip's start time.\n", | |
"hourly_window = pd.Grouper(key='start_time', freq='60Min')\n", | |
"hourly_starts = (\n", | |
"    trip_data\n", | |
"    .groupby(['from_station', 'to_station', hourly_window])\n", | |
"    .size()\n", | |
"    .to_frame('count')\n", | |
")\n", | |
"\n", | |
"hourly_starts.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 210, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(43097, 1)\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th>count</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>from_station</th>\n", | |
" <th>to_station</th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>31247</th>\n", | |
" <th>31247</th>\n", | |
" <td>911</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>31248</th>\n", | |
" <th>31248</th>\n", | |
" <td>871</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>31258</th>\n", | |
" <th>31249</th>\n", | |
" <td>801</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>31288</th>\n", | |
" <th>31288</th>\n", | |
" <td>790</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>31258</th>\n", | |
" <th>31258</th>\n", | |
" <td>732</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>31247</th>\n", | |
" <th>31258</th>\n", | |
" <td>641</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>31290</th>\n", | |
" <th>31258</th>\n", | |
" <td>639</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>31240</th>\n", | |
" <th>31240</th>\n", | |
" <td>638</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>31290</th>\n", | |
" <th>31290</th>\n", | |
" <td>636</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>31289</th>\n", | |
" <th>31289</th>\n", | |
" <td>614</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" count\n", | |
"from_station to_station \n", | |
"31247 31247 911\n", | |
"31248 31248 871\n", | |
"31258 31249 801\n", | |
"31288 31288 790\n", | |
"31258 31258 732\n", | |
"31247 31258 641\n", | |
"31290 31258 639\n", | |
"31240 31240 638\n", | |
"31290 31290 636\n", | |
"31289 31289 614" | |
] | |
}, | |
"execution_count": 210, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYcAAAEACAYAAABYq7oeAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAEchJREFUeJzt3X+s3Xddx/Hny01GGQwZMze1XeyMFd2PGNzNnJKYm4xI\nEUIXxVkyXIfNGtwENE20NSb81WQkTmGLDCtgO10YdRJbwybO4gkxcZsdkJRtWWhYx1q7jd+zJAy7\nvP3jfusO93Pb3nvOufectc9HctLv+ZzP5/v93PvO6et+f5zvSVUhSVK/Hxv3BCRJk8dwkCQ1DAdJ\nUsNwkCQ1DAdJUsNwkCQ1DAdJUsNwkCQ1DAdJUsNwkCQ1zh33BAZ10UUX1Zo1awYa+/3vf5/zzz9/\ntBPS0KzL5LEmk2mYujzyyCPfrKqfPF2/l204rFmzhv379w80ttfrMTMzM9oJaWjWZfJYk8k0TF2S\nPLWQfh5WkiQ1DAdJUsNwkCQ1DAdJUsNwkCQ1DAdJUsNwkCQ1DAdJUuNl+yG4YRw48j1u3PrZgccf\nuvVtI5yNJE0e9xwkSQ3DQZLUMBwkSQ3DQZLUMBwkSQ3DQZLUMBwkSQ3DQZLUOG04JPlkkueSfKWv\n7cIkDyT5avfv6/pe25bkYJInkrylr/3KJAe6125Pkq79vCSf7tofSrJmtD+iJGmxFrLnsBNYN6dt\nK7CvqtYC+7rnJLkU2ABc1o35aJJzujF3AjcBa7vHiXVuAr5TVT8L/CXwoUF/GEnSaJw2HKrqC8C3\n5zSvB3Z1y7uAa/va76mqF6rqSeAgcFWSlcAFVfVgVRVw15wxJ9Z1L3DNib0KSdJ4DHrOYaqqjnbL\nzwBT3fIq4Om+foe7tlXd8tz2HxlTVceB7wGvH3BekqQRGPrGe1VVSWoUkzmdJJuBzQBTU1P0er2B\n1jO1ArZccXzgeQy6XZ3asWPH/N1OGGsymZajLoOGw7NJVlbV0e6Q0XNd+xHg4r5+q7u2I93y3Pb+\nMYeTnAu8FvjWfButqh3ADoDp6emamZkZaPJ33L2H2w4MnouHrh9suzq1Xq/HoDXV0rAmk2k56jLo\nYaW9wMZueSOwp699Q3cF0iXMnnh+uDsE9XySq7vzCTfMGXNiXe8EPt+dl5Akjclp/3xO8ilgBrgo\nyWHgg8CtwO4km4CngOsAqurRJLuBx4DjwC1V9WK3qpuZvfJpBXB/9wD4BPB3SQ4ye+J7w0h+MknS\nwE4bDlX1rpO8dM1J+m8Hts/Tvh+4fJ72HwC/fbp5SJKWj5+QliQ1DAdJUsNwkCQ1DAdJUsNwkCQ1\nDAdJUsNwkCQ1DAdJUsNwkCQ1DAdJUsNwkCQ1DAdJUsNwkCQ1DAdJUsNwkCQ1DAdJUsNwkCQ1DAdJ\nUsNwkCQ1DAdJUsNwkCQ1DAdJUsNwkCQ1DAdJUsNwkCQ1DAdJUsNwkCQ1DAdJUsNwkCQ1DAdJUmOo\ncEjyR0keTfKVJJ9K8sokFyZ5IMlXu39f19d/W5KDSZ5I8pa+9iuTHOheuz1JhpmXJGk4A4dDklXA\n+4HpqrocOAfYAGwF9lXVWmBf95wkl3avXwasAz6a5JxudXcCNwFru8e6QeclSRresIeVzgVWJDkX\neBXw38B6YFf3+i7g2m55PXBPVb1QVU8CB4GrkqwELqiqB6uqgLv6xkiSxmDgcKiqI8CfA18HjgLf\nq6p/Baaq6mjX7RlgqlteBTzdt4rDXduqbnluuyRpTM4ddGB3LmE9cAnwXeAfkry7v09VVZIaboo/\nss3NwGaAqakper3eQOuZWgFbrjg+8DwG3a5O7dixY/5uJ4w1mUzLUZeBwwF4M/BkVX0DIMlngF8F\nnk2ysqqOdoeMnuv6HwEu7hu/ums70i3PbW9U1Q5gB8D09HTNzMwMNPE77t7DbQcG/9EPXT/YdnVq\nvV6PQWuqpWFNJtNy1GWYcw5fB65O8qru6qJr
gMeBvcDGrs9GYE+3vBfYkOS8JJcwe+L54e4Q1PNJ\nru7Wc0PfGEnSGAz853NVPZTkXuCLwHHgS8z+Vf9qYHeSTcBTwHVd/0eT7AYe6/rfUlUvdqu7GdgJ\nrADu7x6SpDEZ5rASVfVB4INzml9gdi9ivv7bge3ztO8HLh9mLpKk0fET0pKkhuEgSWoYDpKkhuEg\nSWoYDpKkhuEgSWoYDpKkhuEgSWoYDpKkhuEgSWoYDpKkhuEgSWoYDpKkhuEgSWoYDpKkhuEgSWoY\nDpKkhuEgSWoM9TWhGo81Wz871PhDt75tRDORdKZyz0GS1DAcJEkNw0GS1DAcJEkNw0GS1DAcJEkN\nw0GS1DAcJEkNw0GS1DAcJEkNw0GS1Bjq3kpJfgL4OHA5UMDvAU8AnwbWAIeA66rqO13/bcAm4EXg\n/VX1ua79SmAnsAK4D/hAVdUwc1tK3ttI0plu2D2HjwD/UlU/D/wi8DiwFdhXVWuBfd1zklwKbAAu\nA9YBH01yTreeO4GbgLXdY92Q85IkDWHgcEjyWuDXgE8AVNUPq+q7wHpgV9dtF3Btt7weuKeqXqiq\nJ4GDwFVJVgIXVNWD3d7CXX1jJEljMMyewyXAN4C/TfKlJB9Pcj4wVVVHuz7PAFPd8irg6b7xh7u2\nVd3y3HZJ0pgMc87hXOCXgPdV1UNJPkJ3COmEqqokIzt3kGQzsBlgamqKXq830HqmVsCWK46PalqL\nNui8Txh27sNuf6kcO3ZsYud2trImk2k56jJMOBwGDlfVQ93ze5kNh2eTrKyqo90ho+e6148AF/eN\nX921HemW57Y3qmoHsANgenq6ZmZmBpr4HXfv4bYD4/ueo0PXzww1/sZhT4gPuf2l0uv1GLSmWhrW\nZDItR10GPqxUVc8ATyd5Q9d0DfAYsBfY2LVtBPZ0y3uBDUnOS3IJsyeeH+4OQT2f5OokAW7oGyNJ\nGoNh/3x+H3B3klcAXwPew2zg7E6yCXgKuA6gqh5NspvZADkO3FJVL3bruZmXLmW9v3tIksZkqHCo\nqi8D0/O8dM1J+m8Hts/Tvp/Zz0pIkiaAn5CWJDUMB0lSw3CQJDUMB0lSw3CQJDUMB0lSw3CQJDUM\nB0lSw3CQJDUMB0lSw3CQJDUMB0lSw3CQJDUMB0lSw3CQJDUMB0lSw3CQJDUMB0lSw3CQJDUMB0lS\nw3CQJDUMB0lSw3CQJDUMB0lSw3CQJDUMB0lSw3CQJDUMB0lSw3CQJDUMB0lS49xhV5DkHGA/cKSq\n3p7kQuDTwBrgEHBdVX2n67sN2AS8CLy/qj7XtV8J7ARWAPcBH6iqGnZumt+arZ8davyhW982oplI\nmlSj2HP4APB43/OtwL6qWgvs656T5FJgA3AZsA74aBcsAHcCNwFru8e6EcxLkjSgocIhyWrgbcDH\n+5rXA7u65V3AtX3t91TVC1X1JHAQuCrJSuCCqnqw21u4q2+MJGkMhj2s9GHgj4HX9LVNVdXRbvkZ\nYKpbXgU82NfvcNf2v93y3PZGks3AZoCpqSl6vd5Ak55aAVuuOD7Q2FEYdN4njHPuMPz8T+bYsWNL\ntm4NxppMpuWoy8DhkOTtwHNV9UiSmfn6VFUlGdm5g6raAewAmJ6erpmZeTd7WnfcvYfbDgx9umVg\nh66fGWr8jUOeMxjWsPM/mV6vx6A11dKwJpNpOeoyzP+QbwLekeQ3gFcCFyT5e+DZJCur6mh3yOi5\nrv8R4OK+8au7tiPd8tx2SdKYDHzOoaq2VdXqqlrD7Inmz1fVu4G9wMau20ZgT7e8F9iQ5LwklzB7\n4vnh7hDU80muThLghr4xkqQxWIpjK7cCu5NsAp4CrgOoqkeT7AYeA44Dt1TVi92Ym3npUtb7u4fO\nUCe7lHbLFccXdMjMS2mlpTeScKiqHtDrlr8FXHOSftuB7fO07wcuH8VcJEnD8xPSkqSG4SBJahgO\nkqSG4SBJ
ahgOkqSG4SBJahgOkqSG4SBJahgOkqSG4SBJaozvvtXSgPyaU2npuecgSWoYDpKkhuEg\nSWoYDpKkhuEgSWoYDpKkhpeyatGGvZRU0uRzz0GS1DAcJEkNw0GS1DAcJEkNw0GS1DAcJEkNw0GS\n1DAcJEkNw0GS1DAcJEkNw0GS1Bg4HJJcnOTfkzyW5NEkH+jaL0zyQJKvdv++rm/MtiQHkzyR5C19\n7VcmOdC9dnuSDPdjSZKGMcyN944DW6rqi0leAzyS5AHgRmBfVd2aZCuwFfiTJJcCG4DLgJ8C/i3J\nz1XVi8CdwE3AQ8B9wDrg/iHmJp2U30Etnd7Aew5VdbSqvtgt/w/wOLAKWA/s6rrtAq7tltcD91TV\nC1X1JHAQuCrJSuCCqnqwqgq4q2+MJGkMRnLOIcka4I3M/uU/VVVHu5eeAaa65VXA033DDndtq7rl\nue2SpDEZ+vsckrwa+EfgD6vq+f7TBVVVSWrYbfRtazOwGWBqaoperzfQeqZWwJYrjo9qWos26LxP\nGOfcl9K467JQw9bv5eTYsWNn1c/7crEcdRkqHJL8OLPBcHdVfaZrfjbJyqo62h0yeq5rPwJc3Dd8\nddd2pFue296oqh3ADoDp6emamZkZaN533L2H2w6M73uODl0/M9T4G8/QL9vZcsXxsdZloYat38tJ\nr9dj0PeZls5y1GWYq5UCfAJ4vKr+ou+lvcDGbnkjsKevfUOS85JcAqwFHu4OQT2f5OpunTf0jZEk\njcEwf6a9Cfhd4ECSL3dtfwrcCuxOsgl4CrgOoKoeTbIbeIzZK51u6a5UArgZ2AmsYPYqJa9UkqQx\nGjgcquo/gJN9HuGak4zZDmyfp30/cPmgc5EkjZafkJYkNQwHSVLDcJAkNQwHSVLDcJAkNQwHSVJj\n8j+OKk0Y7+qqs4F7DpKkhuEgSWoYDpKkhuEgSWoYDpKkhuEgSWoYDpKkhuEgSWoYDpKkhuEgSWp4\n+wxpmQ17+w3wFhxaeu45SJIahoMkqWE4SJIahoMkqWE4SJIahoMkqWE4SJIafs5Behnyq0q11Nxz\nkCQ1DAdJUsNwkCQ1DAdJUmNiTkgnWQd8BDgH+HhV3TrmKUlnrIWe0N5yxXFunKevJ7TPfBOx55Dk\nHOCvgLcClwLvSnLpeGclSWevSdlzuAo4WFVfA0hyD7AeeGyss5I0Ly+lPfNNSjisAp7ue34Y+OUx\nzUXSEhvFd1oMw3A6vUkJhwVJshnY3D39QZJH53R5LfC9eYbObb8I+OboZ7ggr82H5p3jotfD/D/r\nsP1P1+9Ury/09z9v2/vHV5fF/i5HuZ6lrstQ7WOsSTOXUa4nHxrJdsf2XmG4uvz0gnpV1dgfwK8A\nn+t7vg3YdpoxOxbSNl87sH+MP+u8c1zq9Sy0/+n6ner1hf7+T9E2lrqMqybLUZdh232vjL4mi63L\nuN4rE3FCGvgvYG2SS5K8AtgA7D3NmH9eYNup2sdhVHNZ7HoW2v90/U71+mJ+/9ZkcWMGrcuo2sfB\n98rCtrMk0qXQ2CX5DeDDzF7K+smq2r6E29pfVdNLtX4NxrpMHmsymZajLhNzzqGq7gPuW6bN7Vim\n7WhxrMvksSaTacnrMjF7DpKkyTEp5xwkSRPEcJAkNQwHSVLDcACSnJ9kV5K/SXL9uOcjSPIzST6R\n5N5xz0UvSXJt9z75dJJfH/d8BEl+IcnHktyb5PdHtd4zNhySfDLJc0m+Mqd9XZInkhxMsrVr/k3g\n3qq6CXjHsk/2LLGYmlTV16pq03hmenZZZF3+qXufvBf4nXHM92ywyJo8XlXvBa4D3jSqOZyx4QDs\nBNb1N5zi7q+reeneTi8u4xzPNjtZeE20fHay+Lr8Wfe6lsZOFlGTJO8APssIPw5wxoZDVX0B+Pac\n5v+/+2tV/RA4cffXw8wGBJzBv5NxW2RNtEwWU5fM+hBwf1V9cbnnerZY7H
ulqvZW1VuBkR0WP9v+\nI5zv7q+rgM8Av5XkTibr9gFng3lrkuT1ST4GvDHJtvFM7ax2svfK+4A3A+9M8t5xTOwsdrL3ykyS\n25P8NSPcc5iYT0iPU1V9H3jPuOehl1TVt5g9rq0JUlW3A7ePex56SVX1gN6o13u27TkcAS7ue766\na9P4WJPJZF0mz7LW5GwLh0Hu/qqlZU0mk3WZPMtakzM2HJJ8CvhP4A1JDifZVFXHgT8APgc8Duyu\nqrlfGKQlYk0mk3WZPJNQE2+8J0lqnLF7DpKkwRkOkqSG4SBJahgOkqSG4SBJahgOkqSG4SBJahgO\nkqSG4SBJavwfxdivrRBN1w0AAAAASUVORK5CYII=\n", | |
"text/plain": [ | |
"<matplotlib.figure.Figure at 0x12401aa20>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"overall_demand = trip_data.groupby(['from_station', 'to_station']).size().to_frame('count')\n", | |
"\n", | |
"print(overall_demand.shape)\n", | |
"overall_demand.head()\n", | |
"\n", | |
"log_count_bins = np.logspace(0, 3, 20)\n", | |
"overall_demand['count'].hist(bins=log_count_bins)\n", | |
"plt.xscale('log')\n", | |
"\n", | |
"overall_demand.sort_values('count', ascending=False).head(10)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 165, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'#ffe600'" | |
] | |
}, | |
"execution_count": 165, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import matplotlib as mpl\n", | |
"import matplotlib.cm as cm\n", | |
"\n", | |
"# Colour mapper: values are normalised with LogNorm(vmin=1, vmax=1000) and looked up in the 'jet' colormap.\n", | |
"scalar_to_color = cm.ScalarMappable(norm=mpl.colors.LogNorm(vmin=1, vmax=1000), cmap=plt.get_cmap('jet'))\n", | |
"\n", | |
"\n", | |
"def count_to_color(count):\n", | |
" rgb = scalar_to_color.to_rgba(count)[:-1]\n", | |
" return mpl.colors.rgb2hex(rgb)\n", | |
"\n", | |
"# Spot-check the mapping for a single count.\n", | |
"count_to_color(100)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 206, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def station_id_to_latlng(station_id):\n", | |
" return merged_stations.loc[station_id][['latitude', 'longitude']].values.tolist()\n", | |
"\n", | |
"polygons = [\n", | |
" Polygon(locations=[station_id_to_latlng(f), station_id_to_latlng(t)], \n", | |
" stroke=True, fill=True, opacity=.3, color=count_to_color(row['count']))\n", | |
" for (f, t), row in overall_demand.sort_values('count').tail(1000).iterrows()]\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 207, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "eaa584348d894adab53d98abfd19923b", | |
"version_major": 2, | |
"version_minor": 0 | |
}, | |
"text/plain": [ | |
"Map(basemap={'url': 'https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', 'max_zoom': 19, 'attribution': 'Map …" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"from ipyleaflet import Map, Polygon, LayerGroup\n", | |
"\n", | |
"m = Map(center=(38.9, -77), zoom=10)\n", | |
"\n", | |
"m.add_layer(layer=LayerGroup(layers=polygons))\n", | |
"\n", | |
"m" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 211, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "3af3b4b84ead40f19276f0d31b265fe3", | |
"version_major": 2, | |
"version_minor": 0 | |
}, | |
"text/plain": [ | |
"Map(basemap={'url': 'https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', 'max_zoom': 19, 'attribution': 'Map …" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"from ipyleaflet import Map, Marker, MarkerCluster\n", | |
"\n", | |
"m = Map(center=(38.9, -77), zoom=10)\n", | |
"\n", | |
"marker_cluster = MarkerCluster(\n", | |
" markers=[Marker(location=(row['latitude'], row['longitude']), title=str(row['terminal_number'])) for _, row in station_locations.iterrows()]\n", | |
")\n", | |
"m.add_layer(marker_cluster);\n", | |
"\n", | |
"m" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment