Skip to content

Instantly share code, notes, and snippets.

@psychemedia
Last active November 20, 2019 18:01
Show Gist options
  • Save psychemedia/65e267fc6d7492bfe0af142ff4fda8d8 to your computer and use it in GitHub Desktop.
Save psychemedia/65e267fc6d7492bfe0af142ff4fda8d8 to your computer and use it in GitHub Desktop.
Example of scatter plot based on median values by hour from datetimes
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Simple Timeseries Plots in `pandas`\n",
"\n",
"Using a dummy dataset..."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"First, let's create a function to generate a random datetime between two datetimes:"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"datetime.datetime(2019, 1, 29, 20, 40, 17)"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#https://stackoverflow.com/a/553448/454773\n",
"from random import randrange\n",
"from datetime import timedelta\n",
"from datetime import datetime\n",
"\n",
"def random_datetime(start=datetime.strptime('1/1/2019 1:30 PM','%d/%m/%Y %I:%M %p'),\n",
" end=datetime.strptime('31/1/2019 4:50 AM', '%d/%m/%Y %I:%M %p')):\n",
" \"\"\"\n",
" This function will return a random datetime between two datetime \n",
" objects.\n",
" \"\"\"\n",
" delta = end - start\n",
" int_delta = (delta.days * 24 * 60 * 60) + delta.seconds\n",
" random_second = randrange(int_delta)\n",
" return start + timedelta(seconds=random_second)\n",
"\n",
"random_datetime()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we can create a dummy dataframe with a couple of columns of random data:"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>val1</th>\n",
" <th>datetime</th>\n",
" <th>val2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>0.231440</td>\n",
" <td>2019-01-02 07:55:19</td>\n",
" <td>0.291342</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>0.266321</td>\n",
" <td>2019-01-22 01:14:58</td>\n",
" <td>0.009075</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>0.952032</td>\n",
" <td>2019-01-09 08:51:14</td>\n",
" <td>1.428048</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>0.643093</td>\n",
" <td>2019-01-12 09:13:53</td>\n",
" <td>1.097828</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>0.354115</td>\n",
" <td>2019-01-07 10:05:28</td>\n",
" <td>0.660788</td>\n",
" </tr>\n",
" <tr>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>995</td>\n",
" <td>0.042920</td>\n",
" <td>2019-01-17 18:38:57</td>\n",
" <td>0.042920</td>\n",
" </tr>\n",
" <tr>\n",
" <td>996</td>\n",
" <td>0.208727</td>\n",
" <td>2019-01-30 04:19:08</td>\n",
" <td>0.104364</td>\n",
" </tr>\n",
" <tr>\n",
" <td>997</td>\n",
" <td>0.249041</td>\n",
" <td>2019-01-02 03:52:03</td>\n",
" <td>0.072942</td>\n",
" </tr>\n",
" <tr>\n",
" <td>998</td>\n",
" <td>0.484270</td>\n",
" <td>2019-01-07 17:22:07</td>\n",
" <td>0.609609</td>\n",
" </tr>\n",
" <tr>\n",
" <td>999</td>\n",
" <td>0.765707</td>\n",
" <td>2019-01-03 03:24:35</td>\n",
" <td>0.224270</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1000 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" val1 datetime val2\n",
"0 0.231440 2019-01-02 07:55:19 0.291342\n",
"1 0.266321 2019-01-22 01:14:58 0.009075\n",
"2 0.952032 2019-01-09 08:51:14 1.428048\n",
"3 0.643093 2019-01-12 09:13:53 1.097828\n",
"4 0.354115 2019-01-07 10:05:28 0.660788\n",
".. ... ... ...\n",
"995 0.042920 2019-01-17 18:38:57 0.042920\n",
"996 0.208727 2019-01-30 04:19:08 0.104364\n",
"997 0.249041 2019-01-02 03:52:03 0.072942\n",
"998 0.484270 2019-01-07 17:22:07 0.609609\n",
"999 0.765707 2019-01-03 03:24:35 0.224270\n",
"\n",
"[1000 rows x 3 columns]"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"\n",
"numsamples = 1000\n",
"\n",
"df=pd.DataFrame({'val1':np.random.rand(numsamples)})\n",
"df['datetime']= [random_datetime() for i in range(numsamples)]\n",
"#Add some periodicity to the second column of random numbers...\n",
"df['val2']=df.apply(lambda x: x['val1']*(1+np.sin(2*np.pi*(x['datetime'].hour-6)/24)), axis=1)\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>val1</th>\n",
" <th>val2</th>\n",
" </tr>\n",
" <tr>\n",
" <th>datetime</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>0.401714</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>0.611881</td>\n",
" <td>0.020849</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>0.552599</td>\n",
" <td>0.074034</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>0.539660</td>\n",
" <td>0.158063</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>0.441285</td>\n",
" <td>0.220642</td>\n",
" </tr>\n",
" <tr>\n",
" <td>5</td>\n",
" <td>0.557000</td>\n",
" <td>0.412838</td>\n",
" </tr>\n",
" <tr>\n",
" <td>6</td>\n",
" <td>0.588485</td>\n",
" <td>0.588485</td>\n",
" </tr>\n",
" <tr>\n",
" <td>7</td>\n",
" <td>0.471580</td>\n",
" <td>0.593633</td>\n",
" </tr>\n",
" <tr>\n",
" <td>8</td>\n",
" <td>0.509826</td>\n",
" <td>0.764738</td>\n",
" </tr>\n",
" <tr>\n",
" <td>9</td>\n",
" <td>0.498654</td>\n",
" <td>0.851255</td>\n",
" </tr>\n",
" <tr>\n",
" <td>10</td>\n",
" <td>0.366607</td>\n",
" <td>0.684097</td>\n",
" </tr>\n",
" <tr>\n",
" <td>11</td>\n",
" <td>0.431500</td>\n",
" <td>0.848297</td>\n",
" </tr>\n",
" <tr>\n",
" <td>12</td>\n",
" <td>0.379588</td>\n",
" <td>0.759176</td>\n",
" </tr>\n",
" <tr>\n",
" <td>13</td>\n",
" <td>0.523362</td>\n",
" <td>1.028892</td>\n",
" </tr>\n",
" <tr>\n",
" <td>14</td>\n",
" <td>0.436434</td>\n",
" <td>0.814397</td>\n",
" </tr>\n",
" <tr>\n",
" <td>15</td>\n",
" <td>0.413217</td>\n",
" <td>0.705405</td>\n",
" </tr>\n",
" <tr>\n",
" <td>16</td>\n",
" <td>0.329830</td>\n",
" <td>0.494745</td>\n",
" </tr>\n",
" <tr>\n",
" <td>17</td>\n",
" <td>0.686149</td>\n",
" <td>0.863737</td>\n",
" </tr>\n",
" <tr>\n",
" <td>18</td>\n",
" <td>0.514521</td>\n",
" <td>0.514521</td>\n",
" </tr>\n",
" <tr>\n",
" <td>19</td>\n",
" <td>0.485645</td>\n",
" <td>0.359951</td>\n",
" </tr>\n",
" <tr>\n",
" <td>20</td>\n",
" <td>0.472008</td>\n",
" <td>0.236004</td>\n",
" </tr>\n",
" <tr>\n",
" <td>21</td>\n",
" <td>0.496440</td>\n",
" <td>0.145404</td>\n",
" </tr>\n",
" <tr>\n",
" <td>22</td>\n",
" <td>0.566665</td>\n",
" <td>0.075919</td>\n",
" </tr>\n",
" <tr>\n",
" <td>23</td>\n",
" <td>0.535262</td>\n",
" <td>0.018239</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" val1 val2\n",
"datetime \n",
"0 0.401714 0.000000\n",
"1 0.611881 0.020849\n",
"2 0.552599 0.074034\n",
"3 0.539660 0.158063\n",
"4 0.441285 0.220642\n",
"5 0.557000 0.412838\n",
"6 0.588485 0.588485\n",
"7 0.471580 0.593633\n",
"8 0.509826 0.764738\n",
"9 0.498654 0.851255\n",
"10 0.366607 0.684097\n",
"11 0.431500 0.848297\n",
"12 0.379588 0.759176\n",
"13 0.523362 1.028892\n",
"14 0.436434 0.814397\n",
"15 0.413217 0.705405\n",
"16 0.329830 0.494745\n",
"17 0.686149 0.863737\n",
"18 0.514521 0.514521\n",
"19 0.485645 0.359951\n",
"20 0.472008 0.236004\n",
"21 0.496440 0.145404\n",
"22 0.566665 0.075919\n",
"23 0.535262 0.018239"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# median value by hour\n",
"df.groupby([df['datetime'].dt.hour]).median()"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x11ff5d890>"
]
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"ax = df.groupby([df['datetime'].dt.hour]).median().reset_index().plot(kind='scatter',\n",
" x='datetime',y='val1')\n",
"\n",
"df.groupby([df['datetime'].dt.hour]).median().reset_index().plot(kind='scatter',\n",
" x='datetime',y='val2', color='red', ax=ax)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment