Skip to content

Instantly share code, notes, and snippets.

@kshirsagarsiddharth
Created December 3, 2019 06:10
Show Gist options
  • Select an option

  • Save kshirsagarsiddharth/7ba94a463b5190d638eb812aa2614f09 to your computer and use it in GitHub Desktop.

Select an option

Save kshirsagarsiddharth/7ba94a463b5190d638eb812aa2614f09 to your computer and use it in GitHub Desktop.
Created on Cognitive Class Labs
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import dateutil"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"data = pd.read_csv('phone_data.csv')"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>index</th>\n",
" <th>date</th>\n",
" <th>duration</th>\n",
" <th>item</th>\n",
" <th>month</th>\n",
" <th>network</th>\n",
" <th>network_type</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>15/10/14 06:58</td>\n",
" <td>34.429</td>\n",
" <td>data</td>\n",
" <td>2014-11</td>\n",
" <td>data</td>\n",
" <td>data</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>15/10/14 06:58</td>\n",
" <td>13.000</td>\n",
" <td>call</td>\n",
" <td>2014-11</td>\n",
" <td>Vodafone</td>\n",
" <td>mobile</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>15/10/14 14:46</td>\n",
" <td>23.000</td>\n",
" <td>call</td>\n",
" <td>2014-11</td>\n",
" <td>Meteor</td>\n",
" <td>mobile</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>15/10/14 14:48</td>\n",
" <td>4.000</td>\n",
" <td>call</td>\n",
" <td>2014-11</td>\n",
" <td>Tesco</td>\n",
" <td>mobile</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>15/10/14 17:27</td>\n",
" <td>4.000</td>\n",
" <td>call</td>\n",
" <td>2014-11</td>\n",
" <td>Tesco</td>\n",
" <td>mobile</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>825</th>\n",
" <td>825</td>\n",
" <td>13/03/15 00:38</td>\n",
" <td>1.000</td>\n",
" <td>sms</td>\n",
" <td>2015-03</td>\n",
" <td>world</td>\n",
" <td>world</td>\n",
" </tr>\n",
" <tr>\n",
" <th>826</th>\n",
" <td>826</td>\n",
" <td>13/03/15 00:39</td>\n",
" <td>1.000</td>\n",
" <td>sms</td>\n",
" <td>2015-03</td>\n",
" <td>Vodafone</td>\n",
" <td>mobile</td>\n",
" </tr>\n",
" <tr>\n",
" <th>827</th>\n",
" <td>827</td>\n",
" <td>13/03/15 06:58</td>\n",
" <td>34.429</td>\n",
" <td>data</td>\n",
" <td>2015-03</td>\n",
" <td>data</td>\n",
" <td>data</td>\n",
" </tr>\n",
" <tr>\n",
" <th>828</th>\n",
" <td>828</td>\n",
" <td>14/03/15 00:13</td>\n",
" <td>1.000</td>\n",
" <td>sms</td>\n",
" <td>2015-03</td>\n",
" <td>world</td>\n",
" <td>world</td>\n",
" </tr>\n",
" <tr>\n",
" <th>829</th>\n",
" <td>829</td>\n",
" <td>14/03/15 00:16</td>\n",
" <td>1.000</td>\n",
" <td>sms</td>\n",
" <td>2015-03</td>\n",
" <td>world</td>\n",
" <td>world</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>830 rows × 7 columns</p>\n",
"</div>"
],
"text/plain": [
" index date duration item month network network_type\n",
"0 0 15/10/14 06:58 34.429 data 2014-11 data data\n",
"1 1 15/10/14 06:58 13.000 call 2014-11 Vodafone mobile\n",
"2 2 15/10/14 14:46 23.000 call 2014-11 Meteor mobile\n",
"3 3 15/10/14 14:48 4.000 call 2014-11 Tesco mobile\n",
"4 4 15/10/14 17:27 4.000 call 2014-11 Tesco mobile\n",
".. ... ... ... ... ... ... ...\n",
"825 825 13/03/15 00:38 1.000 sms 2015-03 world world\n",
"826 826 13/03/15 00:39 1.000 sms 2015-03 Vodafone mobile\n",
"827 827 13/03/15 06:58 34.429 data 2015-03 data data\n",
"828 828 14/03/15 00:13 1.000 sms 2015-03 world world\n",
"829 829 14/03/15 00:16 1.000 sms 2015-03 world world\n",
"\n",
"[830 rows x 7 columns]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data\n"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"index int64\n",
"date object\n",
"duration float64\n",
"item object\n",
"month object\n",
"network object\n",
"network_type object\n",
"dtype: object"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"data['date'] = data['date'].apply(dateutil.parser.parse,dayfirst = True)\n",
"#convert data from string to "
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"index int64\n",
"date datetime64[ns]\n",
"duration float64\n",
"item object\n",
"month object\n",
"network object\n",
"network_type object\n",
"dtype: object"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.dtypes"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"830"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#how many rows in the dataset\n",
"data['index'].count()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"10528.0"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#what was the longest phonecall or dataentry\n",
"data['duration'].max()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"92321.0"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#How many seconds of phone calls are recorded in total\n",
"data['duration'][data['item'] == 'call'].sum()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2014-11 230\n",
"2015-01 205\n",
"2014-12 157\n",
"2015-02 137\n",
"2015-03 101\n",
"Name: month, dtype: int64"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#how many entries are there for each months\n",
"data['month'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dict_keys(['2014-11', '2014-12', '2015-01', '2015-02', '2015-03'])"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.groupby(['month']).groups.keys() #this gives unique keys or columns in the dataframe"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'2014-11': Int64Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,\n",
" ...\n",
" 220, 221, 222, 223, 224, 225, 226, 227, 229, 230],\n",
" dtype='int64', length=230),\n",
" '2014-12': Int64Index([228, 231, 232, 233, 234, 235, 236, 237, 238, 239,\n",
" ...\n",
" 377, 378, 379, 380, 382, 383, 384, 385, 387, 388],\n",
" dtype='int64', length=157),\n",
" '2015-01': Int64Index([381, 386, 389, 390, 391, 392, 393, 394, 395, 396,\n",
" ...\n",
" 583, 584, 585, 587, 588, 589, 590, 591, 592, 593],\n",
" dtype='int64', length=205),\n",
" '2015-02': Int64Index([577, 586, 594, 595, 596, 597, 598, 599, 600, 601,\n",
" ...\n",
" 719, 720, 721, 722, 723, 724, 725, 726, 727, 728],\n",
" dtype='int64', length=137),\n",
" '2015-03': Int64Index([729, 730, 731, 732, 733, 734, 735, 736, 737, 738,\n",
" ...\n",
" 820, 821, 822, 823, 824, 825, 826, 827, 828, 829],\n",
" dtype='int64', length=101)}"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.groupby(['month']).groups"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"230"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(data.groupby(['month']).groups['2014-11'])"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"157"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(data.groupby(['month']).groups['2014-12'])"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"388"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(data.groupby(['item']).groups['call']) #query to find number of calls in the columns items"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>index</th>\n",
" <th>date</th>\n",
" <th>duration</th>\n",
" <th>item</th>\n",
" <th>network</th>\n",
" <th>network_type</th>\n",
" </tr>\n",
" <tr>\n",
" <th>month</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2014-11</th>\n",
" <td>0</td>\n",
" <td>2014-10-15 06:58:00</td>\n",
" <td>34.429</td>\n",
" <td>data</td>\n",
" <td>data</td>\n",
" <td>data</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2014-12</th>\n",
" <td>228</td>\n",
" <td>2014-11-13 06:58:00</td>\n",
" <td>34.429</td>\n",
" <td>data</td>\n",
" <td>data</td>\n",
" <td>data</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-01</th>\n",
" <td>381</td>\n",
" <td>2014-12-13 06:58:00</td>\n",
" <td>34.429</td>\n",
" <td>data</td>\n",
" <td>data</td>\n",
" <td>data</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02</th>\n",
" <td>577</td>\n",
" <td>2015-01-13 06:58:00</td>\n",
" <td>34.429</td>\n",
" <td>data</td>\n",
" <td>data</td>\n",
" <td>data</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-03</th>\n",
" <td>729</td>\n",
" <td>2015-02-12 20:15:00</td>\n",
" <td>69.000</td>\n",
" <td>call</td>\n",
" <td>landline</td>\n",
" <td>landline</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" index date duration item network network_type\n",
"month \n",
"2014-11 0 2014-10-15 06:58:00 34.429 data data data\n",
"2014-12 228 2014-11-13 06:58:00 34.429 data data data\n",
"2015-01 381 2014-12-13 06:58:00 34.429 data data data\n",
"2015-02 577 2015-01-13 06:58:00 34.429 data data data\n",
"2015-03 729 2015-02-12 20:15:00 69.000 call landline landline"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.groupby(['month']).first()"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"month\n",
"2014-11 26639.441\n",
"2014-12 14641.870\n",
"2015-01 18223.299\n",
"2015-02 15522.299\n",
"2015-03 22750.441\n",
"Name: duration, dtype: float64"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.groupby(['month'])['duration'].sum()\n",
"#SUM OF DURATIONS PER MONTH"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"month\n",
"2014-11 230\n",
"2014-12 157\n",
"2015-01 205\n",
"2015-02 137\n",
"2015-03 101\n",
"Name: date, dtype: int64"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Get the number of dates entries in each month\n",
"data.groupby(['month'])['date'].count()"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"network\n",
"Meteor 7200.0\n",
"Tesco 13828.0\n",
"Three 36464.0\n",
"Vodafone 14621.0\n",
"landline 18433.0\n",
"voicemail 1775.0\n",
"Name: duration, dtype: float64"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[data['item'] == 'call'].groupby(['network'])['duration'].sum()\n",
"#we are finding duration of calls in each months"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"month item\n",
"2014-11 call 107\n",
" data 29\n",
" sms 94\n",
"2014-12 call 79\n",
" data 30\n",
" sms 48\n",
"2015-01 call 88\n",
" data 31\n",
" sms 86\n",
"2015-02 call 67\n",
" data 31\n",
" sms 39\n",
"2015-03 call 47\n",
" data 29\n",
" sms 25\n",
"Name: index, dtype: int64"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#how many calls,sms,data entries are in each month\n",
"data.groupby(['month','item'])['index'].count()"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7fcda0f68eb8>"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.groupby(['month','network_type'])"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"month network_type\n",
"2014-11 data 29\n",
" landline 5\n",
" mobile 189\n",
" special 1\n",
" voicemail 6\n",
"2014-12 data 30\n",
" landline 7\n",
" mobile 108\n",
" voicemail 8\n",
" world 4\n",
"2015-01 data 31\n",
" landline 11\n",
" mobile 160\n",
" voicemail 3\n",
"2015-02 data 31\n",
" landline 8\n",
" mobile 90\n",
" special 2\n",
" voicemail 6\n",
"2015-03 data 29\n",
" landline 11\n",
" mobile 54\n",
" voicemail 4\n",
" world 3\n",
"Name: index, dtype: int64"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.groupby(['month','network_type'])['index'].count()"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"month\n",
"2014-11 26639.441\n",
"2014-12 14641.870\n",
"2015-01 18223.299\n",
"2015-02 15522.299\n",
"2015-03 22750.441\n",
"Name: duration, dtype: float64"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#produces pandas series\n",
"data.groupby(['month'])['duration'].sum()"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>duration</th>\n",
" </tr>\n",
" <tr>\n",
" <th>month</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2014-11</th>\n",
" <td>26639.441</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2014-12</th>\n",
" <td>14641.870</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-01</th>\n",
" <td>18223.299</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02</th>\n",
" <td>15522.299</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-03</th>\n",
" <td>22750.441</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" duration\n",
"month \n",
"2014-11 26639.441\n",
"2014-12 14641.870\n",
"2015-01 18223.299\n",
"2015-02 15522.299\n",
"2015-03 22750.441"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#produces pandas dataframe\n",
"data.groupby(['month'])[['duration']].sum()"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>month</th>\n",
" <th>duration</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2014-11</td>\n",
" <td>26639.441</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2014-12</td>\n",
" <td>14641.870</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2015-01</td>\n",
" <td>18223.299</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2015-02</td>\n",
" <td>15522.299</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2015-03</td>\n",
" <td>22750.441</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" month duration\n",
"0 2014-11 26639.441\n",
"1 2014-12 14641.870\n",
"2 2015-01 18223.299\n",
"3 2015-02 15522.299\n",
"4 2015-03 22750.441"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.groupby(['month'],as_index = False).agg({\"duration\":\"sum\"})\n",
"#data.groupby(['month'])['duration'].sum()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#group dataframe by month and item and extract a number of stats for each group "
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>duration</th>\n",
" </tr>\n",
" <tr>\n",
" <th>month</th>\n",
" <th>item</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">2014-11</th>\n",
" <th>call</th>\n",
" <td>25547.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>data</th>\n",
" <td>998.441</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sms</th>\n",
" <td>94.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">2014-12</th>\n",
" <th>call</th>\n",
" <td>13561.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>data</th>\n",
" <td>1032.870</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sms</th>\n",
" <td>48.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">2015-01</th>\n",
" <th>call</th>\n",
" <td>17070.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>data</th>\n",
" <td>1067.299</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sms</th>\n",
" <td>86.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">2015-02</th>\n",
" <th>call</th>\n",
" <td>14416.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>data</th>\n",
" <td>1067.299</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sms</th>\n",
" <td>39.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">2015-03</th>\n",
" <th>call</th>\n",
" <td>21727.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>data</th>\n",
" <td>998.441</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sms</th>\n",
" <td>25.000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" duration\n",
"month item \n",
"2014-11 call 25547.000\n",
" data 998.441\n",
" sms 94.000\n",
"2014-12 call 13561.000\n",
" data 1032.870\n",
" sms 48.000\n",
"2015-01 call 17070.000\n",
" data 1067.299\n",
" sms 86.000\n",
"2015-02 call 14416.000\n",
" data 1067.299\n",
" sms 39.000\n",
"2015-03 call 21727.000\n",
" data 998.441\n",
" sms 25.000"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.groupby(['month','item'])[['duration']].sum()"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>network_type</th>\n",
" </tr>\n",
" <tr>\n",
" <th>month</th>\n",
" <th>item</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">2014-11</th>\n",
" <th>call</th>\n",
" <td>107</td>\n",
" </tr>\n",
" <tr>\n",
" <th>data</th>\n",
" <td>29</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sms</th>\n",
" <td>94</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">2014-12</th>\n",
" <th>call</th>\n",
" <td>79</td>\n",
" </tr>\n",
" <tr>\n",
" <th>data</th>\n",
" <td>30</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sms</th>\n",
" <td>48</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">2015-01</th>\n",
" <th>call</th>\n",
" <td>88</td>\n",
" </tr>\n",
" <tr>\n",
" <th>data</th>\n",
" <td>31</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sms</th>\n",
" <td>86</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">2015-02</th>\n",
" <th>call</th>\n",
" <td>67</td>\n",
" </tr>\n",
" <tr>\n",
" <th>data</th>\n",
" <td>31</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sms</th>\n",
" <td>39</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">2015-03</th>\n",
" <th>call</th>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>data</th>\n",
" <td>29</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sms</th>\n",
" <td>25</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" network_type\n",
"month item \n",
"2014-11 call 107\n",
" data 29\n",
" sms 94\n",
"2014-12 call 79\n",
" data 30\n",
" sms 48\n",
"2015-01 call 88\n",
" data 31\n",
" sms 86\n",
"2015-02 call 67\n",
" data 31\n",
" sms 39\n",
"2015-03 call 47\n",
" data 29\n",
" sms 25"
]
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.groupby(['month','item'])[['network_type']].count()"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"month item\n",
"2014-11 call 2014-10-15 06:58:00\n",
" data 2014-10-15 06:58:00\n",
" sms 2014-10-16 22:18:00\n",
"2014-12 call 2014-11-14 17:24:00\n",
" data 2014-11-13 06:58:00\n",
" sms 2014-11-14 17:28:00\n",
"2015-01 call 2014-12-15 20:03:00\n",
" data 2014-12-13 06:58:00\n",
" sms 2014-12-15 19:56:00\n",
"2015-02 call 2015-01-15 10:36:00\n",
" data 2015-01-13 06:58:00\n",
" sms 2015-01-15 12:23:00\n",
"2015-03 call 2015-02-12 20:15:00\n",
" data 2015-02-13 06:58:00\n",
" sms 2015-02-19 18:46:00\n",
"Name: date, dtype: datetime64[ns]"
]
},
"execution_count": 68,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.groupby(['month','item'])['date'].first()\n",
"#get the first date per group"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>duration</th>\n",
" <th>network_type</th>\n",
" <th>date</th>\n",
" </tr>\n",
" <tr>\n",
" <th>month</th>\n",
" <th>item</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">2014-11</th>\n",
" <th>call</th>\n",
" <td>25547.000</td>\n",
" <td>107</td>\n",
" <td>2014-10-15 06:58:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>data</th>\n",
" <td>998.441</td>\n",
" <td>29</td>\n",
" <td>2014-10-15 06:58:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sms</th>\n",
" <td>94.000</td>\n",
" <td>94</td>\n",
" <td>2014-10-16 22:18:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">2014-12</th>\n",
" <th>call</th>\n",
" <td>13561.000</td>\n",
" <td>79</td>\n",
" <td>2014-11-14 17:24:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>data</th>\n",
" <td>1032.870</td>\n",
" <td>30</td>\n",
" <td>2014-11-13 06:58:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sms</th>\n",
" <td>48.000</td>\n",
" <td>48</td>\n",
" <td>2014-11-14 17:28:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">2015-01</th>\n",
" <th>call</th>\n",
" <td>17070.000</td>\n",
" <td>88</td>\n",
" <td>2014-12-15 20:03:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>data</th>\n",
" <td>1067.299</td>\n",
" <td>31</td>\n",
" <td>2014-12-13 06:58:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sms</th>\n",
" <td>86.000</td>\n",
" <td>86</td>\n",
" <td>2014-12-15 19:56:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">2015-02</th>\n",
" <th>call</th>\n",
" <td>14416.000</td>\n",
" <td>67</td>\n",
" <td>2015-01-15 10:36:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>data</th>\n",
" <td>1067.299</td>\n",
" <td>31</td>\n",
" <td>2015-01-13 06:58:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sms</th>\n",
" <td>39.000</td>\n",
" <td>39</td>\n",
" <td>2015-01-15 12:23:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">2015-03</th>\n",
" <th>call</th>\n",
" <td>21727.000</td>\n",
" <td>47</td>\n",
" <td>2015-02-12 20:15:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>data</th>\n",
" <td>998.441</td>\n",
" <td>29</td>\n",
" <td>2015-02-13 06:58:00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sms</th>\n",
" <td>25.000</td>\n",
" <td>25</td>\n",
" <td>2015-02-19 18:46:00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" duration network_type date\n",
"month item \n",
"2014-11 call 25547.000 107 2014-10-15 06:58:00\n",
" data 998.441 29 2014-10-15 06:58:00\n",
" sms 94.000 94 2014-10-16 22:18:00\n",
"2014-12 call 13561.000 79 2014-11-14 17:24:00\n",
" data 1032.870 30 2014-11-13 06:58:00\n",
" sms 48.000 48 2014-11-14 17:28:00\n",
"2015-01 call 17070.000 88 2014-12-15 20:03:00\n",
" data 1067.299 31 2014-12-13 06:58:00\n",
" sms 86.000 86 2014-12-15 19:56:00\n",
"2015-02 call 14416.000 67 2015-01-15 10:36:00\n",
" data 1067.299 31 2015-01-13 06:58:00\n",
" sms 39.000 39 2015-01-15 12:23:00\n",
"2015-03 call 21727.000 47 2015-02-12 20:15:00\n",
" data 998.441 29 2015-02-13 06:58:00\n",
" sms 25.000 25 2015-02-19 18:46:00"
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#group the data frame by month and item and extract the number of stats from each group\n",
"data.groupby(['month','item']).agg({\n",
" 'duration':'sum',#Sum duration per group\n",
" 'network_type':'count',#get the count of networks\n",
" 'date':'first'#get the first date per group\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "Cannot add integral value to Timestamp without freq.",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m~/conda/envs/python/lib/python3.6/site-packages/pandas/core/groupby/ops.py\u001b[0m in \u001b[0;36magg_series\u001b[0;34m(self, obj, func)\u001b[0m\n\u001b[1;32m 662\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 663\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_aggregate_series_fast\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 664\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/conda/envs/python/lib/python3.6/site-packages/pandas/core/groupby/ops.py\u001b[0m in \u001b[0;36m_aggregate_series_fast\u001b[0;34m(self, obj, func)\u001b[0m\n\u001b[1;32m 680\u001b[0m \u001b[0mgrouper\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mreduction\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSeriesGrouper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgroup_index\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mngroups\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdummy\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 681\u001b[0;31m \u001b[0mresult\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcounts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgrouper\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 682\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcounts\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32mpandas/_libs/reduction.pyx\u001b[0m in \u001b[0;36mpandas._libs.reduction.SeriesGrouper.get_result\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/reduction.pyx\u001b[0m in \u001b[0;36mpandas._libs.reduction.SeriesGrouper.get_result\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32m~/conda/envs/python/lib/python3.6/site-packages/pandas/core/groupby/groupby.py\u001b[0m in \u001b[0;36m<lambda>\u001b[0;34m(x)\u001b[0m\n\u001b[1;32m 893\u001b[0m \u001b[0mfunc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_is_builtin_func\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 894\u001b[0;31m \u001b[0mf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 895\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m<ipython-input-71-999d1d9ab14f>\u001b[0m in \u001b[0;36m<lambda>\u001b[0;34m(x)\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;34m'duration'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m'sum'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0;34m'date'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m:\u001b[0m \u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m }\n",
"\u001b[0;32mpandas/_libs/tslibs/c_timestamp.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslibs.c_timestamp._Timestamp.__sub__\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/tslibs/c_timestamp.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslibs.c_timestamp._Timestamp.__add__\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: Cannot add integral value to Timestamp without freq.",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m~/conda/envs/python/lib/python3.6/site-packages/pandas/core/groupby/generic.py\u001b[0m in \u001b[0;36maggregate\u001b[0;34m(self, func_or_funcs, *args, **kwargs)\u001b[0m\n\u001b[1;32m 862\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 863\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_python_agg_general\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc_or_funcs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 864\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/conda/envs/python/lib/python3.6/site-packages/pandas/core/groupby/groupby.py\u001b[0m in \u001b[0;36m_python_agg_general\u001b[0;34m(self, func, *args, **kwargs)\u001b[0m\n\u001b[1;32m 899\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 900\u001b[0;31m \u001b[0mresult\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcounts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgrouper\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0magg_series\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 901\u001b[0m \u001b[0moutput\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_try_cast\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnumeric_only\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/conda/envs/python/lib/python3.6/site-packages/pandas/core/groupby/ops.py\u001b[0m in \u001b[0;36magg_series\u001b[0;34m(self, obj, func)\u001b[0m\n\u001b[1;32m 664\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 665\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_aggregate_series_pure_python\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 666\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/conda/envs/python/lib/python3.6/site-packages/pandas/core/groupby/ops.py\u001b[0m in \u001b[0;36m_aggregate_series_pure_python\u001b[0;34m(self, obj, func)\u001b[0m\n\u001b[1;32m 693\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mlabel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgroup\u001b[0m \u001b[0;32min\u001b[0m \u001b[0msplitter\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 694\u001b[0;31m \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgroup\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 695\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/conda/envs/python/lib/python3.6/site-packages/pandas/core/groupby/groupby.py\u001b[0m in \u001b[0;36m<lambda>\u001b[0;34m(x)\u001b[0m\n\u001b[1;32m 893\u001b[0m \u001b[0mfunc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_is_builtin_func\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 894\u001b[0;31m \u001b[0mf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 895\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m<ipython-input-71-999d1d9ab14f>\u001b[0m in \u001b[0;36m<lambda>\u001b[0;34m(x)\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;34m'duration'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m'sum'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0;34m'date'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m:\u001b[0m \u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m }\n",
"\u001b[0;32mpandas/_libs/tslibs/c_timestamp.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslibs.c_timestamp._Timestamp.__sub__\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/tslibs/c_timestamp.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslibs.c_timestamp._Timestamp.__add__\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: Cannot add integral value to Timestamp without freq.",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-71-999d1d9ab14f>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m'date'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m:\u001b[0m \u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m }\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'month'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0magg\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maggr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m~/conda/envs/python/lib/python3.6/site-packages/pandas/core/groupby/generic.py\u001b[0m in \u001b[0;36maggregate\u001b[0;34m(self, arg, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1453\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mAppender\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_shared_docs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"aggregate\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1454\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0maggregate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0marg\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1455\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maggregate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1456\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1457\u001b[0m \u001b[0magg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0maggregate\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/conda/envs/python/lib/python3.6/site-packages/pandas/core/groupby/generic.py\u001b[0m in \u001b[0;36maggregate\u001b[0;34m(self, func, *args, **kwargs)\u001b[0m\n\u001b[1;32m 227\u001b[0m \u001b[0mfunc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_maybe_mangle_lambdas\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 228\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 229\u001b[0;31m \u001b[0mresult\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhow\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_aggregate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_level\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0m_level\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 230\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mhow\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 231\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/conda/envs/python/lib/python3.6/site-packages/pandas/core/base.py\u001b[0m in \u001b[0;36m_aggregate\u001b[0;34m(self, arg, *args, **kwargs)\u001b[0m\n\u001b[1;32m 504\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 505\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 506\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_agg\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_agg_1dim\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 507\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mSpecificationError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 508\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/conda/envs/python/lib/python3.6/site-packages/pandas/core/base.py\u001b[0m in \u001b[0;36m_agg\u001b[0;34m(arg, func)\u001b[0m\n\u001b[1;32m 454\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mOrderedDict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 455\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mfname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0magg_how\u001b[0m \u001b[0;32min\u001b[0m \u001b[0marg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 456\u001b[0;31m \u001b[0mresult\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mfname\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0magg_how\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 457\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 458\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/conda/envs/python/lib/python3.6/site-packages/pandas/core/base.py\u001b[0m in \u001b[0;36m_agg_1dim\u001b[0;34m(name, how, subset)\u001b[0m\n\u001b[1;32m 438\u001b[0m \u001b[0;34m\"nested dictionary is ambiguous \"\u001b[0m \u001b[0;34m\"in aggregation\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 439\u001b[0m )\n\u001b[0;32m--> 440\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mcolg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0maggregate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhow\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_level\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_level\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 441\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 442\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_agg_2dim\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhow\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/conda/envs/python/lib/python3.6/site-packages/pandas/core/groupby/generic.py\u001b[0m in \u001b[0;36maggregate\u001b[0;34m(self, func_or_funcs, *args, **kwargs)\u001b[0m\n\u001b[1;32m 863\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_python_agg_general\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc_or_funcs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 864\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 865\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_aggregate_named\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc_or_funcs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 866\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 867\u001b[0m \u001b[0mindex\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mIndex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msorted\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgrouper\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnames\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/conda/envs/python/lib/python3.6/site-packages/pandas/core/groupby/generic.py\u001b[0m in \u001b[0;36m_aggregate_named\u001b[0;34m(self, func, *args, **kwargs)\u001b[0m\n\u001b[1;32m 995\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgroup\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 996\u001b[0m \u001b[0mgroup\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 997\u001b[0;31m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgroup\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 998\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mSeries\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mIndex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndarray\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 999\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mException\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Must produce aggregated value\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m<ipython-input-71-999d1d9ab14f>\u001b[0m in \u001b[0;36m<lambda>\u001b[0;34m(x)\u001b[0m\n\u001b[1;32m 2\u001b[0m aggr = {\n\u001b[1;32m 3\u001b[0m \u001b[0;34m'duration'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m'sum'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0;34m'date'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m:\u001b[0m \u001b[0mmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m }\n\u001b[1;32m 6\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'month'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0magg\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maggr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32mpandas/_libs/tslibs/c_timestamp.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslibs.c_timestamp._Timestamp.__sub__\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32mpandas/_libs/tslibs/c_timestamp.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslibs.c_timestamp._Timestamp.__add__\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: Cannot add integral value to Timestamp without freq."
]
}
],
"source": [
"#defining aggregation procedure outside of the groupby operation\n",
"aggr = {\n",
" 'duration':'sum',\n",
" 'date':lambda x : max(x) - 1\n",
"}\n",
"data.groupby(['month']).agg(aggr)"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe thead tr:last-of-type th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th colspan=\"3\" halign=\"left\">duration</th>\n",
" <th>network_type</th>\n",
" <th colspan=\"3\" halign=\"left\">date</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th>min</th>\n",
" <th>max</th>\n",
" <th>sum</th>\n",
" <th>count</th>\n",
" <th>min</th>\n",
" <th>first</th>\n",
" <th>nunique</th>\n",
" </tr>\n",
" <tr>\n",
" <th>month</th>\n",
" <th>item</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">2014-11</th>\n",
" <th>call</th>\n",
" <td>1.000</td>\n",
" <td>1940.000</td>\n",
" <td>25547.000</td>\n",
" <td>107</td>\n",
" <td>2014-10-15 06:58:00</td>\n",
" <td>2014-10-15 06:58:00</td>\n",
" <td>104</td>\n",
" </tr>\n",
" <tr>\n",
" <th>data</th>\n",
" <td>34.429</td>\n",
" <td>34.429</td>\n",
" <td>998.441</td>\n",
" <td>29</td>\n",
" <td>2014-10-15 06:58:00</td>\n",
" <td>2014-10-15 06:58:00</td>\n",
" <td>29</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sms</th>\n",
" <td>1.000</td>\n",
" <td>1.000</td>\n",
" <td>94.000</td>\n",
" <td>94</td>\n",
" <td>2014-10-16 22:18:00</td>\n",
" <td>2014-10-16 22:18:00</td>\n",
" <td>79</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">2014-12</th>\n",
" <th>call</th>\n",
" <td>2.000</td>\n",
" <td>2120.000</td>\n",
" <td>13561.000</td>\n",
" <td>79</td>\n",
" <td>2014-11-14 17:24:00</td>\n",
" <td>2014-11-14 17:24:00</td>\n",
" <td>76</td>\n",
" </tr>\n",
" <tr>\n",
" <th>data</th>\n",
" <td>34.429</td>\n",
" <td>34.429</td>\n",
" <td>1032.870</td>\n",
" <td>30</td>\n",
" <td>2014-11-13 06:58:00</td>\n",
" <td>2014-11-13 06:58:00</td>\n",
" <td>30</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sms</th>\n",
" <td>1.000</td>\n",
" <td>1.000</td>\n",
" <td>48.000</td>\n",
" <td>48</td>\n",
" <td>2014-11-14 17:28:00</td>\n",
" <td>2014-11-14 17:28:00</td>\n",
" <td>41</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">2015-01</th>\n",
" <th>call</th>\n",
" <td>2.000</td>\n",
" <td>1859.000</td>\n",
" <td>17070.000</td>\n",
" <td>88</td>\n",
" <td>2014-12-15 20:03:00</td>\n",
" <td>2014-12-15 20:03:00</td>\n",
" <td>84</td>\n",
" </tr>\n",
" <tr>\n",
" <th>data</th>\n",
" <td>34.429</td>\n",
" <td>34.429</td>\n",
" <td>1067.299</td>\n",
" <td>31</td>\n",
" <td>2014-12-13 06:58:00</td>\n",
" <td>2014-12-13 06:58:00</td>\n",
" <td>31</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sms</th>\n",
" <td>1.000</td>\n",
" <td>1.000</td>\n",
" <td>86.000</td>\n",
" <td>86</td>\n",
" <td>2014-12-15 19:56:00</td>\n",
" <td>2014-12-15 19:56:00</td>\n",
" <td>58</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">2015-02</th>\n",
" <th>call</th>\n",
" <td>1.000</td>\n",
" <td>1863.000</td>\n",
" <td>14416.000</td>\n",
" <td>67</td>\n",
" <td>2015-01-15 10:36:00</td>\n",
" <td>2015-01-15 10:36:00</td>\n",
" <td>67</td>\n",
" </tr>\n",
" <tr>\n",
" <th>data</th>\n",
" <td>34.429</td>\n",
" <td>34.429</td>\n",
" <td>1067.299</td>\n",
" <td>31</td>\n",
" <td>2015-01-13 06:58:00</td>\n",
" <td>2015-01-13 06:58:00</td>\n",
" <td>31</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sms</th>\n",
" <td>1.000</td>\n",
" <td>1.000</td>\n",
" <td>39.000</td>\n",
" <td>39</td>\n",
" <td>2015-01-15 12:23:00</td>\n",
" <td>2015-01-15 12:23:00</td>\n",
" <td>27</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">2015-03</th>\n",
" <th>call</th>\n",
" <td>2.000</td>\n",
" <td>10528.000</td>\n",
" <td>21727.000</td>\n",
" <td>47</td>\n",
" <td>2015-02-12 20:15:00</td>\n",
" <td>2015-02-12 20:15:00</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>data</th>\n",
" <td>34.429</td>\n",
" <td>34.429</td>\n",
" <td>998.441</td>\n",
" <td>29</td>\n",
" <td>2015-02-13 06:58:00</td>\n",
" <td>2015-02-13 06:58:00</td>\n",
" <td>29</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sms</th>\n",
" <td>1.000</td>\n",
" <td>1.000</td>\n",
" <td>25.000</td>\n",
" <td>25</td>\n",
" <td>2015-02-19 18:46:00</td>\n",
" <td>2015-02-19 18:46:00</td>\n",
" <td>17</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" duration network_type date \\\n",
" min max sum count min \n",
"month item \n",
"2014-11 call 1.000 1940.000 25547.000 107 2014-10-15 06:58:00 \n",
" data 34.429 34.429 998.441 29 2014-10-15 06:58:00 \n",
" sms 1.000 1.000 94.000 94 2014-10-16 22:18:00 \n",
"2014-12 call 2.000 2120.000 13561.000 79 2014-11-14 17:24:00 \n",
" data 34.429 34.429 1032.870 30 2014-11-13 06:58:00 \n",
" sms 1.000 1.000 48.000 48 2014-11-14 17:28:00 \n",
"2015-01 call 2.000 1859.000 17070.000 88 2014-12-15 20:03:00 \n",
" data 34.429 34.429 1067.299 31 2014-12-13 06:58:00 \n",
" sms 1.000 1.000 86.000 86 2014-12-15 19:56:00 \n",
"2015-02 call 1.000 1863.000 14416.000 67 2015-01-15 10:36:00 \n",
" data 34.429 34.429 1067.299 31 2015-01-13 06:58:00 \n",
" sms 1.000 1.000 39.000 39 2015-01-15 12:23:00 \n",
"2015-03 call 2.000 10528.000 21727.000 47 2015-02-12 20:15:00 \n",
" data 34.429 34.429 998.441 29 2015-02-13 06:58:00 \n",
" sms 1.000 1.000 25.000 25 2015-02-19 18:46:00 \n",
"\n",
" \n",
" first nunique \n",
"month item \n",
"2014-11 call 2014-10-15 06:58:00 104 \n",
" data 2014-10-15 06:58:00 29 \n",
" sms 2014-10-16 22:18:00 79 \n",
"2014-12 call 2014-11-14 17:24:00 76 \n",
" data 2014-11-13 06:58:00 30 \n",
" sms 2014-11-14 17:28:00 41 \n",
"2015-01 call 2014-12-15 20:03:00 84 \n",
" data 2014-12-13 06:58:00 31 \n",
" sms 2014-12-15 19:56:00 58 \n",
"2015-02 call 2015-01-15 10:36:00 67 \n",
" data 2015-01-13 06:58:00 31 \n",
" sms 2015-01-15 12:23:00 27 \n",
"2015-03 call 2015-02-12 20:15:00 47 \n",
" data 2015-02-13 06:58:00 29 \n",
" sms 2015-02-19 18:46:00 17 "
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.groupby(['month','item']).agg({\n",
" 'duration':[min,max,sum],\n",
" 'network_type':'count',\n",
" 'date':['min','first','nunique']\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>min_duration</th>\n",
" <th>max_duration</th>\n",
" <th>total_duration</th>\n",
" </tr>\n",
" <tr>\n",
" <th>month</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2014-11</th>\n",
" <td>1.0</td>\n",
" <td>1940.0</td>\n",
" <td>26639.441</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2014-12</th>\n",
" <td>1.0</td>\n",
" <td>2120.0</td>\n",
" <td>14641.870</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-01</th>\n",
" <td>1.0</td>\n",
" <td>1859.0</td>\n",
" <td>18223.299</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-02</th>\n",
" <td>1.0</td>\n",
" <td>1863.0</td>\n",
" <td>15522.299</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015-03</th>\n",
" <td>1.0</td>\n",
" <td>10528.0</td>\n",
" <td>22750.441</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" min_duration max_duration total_duration\n",
"month \n",
"2014-11 1.0 1940.0 26639.441\n",
"2014-12 1.0 2120.0 14641.870\n",
"2015-01 1.0 1859.0 18223.299\n",
"2015-02 1.0 1863.0 15522.299\n",
"2015-03 1.0 10528.0 22750.441"
]
},
"execution_count": 79,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"grouped = data.groupby(['month']).agg({\"duration\":[min,max,sum]})\n",
"grouped.columns = grouped.columns.droplevel(level = 0)\n",
"grouped.rename(columns = {\"min\":\"min_duration\",\"max\":\"max_duration\",\"sum\":\"total_duration\"})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python",
"language": "python",
"name": "conda-env-python-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment