Created
January 3, 2020 00:40
-
-
Save drbh/f23087f2e74f3f392178ba16dca96642 to your computer and use it in GitHub Desktop.
Create some fake person spending data and get weekly and monthly summaries in a notebook
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"source": [ | |
"import pandas as pd\n", | |
"from random import randrange, choice, randint\n", | |
"from datetime import timedelta\n", | |
"from datetime import datetime\n", | |
"\n", | |
"def random_date(start, end):\n", | |
" \"\"\"\n", | |
" This function will return a random datetime between two datetime \n", | |
" objects.\n", | |
" \"\"\"\n", | |
" delta = end - start\n", | |
" int_delta = (delta.days * 24 * 60 * 60) + delta.seconds\n", | |
" random_second = randrange(int_delta)\n", | |
" return start + timedelta(seconds=random_second)\n", | |
"\n", | |
"d1 = datetime.strptime('1/1/2019 1:30 PM', '%m/%d/%Y %I:%M %p')\n", | |
"d2 = datetime.strptime('1/1/2020 4:50 AM', '%m/%d/%Y %I:%M %p')\n", | |
"\n", | |
"opts=[\"gas\",\"food\",\"bar\",\"auto\",\"extra\"]\n", | |
"\n", | |
"n = 500\n", | |
"\n", | |
"spend_date = [ random_date(d1, d2) for x in range(0, n) ]\n", | |
"payees = [ choice(opts) for x in range(0, n)]\n", | |
"amounts = [ randint(0, 100) for x in range(0, n)]\n", | |
"credit = [ choice([True, True, True, False]) for x in range(0, n)]\n", | |
"desctip = [ \"-\" for x in range(0, n)]\n", | |
"location = [ (24.53, 53.23) for x in range(0, n)]\n", | |
"\n", | |
"colnames = [\"spend_date\",\"payees\",\"amounts\",\"credit\",\"desctip\",\"location\",]\n", | |
"data = [spend_date,payees,amounts,credit,desctip,location]\n", | |
"df = pd.DataFrame(data).T\n", | |
"df.columns = colnames\n", | |
"df = df.sort_values('spend_date')\n", | |
"\n", | |
"# group by week starting on Monday\n", | |
"groups = df.groupby(pd.Grouper(key='spend_date', freq='W-MON'))" | |
], | |
"outputs": [], | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": false, | |
"outputHidden": false, | |
"inputHidden": false | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"list(groups)[0]" | |
], | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 2, | |
"data": { | |
"text/plain": [ | |
"(Timestamp('2019-01-07 00:00:00', freq='W-MON'),\n", | |
" spend_date payees amounts credit desctip location\n", | |
" 5 2019-01-01 21:50:14 food 0 False - (24.53, 53.23)\n", | |
" 133 2019-01-02 11:36:35 food 17 False - (24.53, 53.23)\n", | |
" 478 2019-01-02 14:17:05 gas 67 True - (24.53, 53.23)\n", | |
" 290 2019-01-03 04:44:07 food 40 True - (24.53, 53.23)\n", | |
" 42 2019-01-03 23:54:51 food 51 False - (24.53, 53.23)\n", | |
" 431 2019-01-04 23:19:01 gas 100 True - (24.53, 53.23)\n", | |
" 192 2019-01-05 15:58:55 food 4 True - (24.53, 53.23)\n", | |
" 332 2019-01-05 19:42:41 auto 61 False - (24.53, 53.23)\n", | |
" 483 2019-01-05 21:57:19 bar 28 True - (24.53, 53.23)\n", | |
" 493 2019-01-06 11:10:06 extra 0 True - (24.53, 53.23)\n", | |
" 394 2019-01-06 16:58:28 auto 71 True - (24.53, 53.23)\n", | |
" 315 2019-01-06 20:38:05 bar 22 True - (24.53, 53.23)\n", | |
" 228 2019-01-07 01:20:21 gas 92 True - (24.53, 53.23)\n", | |
" 380 2019-01-07 05:28:13 extra 66 True - (24.53, 53.23)\n", | |
" 65 2019-01-07 15:26:37 auto 10 True - (24.53, 53.23))" | |
] | |
}, | |
"metadata": {} | |
} | |
], | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": false, | |
"outputHidden": false, | |
"inputHidden": false | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# group by week starting on Monday\n", | |
"weekly_spend = df.query(\"credit == True\").groupby([\n", | |
" pd.Grouper(key='spend_date', freq='W-MON')\n", | |
"]).amounts.sum()\n", | |
"\n", | |
"weekly_gain = df.query(\"credit == False\").groupby([\n", | |
" pd.Grouper(key='spend_date', freq='W-MON')\n", | |
"]).amounts.sum()\n", | |
"\n\n", | |
"print(\"you weekly spend:\", weekly_spend.head(5))\n", | |
"print(\"you weekly gain:\", weekly_gain.head(5))" | |
], | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"('you weekly spend:', spend_date\n", | |
"2019-01-07 500\n", | |
"2019-01-14 147\n", | |
"2019-01-21 320\n", | |
"2019-01-28 428\n", | |
"2019-02-04 272\n", | |
"Freq: W-MON, Name: amounts, dtype: int64)\n", | |
"('you weekly gain:', spend_date\n", | |
"2019-01-07 129.0\n", | |
"2019-01-14 13.0\n", | |
"2019-01-21 NaN\n", | |
"2019-01-28 196.0\n", | |
"2019-02-04 43.0\n", | |
"Freq: W-MON, Name: amounts, dtype: float64)\n" | |
] | |
} | |
], | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false, | |
"outputHidden": false, | |
"inputHidden": false | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"monthly_spend = df.query(\"credit == True\").groupby([\n", | |
" pd.Grouper(key='spend_date', freq='M')\n", | |
"]).amounts.sum()\n", | |
"\n", | |
"monthly_gain = df.query(\"credit == False\").groupby([\n", | |
" pd.Grouper(key='spend_date', freq='M')\n", | |
"]).amounts.sum()\n", | |
"\n\n", | |
"print(\"you monthly spend:\", monthly_spend.head(5))\n", | |
"print(\"you monthly gain:\", monthly_gain.head(5))" | |
], | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"('you monthly spend:', spend_date\n", | |
"2019-01-31 1541\n", | |
"2019-02-28 1606\n", | |
"2019-03-31 1157\n", | |
"2019-04-30 1289\n", | |
"2019-05-31 1263\n", | |
"Freq: M, Name: amounts, dtype: int64)\n", | |
"('you monthly gain:', spend_date\n", | |
"2019-01-31 381\n", | |
"2019-02-28 214\n", | |
"2019-03-31 815\n", | |
"2019-04-30 606\n", | |
"2019-05-31 235\n", | |
"Freq: M, Name: amounts, dtype: int64)\n" | |
] | |
} | |
], | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": false, | |
"outputHidden": false, | |
"inputHidden": false | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [], | |
"outputs": [], | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false, | |
"outputHidden": false, | |
"inputHidden": false | |
} | |
} | |
], | |
"metadata": { | |
"kernel_info": { | |
"name": "python2" | |
}, | |
"language_info": { | |
"mimetype": "text/x-python", | |
"nbconvert_exporter": "python", | |
"name": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.9", | |
"file_extension": ".py", | |
"codemirror_mode": { | |
"version": 2, | |
"name": "ipython" | |
} | |
}, | |
"kernelspec": { | |
"name": "python2", | |
"language": "python", | |
"display_name": "Python 2" | |
}, | |
"nteract": { | |
"version": "0.12.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment