Skip to content

Instantly share code, notes, and snippets.

@drbh
Created January 3, 2020 00:40
Show Gist options
  • Save drbh/f23087f2e74f3f392178ba16dca96642 to your computer and use it in GitHub Desktop.
Save drbh/f23087f2e74f3f392178ba16dca96642 to your computer and use it in GitHub Desktop.
Create some fake person spending data and get weekly and monthly summaries in a notebook
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"source": [
"import pandas as pd\n",
"from random import randrange, choice, randint\n",
"from datetime import timedelta\n",
"from datetime import datetime\n",
"\n",
"def random_date(start, end):\n",
" \"\"\"\n",
" This function will return a random datetime between two datetime \n",
" objects.\n",
" \"\"\"\n",
" delta = end - start\n",
" int_delta = (delta.days * 24 * 60 * 60) + delta.seconds\n",
" random_second = randrange(int_delta)\n",
" return start + timedelta(seconds=random_second)\n",
"\n",
"d1 = datetime.strptime('1/1/2019 1:30 PM', '%m/%d/%Y %I:%M %p')\n",
"d2 = datetime.strptime('1/1/2020 4:50 AM', '%m/%d/%Y %I:%M %p')\n",
"\n",
"opts=[\"gas\",\"food\",\"bar\",\"auto\",\"extra\"]\n",
"\n",
"n = 500\n",
"\n",
"spend_date = [ random_date(d1, d2) for x in range(0, n) ]\n",
"payees = [ choice(opts) for x in range(0, n)]\n",
"amounts = [ randint(0, 100) for x in range(0, n)]\n",
"credit = [ choice([True, True, True, False]) for x in range(0, n)]\n",
"desctip = [ \"-\" for x in range(0, n)]\n",
"location = [ (24.53, 53.23) for x in range(0, n)]\n",
"\n",
"colnames = [\"spend_date\",\"payees\",\"amounts\",\"credit\",\"desctip\",\"location\",]\n",
"data = [spend_date,payees,amounts,credit,desctip,location]\n",
"df = pd.DataFrame(data).T\n",
"df.columns = colnames\n",
"df = df.sort_values('spend_date')\n",
"\n",
"# group by week starting on Monday\n",
"groups = df.groupby(pd.Grouper(key='spend_date', freq='W-MON'))"
],
"outputs": [],
"execution_count": 1,
"metadata": {
"collapsed": false,
"outputHidden": false,
"inputHidden": false
}
},
{
"cell_type": "code",
"source": [
"list(groups)[0]"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 2,
"data": {
"text/plain": [
"(Timestamp('2019-01-07 00:00:00', freq='W-MON'),\n",
" spend_date payees amounts credit desctip location\n",
" 5 2019-01-01 21:50:14 food 0 False - (24.53, 53.23)\n",
" 133 2019-01-02 11:36:35 food 17 False - (24.53, 53.23)\n",
" 478 2019-01-02 14:17:05 gas 67 True - (24.53, 53.23)\n",
" 290 2019-01-03 04:44:07 food 40 True - (24.53, 53.23)\n",
" 42 2019-01-03 23:54:51 food 51 False - (24.53, 53.23)\n",
" 431 2019-01-04 23:19:01 gas 100 True - (24.53, 53.23)\n",
" 192 2019-01-05 15:58:55 food 4 True - (24.53, 53.23)\n",
" 332 2019-01-05 19:42:41 auto 61 False - (24.53, 53.23)\n",
" 483 2019-01-05 21:57:19 bar 28 True - (24.53, 53.23)\n",
" 493 2019-01-06 11:10:06 extra 0 True - (24.53, 53.23)\n",
" 394 2019-01-06 16:58:28 auto 71 True - (24.53, 53.23)\n",
" 315 2019-01-06 20:38:05 bar 22 True - (24.53, 53.23)\n",
" 228 2019-01-07 01:20:21 gas 92 True - (24.53, 53.23)\n",
" 380 2019-01-07 05:28:13 extra 66 True - (24.53, 53.23)\n",
" 65 2019-01-07 15:26:37 auto 10 True - (24.53, 53.23))"
]
},
"metadata": {}
}
],
"execution_count": 2,
"metadata": {
"collapsed": false,
"outputHidden": false,
"inputHidden": false
}
},
{
"cell_type": "code",
"source": [
"# group by week starting on Monday\n",
"weekly_spend = df.query(\"credit == True\").groupby([\n",
" pd.Grouper(key='spend_date', freq='W-MON')\n",
"]).amounts.sum()\n",
"\n",
"weekly_gain = df.query(\"credit == False\").groupby([\n",
" pd.Grouper(key='spend_date', freq='W-MON')\n",
"]).amounts.sum()\n",
"\n\n",
"print(\"you weekly spend:\", weekly_spend.head(5))\n",
"print(\"you weekly gain:\", weekly_gain.head(5))"
],
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"('you weekly spend:', spend_date\n",
"2019-01-07 500\n",
"2019-01-14 147\n",
"2019-01-21 320\n",
"2019-01-28 428\n",
"2019-02-04 272\n",
"Freq: W-MON, Name: amounts, dtype: int64)\n",
"('you weekly gain:', spend_date\n",
"2019-01-07 129.0\n",
"2019-01-14 13.0\n",
"2019-01-21 NaN\n",
"2019-01-28 196.0\n",
"2019-02-04 43.0\n",
"Freq: W-MON, Name: amounts, dtype: float64)\n"
]
}
],
"execution_count": 5,
"metadata": {
"collapsed": false,
"outputHidden": false,
"inputHidden": false
}
},
{
"cell_type": "code",
"source": [
"monthly_spend = df.query(\"credit == True\").groupby([\n",
" pd.Grouper(key='spend_date', freq='M')\n",
"]).amounts.sum()\n",
"\n",
"monthly_gain = df.query(\"credit == False\").groupby([\n",
" pd.Grouper(key='spend_date', freq='M')\n",
"]).amounts.sum()\n",
"\n\n",
"print(\"you monthly spend:\", monthly_spend.head(5))\n",
"print(\"you monthly gain:\", monthly_gain.head(5))"
],
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"('you monthly spend:', spend_date\n",
"2019-01-31 1541\n",
"2019-02-28 1606\n",
"2019-03-31 1157\n",
"2019-04-30 1289\n",
"2019-05-31 1263\n",
"Freq: M, Name: amounts, dtype: int64)\n",
"('you monthly gain:', spend_date\n",
"2019-01-31 381\n",
"2019-02-28 214\n",
"2019-03-31 815\n",
"2019-04-30 606\n",
"2019-05-31 235\n",
"Freq: M, Name: amounts, dtype: int64)\n"
]
}
],
"execution_count": 6,
"metadata": {
"collapsed": false,
"outputHidden": false,
"inputHidden": false
}
},
{
"cell_type": "code",
"source": [],
"outputs": [],
"execution_count": 5,
"metadata": {
"collapsed": false,
"outputHidden": false,
"inputHidden": false
}
}
],
"metadata": {
"kernel_info": {
"name": "python2"
},
"language_info": {
"mimetype": "text/x-python",
"nbconvert_exporter": "python",
"name": "python",
"pygments_lexer": "ipython2",
"version": "2.7.9",
"file_extension": ".py",
"codemirror_mode": {
"version": 2,
"name": "ipython"
}
},
"kernelspec": {
"name": "python2",
"language": "python",
"display_name": "Python 2"
},
"nteract": {
"version": "0.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment