drbh · January 3, 2020 00:40
diff --git a/explore.ipynb b/explore.ipynb
 {
  "cells": [
    {
      "cell_type": "code",
      "source": [
        "import pandas as pd\n",
        "from random import randrange, choice, randint\n",
        "from datetime import timedelta\n",
        "from datetime import datetime\n",
        "\n",
        "def random_date(start, end):\n",
        "    \"\"\"\n",
        "    This function will return a random datetime between two datetime \n",
        "    objects.\n",
        "    \"\"\"\n",
        "    delta = end - start\n",
        "    int_delta = (delta.days * 24 * 60 * 60) + delta.seconds\n",
        "    random_second = randrange(int_delta)\n",
        "    return start + timedelta(seconds=random_second)\n",
        "\n",
        "d1 = datetime.strptime('1/1/2019 1:30 PM', '%m/%d/%Y %I:%M %p')\n",
        "d2 = datetime.strptime('1/1/2020 4:50 AM', '%m/%d/%Y %I:%M %p')\n",
        "\n",
        "opts=[\"gas\",\"food\",\"bar\",\"auto\",\"extra\"]\n",
        "\n",
        "n = 500\n",
        "\n",
        "spend_date = [ random_date(d1, d2) for x in range(0, n) ]\n",
        "payees = [ choice(opts) for x in range(0, n)]\n",
        "amounts = [ randint(0, 100) for x in range(0, n)]\n",
        "credit = [ choice([True, True, True, False]) for x in range(0, n)]\n",
        "desctip = [ \"-\" for x in range(0, n)]\n",
        "location = [ (24.53, 53.23) for x in range(0, n)]\n",
        "\n",
        "colnames = [\"spend_date\",\"payees\",\"amounts\",\"credit\",\"desctip\",\"location\",]\n",
        "data = [spend_date,payees,amounts,credit,desctip,location]\n",
        "df = pd.DataFrame(data).T\n",
        "df.columns = colnames\n",
        "df = df.sort_values('spend_date')\n",
        "\n",
        "# group by week starting on Monday\n",
        "groups = df.groupby(pd.Grouper(key='spend_date', freq='W-MON'))"
      ],
      "outputs": [],
      "execution_count": 1,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "list(groups)[0]"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 2,
          "data": {
            "text/plain": [
              "(Timestamp('2019-01-07 00:00:00', freq='W-MON'),\n",
              "               spend_date payees amounts credit desctip        location\n",
              " 5    2019-01-01 21:50:14   food       0  False       -  (24.53, 53.23)\n",
              " 133  2019-01-02 11:36:35   food      17  False       -  (24.53, 53.23)\n",
              " 478  2019-01-02 14:17:05    gas      67   True       -  (24.53, 53.23)\n",
              " 290  2019-01-03 04:44:07   food      40   True       -  (24.53, 53.23)\n",
              " 42   2019-01-03 23:54:51   food      51  False       -  (24.53, 53.23)\n",
              " 431  2019-01-04 23:19:01    gas     100   True       -  (24.53, 53.23)\n",
              " 192  2019-01-05 15:58:55   food       4   True       -  (24.53, 53.23)\n",
              " 332  2019-01-05 19:42:41   auto      61  False       -  (24.53, 53.23)\n",
              " 483  2019-01-05 21:57:19    bar      28   True       -  (24.53, 53.23)\n",
              " 493  2019-01-06 11:10:06  extra       0   True       -  (24.53, 53.23)\n",
              " 394  2019-01-06 16:58:28   auto      71   True       -  (24.53, 53.23)\n",
              " 315  2019-01-06 20:38:05    bar      22   True       -  (24.53, 53.23)\n",
              " 228  2019-01-07 01:20:21    gas      92   True       -  (24.53, 53.23)\n",
              " 380  2019-01-07 05:28:13  extra      66   True       -  (24.53, 53.23)\n",
              " 65   2019-01-07 15:26:37   auto      10   True       -  (24.53, 53.23))"
            ]
          },
          "metadata": {}
        }
      ],
      "execution_count": 2,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# group by week starting on Monday\n",
        "weekly_spend = df.query(\"credit == True\").groupby([\n",
        "    pd.Grouper(key='spend_date', freq='W-MON')\n",
        "]).amounts.sum()\n",
        "\n",
        "weekly_gain = df.query(\"credit == False\").groupby([\n",
        "    pd.Grouper(key='spend_date', freq='W-MON')\n",
        "]).amounts.sum()\n",
        "\n\n",
        "print(\"you weekly spend:\", weekly_spend.head(5))\n",
        "print(\"you weekly gain:\", weekly_gain.head(5))"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "('you weekly spend:', spend_date\n",
            "2019-01-07    500\n",
            "2019-01-14    147\n",
            "2019-01-21    320\n",
            "2019-01-28    428\n",
            "2019-02-04    272\n",
            "Freq: W-MON, Name: amounts, dtype: int64)\n",
            "('you weekly gain:', spend_date\n",
            "2019-01-07    129.0\n",
            "2019-01-14     13.0\n",
            "2019-01-21      NaN\n",
            "2019-01-28    196.0\n",
            "2019-02-04     43.0\n",
            "Freq: W-MON, Name: amounts, dtype: float64)\n"
          ]
        }
      ],
      "execution_count": 5,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "monthly_spend = df.query(\"credit == True\").groupby([\n",
        "    pd.Grouper(key='spend_date', freq='M')\n",
        "]).amounts.sum()\n",
        "\n",
        "monthly_gain = df.query(\"credit == False\").groupby([\n",
        "    pd.Grouper(key='spend_date', freq='M')\n",
        "]).amounts.sum()\n",
        "\n\n",
        "print(\"you monthly spend:\", monthly_spend.head(5))\n",
        "print(\"you monthly gain:\", monthly_gain.head(5))"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "('you monthly spend:', spend_date\n",
            "2019-01-31    1541\n",
            "2019-02-28    1606\n",
            "2019-03-31    1157\n",
            "2019-04-30    1289\n",
            "2019-05-31    1263\n",
            "Freq: M, Name: amounts, dtype: int64)\n",
            "('you monthly gain:', spend_date\n",
            "2019-01-31    381\n",
            "2019-02-28    214\n",
            "2019-03-31    815\n",
            "2019-04-30    606\n",
            "2019-05-31    235\n",
            "Freq: M, Name: amounts, dtype: int64)\n"
          ]
        }
      ],
      "execution_count": 6,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [],
      "outputs": [],
      "execution_count": 5,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    }
  ],
  "metadata": {
    "kernel_info": {
      "name": "python2"
    },
    "language_info": {
      "mimetype": "text/x-python",
      "nbconvert_exporter": "python",
      "name": "python",
      "pygments_lexer": "ipython2",
      "version": "2.7.9",
      "file_extension": ".py",
      "codemirror_mode": {
        "version": 2,
        "name": "ipython"
      }
    },
    "kernelspec": {
      "name": "python2",
      "language": "python",
      "display_name": "Python 2"
    },
    "nteract": {
      "version": "0.12.3"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 4
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"source": [
	"import pandas as pd\n",
	"from random import randrange, choice, randint\n",
	"from datetime import timedelta\n",
	"from datetime import datetime\n",
	"\n",
	"def random_date(start, end):\n",
	" \"\"\"\n",
	" This function will return a random datetime between two datetime \n",
	" objects.\n",
	" \"\"\"\n",
	" delta = end - start\n",
	" int_delta = (delta.days * 24 * 60 * 60) + delta.seconds\n",
	" random_second = randrange(int_delta)\n",
	" return start + timedelta(seconds=random_second)\n",
	"\n",
	"d1 = datetime.strptime('1/1/2019 1:30 PM', '%m/%d/%Y %I:%M %p')\n",
	"d2 = datetime.strptime('1/1/2020 4:50 AM', '%m/%d/%Y %I:%M %p')\n",
	"\n",
	"opts=[\"gas\",\"food\",\"bar\",\"auto\",\"extra\"]\n",
	"\n",
	"n = 500\n",
	"\n",
	"spend_date = [ random_date(d1, d2) for x in range(0, n) ]\n",
	"payees = [ choice(opts) for x in range(0, n)]\n",
	"amounts = [ randint(0, 100) for x in range(0, n)]\n",
	"credit = [ choice([True, True, True, False]) for x in range(0, n)]\n",
	"desctip = [ \"-\" for x in range(0, n)]\n",
	"location = [ (24.53, 53.23) for x in range(0, n)]\n",
	"\n",
	"colnames = [\"spend_date\",\"payees\",\"amounts\",\"credit\",\"desctip\",\"location\",]\n",
	"data = [spend_date,payees,amounts,credit,desctip,location]\n",
	"df = pd.DataFrame(data).T\n",
	"df.columns = colnames\n",
	"df = df.sort_values('spend_date')\n",
	"\n",
	"# group by week starting on Monday\n",
	"groups = df.groupby(pd.Grouper(key='spend_date', freq='W-MON'))"
	],
	"outputs": [],
	"execution_count": 1,
	"metadata": {
	"collapsed": false,
	"outputHidden": false,
	"inputHidden": false
	}
	},
	{
	"cell_type": "code",
	"source": [
	"list(groups)[0]"
	],
	"outputs": [
	{
	"output_type": "execute_result",
	"execution_count": 2,
	"data": {
	"text/plain": [
	"(Timestamp('2019-01-07 00:00:00', freq='W-MON'),\n",
	" spend_date payees amounts credit desctip location\n",
	" 5 2019-01-01 21:50:14 food 0 False - (24.53, 53.23)\n",
	" 133 2019-01-02 11:36:35 food 17 False - (24.53, 53.23)\n",
	" 478 2019-01-02 14:17:05 gas 67 True - (24.53, 53.23)\n",
	" 290 2019-01-03 04:44:07 food 40 True - (24.53, 53.23)\n",
	" 42 2019-01-03 23:54:51 food 51 False - (24.53, 53.23)\n",
	" 431 2019-01-04 23:19:01 gas 100 True - (24.53, 53.23)\n",
	" 192 2019-01-05 15:58:55 food 4 True - (24.53, 53.23)\n",
	" 332 2019-01-05 19:42:41 auto 61 False - (24.53, 53.23)\n",
	" 483 2019-01-05 21:57:19 bar 28 True - (24.53, 53.23)\n",
	" 493 2019-01-06 11:10:06 extra 0 True - (24.53, 53.23)\n",
	" 394 2019-01-06 16:58:28 auto 71 True - (24.53, 53.23)\n",
	" 315 2019-01-06 20:38:05 bar 22 True - (24.53, 53.23)\n",
	" 228 2019-01-07 01:20:21 gas 92 True - (24.53, 53.23)\n",
	" 380 2019-01-07 05:28:13 extra 66 True - (24.53, 53.23)\n",
	" 65 2019-01-07 15:26:37 auto 10 True - (24.53, 53.23))"
	]
	},
	"metadata": {}
	}
	],
	"execution_count": 2,
	"metadata": {
	"collapsed": false,
	"outputHidden": false,
	"inputHidden": false
	}
	},
	{
	"cell_type": "code",
	"source": [
	"# group by week starting on Monday\n",
	"weekly_spend = df.query(\"credit == True\").groupby([\n",
	" pd.Grouper(key='spend_date', freq='W-MON')\n",
	"]).amounts.sum()\n",
	"\n",
	"weekly_gain = df.query(\"credit == False\").groupby([\n",
	" pd.Grouper(key='spend_date', freq='W-MON')\n",
	"]).amounts.sum()\n",
	"\n\n",
	"print(\"you weekly spend:\", weekly_spend.head(5))\n",
	"print(\"you weekly gain:\", weekly_gain.head(5))"
	],
	"outputs": [
	{
	"output_type": "stream",
	"name": "stdout",
	"text": [
	"('you weekly spend:', spend_date\n",
	"2019-01-07 500\n",
	"2019-01-14 147\n",
	"2019-01-21 320\n",
	"2019-01-28 428\n",
	"2019-02-04 272\n",
	"Freq: W-MON, Name: amounts, dtype: int64)\n",
	"('you weekly gain:', spend_date\n",
	"2019-01-07 129.0\n",
	"2019-01-14 13.0\n",
	"2019-01-21 NaN\n",
	"2019-01-28 196.0\n",
	"2019-02-04 43.0\n",
	"Freq: W-MON, Name: amounts, dtype: float64)\n"
	]
	}
	],
	"execution_count": 5,
	"metadata": {
	"collapsed": false,
	"outputHidden": false,
	"inputHidden": false
	}
	},
	{
	"cell_type": "code",
	"source": [
	"monthly_spend = df.query(\"credit == True\").groupby([\n",
	" pd.Grouper(key='spend_date', freq='M')\n",
	"]).amounts.sum()\n",
	"\n",
	"monthly_gain = df.query(\"credit == False\").groupby([\n",
	" pd.Grouper(key='spend_date', freq='M')\n",
	"]).amounts.sum()\n",
	"\n\n",
	"print(\"you monthly spend:\", monthly_spend.head(5))\n",
	"print(\"you monthly gain:\", monthly_gain.head(5))"
	],
	"outputs": [
	{
	"output_type": "stream",
	"name": "stdout",
	"text": [
	"('you monthly spend:', spend_date\n",
	"2019-01-31 1541\n",
	"2019-02-28 1606\n",
	"2019-03-31 1157\n",
	"2019-04-30 1289\n",
	"2019-05-31 1263\n",
	"Freq: M, Name: amounts, dtype: int64)\n",
	"('you monthly gain:', spend_date\n",
	"2019-01-31 381\n",
	"2019-02-28 214\n",
	"2019-03-31 815\n",
	"2019-04-30 606\n",
	"2019-05-31 235\n",
	"Freq: M, Name: amounts, dtype: int64)\n"
	]
	}
	],
	"execution_count": 6,
	"metadata": {
	"collapsed": false,
	"outputHidden": false,
	"inputHidden": false
	}
	},
	{
	"cell_type": "code",
	"source": [],
	"outputs": [],
	"execution_count": 5,
	"metadata": {
	"collapsed": false,
	"outputHidden": false,
	"inputHidden": false
	}
	}
	],
	"metadata": {
	"kernel_info": {
	"name": "python2"
	},
	"language_info": {
	"mimetype": "text/x-python",
	"nbconvert_exporter": "python",
	"name": "python",
	"pygments_lexer": "ipython2",
	"version": "2.7.9",
	"file_extension": ".py",
	"codemirror_mode": {
	"version": 2,
	"name": "ipython"
	}
	},
	"kernelspec": {
	"name": "python2",
	"language": "python",
	"display_name": "Python 2"
	},
	"nteract": {
	"version": "0.12.3"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 4
	}