Forked from bsweger/Taxes Paid and Personal Income.ipynb
Created
November 15, 2016 19:38
-
-
Save bertomartin/406ec8113d7a49960d0b7b80621bee63 to your computer and use it in GitHub Desktop.
Federal Taxes by State
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "", | |
"signature": "sha256:8150453367b906e07eef200351901fb9c424c6eeeb84438199e61c4a49c6d449" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "heading", | |
"level": 2, | |
"metadata": {}, | |
"source": [ | |
"Gross Tax Collections by State" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"source = IRS http://www.irs.gov/uac/SOI-Tax-Stats-Gross-Collections,-by-Type-of-Tax-and-State,-Fiscal-Year-IRS-Data-Book-Table-5\n", | |
"\n", | |
"**Notes:**\n", | |
"\n", | |
"* Estate and trust income tax is tax on the income (or loss) of an estate or a trust. Prior to FY 2008, this amount was reported as part of the individual income tax total\n", | |
"* Estate tax is the tax on a property transfer at death" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"import glob" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 28 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"gross = pd.DataFrame()\n", | |
"grossfiles = glob.glob('gross/gross????.csv')\n", | |
"for file in grossfiles:\n", | |
" df = pd.read_csv(\n", | |
" file, \n", | |
" header=0,\n", | |
" thousands=',',\n", | |
" names=['state', 'total','business','individual_total', \n", | |
" 'individual_witheld_fica', 'individual_payments_seca', \n", | |
" 'ui', 'rr', 'estate_trust', 'estate', 'gift', 'excise']\n", | |
" )\n", | |
" df.dropna(subset=['state'],inplace=True)\n", | |
" df['year'] = year\n", | |
" gross = pd.concat([gross, df])\n", | |
"\n", | |
"#hacky stuff to fix up state names\n", | |
"gross['state'] = gross['state'].str.strip()\n", | |
"gross['state'] = gross['state'].str.replace('United States, total \\[4\\]', 'United States')\n", | |
"gross['state'] = gross['state'].str.replace('US Totals', 'United States')\n", | |
"gross['state'] = gross['state'].str.replace('US Total', 'United States')\n", | |
"#gross.replace('*','')\n", | |
"\n", | |
"#hacky stuff to get rid of non-numbers\n", | |
"for col in gross.columns.values:\n", | |
" if col <> 'state':\n", | |
" gross[col] = gross[col].replace('[^0-9.]+', '', regex=True) \n", | |
"\n", | |
"#hacky stuff to force datatypes to numeric even when there's incoming non-numeric data\n", | |
"#(see http://stackoverflow.com/questions/17457418/converting-pandas-dataframe-types)\n", | |
"gross['ui'] = gross['ui'].astype(str).convert_objects(convert_numeric=True)\n", | |
"gross['estate'] = gross['estate'].astype(str).convert_objects(convert_numeric=True)\n", | |
"gross['gift'] = gross['gift'].astype(str).convert_objects(convert_numeric=True)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 29 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 2, | |
"metadata": {}, | |
"source": [ | |
"Tax Refunds by State" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"source = IRS http://www.irs.gov/uac/SOI-Tax-Stats-Amount-of-Refunds-Issued,-Including-Interest,-by-State-and-Fiscal-Year-IRS-Data-Book-Table-8\n", | |
"\n", | |
"**Notes**\n", | |
"\n", | |
"* total tax refunds include overpayment refunds, refunds resulting from examination activity, refundable tax credits, and other refunds required by law. Also includes interest.\n", | |
"* employment refunds include refunds of self-employment income taxes under the Self Employment Insurance Contributions Act (SECA); railroad retirement taxes under the Railroad Retirement Tax Act (RRTA); unemployment insurance taxes under the Federal Unemployment Tax Act (FUTA); and Old-Age, Survivors, Disability, and Hospital Insurance (OASDHI) taxes on salaries and wages under the Federal Insurance Contributions Act (FICA)\n", | |
"* excise tax refunds exclude refunds of excise taxes paid from U.S. Customs and Border Protection and the Alcohol Tobacco Tax and Trade Bureau" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"refunds = pd.DataFrame()\n", | |
"refundfiles = glob.glob('refunds/refunds????.csv')\n", | |
"for file in refundfiles:\n", | |
" df = pd.read_csv(\n", | |
" file, \n", | |
" header=0,\n", | |
" thousands=',',\n", | |
" names=['state', 'total','business','individual_income', \n", | |
" 'estate_trust', 'employment', 'estate', 'gift', 'excise'],\n", | |
" )\n", | |
" df.dropna(subset=['state'],inplace=True)\n", | |
" df['year'] = year\n", | |
" refunds = pd.concat([refunds, df])\n", | |
" \n", | |
"#hacky stuff to fix up state names\n", | |
"refunds['state'] = refunds['state'].str.strip()\n", | |
"refunds['state'] = refunds['state'].str.replace('US Total', 'United States')\n", | |
"\n", | |
"#hacky stuff to get rid of non-numbers\n", | |
"for col in refunds.columns.values:\n", | |
" if col <> 'state':\n", | |
" refunds[col] = refunds[col].replace('[^0-9.]+', '', regex=True) \n", | |
"\n", | |
"#hacky stuff to force datatypes to numeric even when there's incoming non-numeric data\n", | |
"#(see http://stackoverflow.com/questions/17457418/converting-pandas-dataframe-types)\n", | |
"refunds['estate'] = refunds['estate'].astype(str).convert_objects(convert_numeric=True)\n", | |
"refunds['gift'] = refunds['gift'].astype(str).convert_objects(convert_numeric=True)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 30 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 2, | |
"metadata": {}, | |
"source": [ | |
"Net Taxes by State" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#create stripped-down, standardized versions of gross collections and refunds, so we can calculate net\n", | |
"g = gross[['year', 'state', 'total', 'business', 'individual_total', 'estate', 'gift', 'excise']]\n", | |
"g.set_index(['year', 'state'], inplace=True)\n", | |
"r = refunds\n", | |
"r['estate_trust'] = r['estate_trust'].fillna(0)\n", | |
"r['individual_total'] = r['individual_income'] + r['estate_trust'] + r['employment']\n", | |
"r = r[['year', 'state', 'total', 'business', 'individual_total', 'estate', 'gift','excise']]\n", | |
"r.set_index(['year', 'state'], inplace=True)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 31 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"net = g.sub(r)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 32 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"net.to_csv('net_taxes.csv')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 33 | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment