Created
November 21, 2017 17:32
-
-
Save kforeman/223a79a92e8a2fe1d174a11df6c25bb1 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"%matplotlib inline\n", | |
"import pandas as pd\n", | |
"import seaborn as sns\n", | |
"import os, sys" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Make a list of all the codes we're interested in\n", | |
"_i.e. first two pages of Asher's Excel spreadsheet_" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>level_1</th>\n", | |
" <th>statute</th>\n", | |
" <th>odyssey</th>\n", | |
" <th>description</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>11-5-10.3</td>\n", | |
" <td>14588</td>\n", | |
" <td>Assault on Person > 60 by Caretaker Causing Bo...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>11-5-10.3</td>\n", | |
" <td>14589</td>\n", | |
" <td>Assault on Person > 60 by Caretaker Causing Bo...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>2</td>\n", | |
" <td>11-5-10.4</td>\n", | |
" <td>14590</td>\n", | |
" <td>Assault on Person > 60 by Caretaker Causing Se...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>3</td>\n", | |
" <td>11-5-10.4</td>\n", | |
" <td>14591</td>\n", | |
" <td>Assault on Person > 60 by Caretaker Causing Se...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>4</td>\n", | |
" <td>11-5-10</td>\n", | |
" <td>11187</td>\n", | |
" <td>Assault on Person > 60 Causing Bodily Injury</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" level_1 statute odyssey \\\n", | |
"0 0 11-5-10.3 14588 \n", | |
"0 1 11-5-10.3 14589 \n", | |
"0 2 11-5-10.4 14590 \n", | |
"0 3 11-5-10.4 14591 \n", | |
"0 4 11-5-10 11187 \n", | |
"\n", | |
" description \n", | |
"0 Assault on Person > 60 by Caretaker Causing Bo... \n", | |
"0 Assault on Person > 60 by Caretaker Causing Bo... \n", | |
"0 Assault on Person > 60 by Caretaker Causing Se... \n", | |
"0 Assault on Person > 60 by Caretaker Causing Se... \n", | |
"0 Assault on Person > 60 Causing Bodily Injury " | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"all_codes = pd.concat(pd.read_excel('../data/raw/Elder Offense Codes FINAL.xlsx', [0], skiprows=1)).reset_index(True)\n", | |
"all_codes.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"statute = all_codes['statute'].dropna().unique().tolist()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"odyssey = all_codes['odyssey'].dropna().unique().tolist()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'11-5-10.3|11-5-10.4|11-5-10|11-5-10.1|11-5-10 and 12-29-2(a)(2)|11-5-10.1 and 12-29-2(a)(2)|11-68-2|11-68-2(a)(1)|11-68-2(a)(3)|11-68-2(a)(2)|11-41-1|11-41-7|14588|14589|14590|14591|11187|14581|14585|11-5-10|11-5-10.3|11050100|11050101J|11-5-10A|14582|14583|14584|14586|11-68-2|14880|14882|14881|11061|14376|11062|14377|11068|14388|11069|14389|11-41-5(b)|11050104|11050101|11-5-10.1A|11-5-10.1'" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"charges = '|'.join([str(c) for c in statute + odyssey if c != ' '])\n", | |
"charges" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Load in the data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"df = pd.read_hdf(os.path.join(os.pardir, 'data', 'clean', 'CRIM_CASE.h5'))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>IDENTIFIER</th>\n", | |
" <th>CASE_NO</th>\n", | |
" <th>CASE_LOCN</th>\n", | |
" <th>CASE_TYPE</th>\n", | |
" <th>CASE_FILING</th>\n", | |
" <th>ARREST_AGENCY</th>\n", | |
" <th>ARREST_DATE</th>\n", | |
" <th>CHARGE_NUMBER</th>\n", | |
" <th>CHARGE_CODE</th>\n", | |
" <th>CHARGE_MAINT</th>\n", | |
" <th>CHARGE_FILING</th>\n", | |
" <th>CHARGE_CITY</th>\n", | |
" <th>CHARGE_DISP</th>\n", | |
" <th>CHARGE_DATE</th>\n", | |
" <th>PLEA_DISP</th>\n", | |
" <th>PLEA_MAINT</th>\n", | |
" <th>PLEA_DATE</th>\n", | |
" <th>PROSECUTOR</th>\n", | |
" <th>ATTORNEY</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>4680901</td>\n", | |
" <td>31-2016-02909</td>\n", | |
" <td>3D</td>\n", | |
" <td>M</td>\n", | |
" <td>2016-03-29T00:00:00.000000000</td>\n", | |
" <td>CRANSTON POLICE DEPARTMENT</td>\n", | |
" <td>2016-03-09T00:00:00.000000000</td>\n", | |
" <td>1</td>\n", | |
" <td>003</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2016-03-29T00:00:00.000000000</td>\n", | |
" <td>CRANSTON</td>\n", | |
" <td>GPNOL</td>\n", | |
" <td>2016-03-29T00:00:00.000000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>4551414</td>\n", | |
" <td>31-2016-02925</td>\n", | |
" <td>3D</td>\n", | |
" <td>M</td>\n", | |
" <td>2016-03-29T00:00:00.000000000</td>\n", | |
" <td>CRANSTON POLICE DEPARTMENT</td>\n", | |
" <td>2016-03-09T00:00:00.000000000</td>\n", | |
" <td>1</td>\n", | |
" <td>003</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2016-03-29T00:00:00.000000000</td>\n", | |
" <td>CRANSTON</td>\n", | |
" <td>DMNON</td>\n", | |
" <td>2016-03-30T00:00:00.000000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>4550775</td>\n", | |
" <td>31-2016-03096</td>\n", | |
" <td>3D</td>\n", | |
" <td>M</td>\n", | |
" <td>2016-03-30T00:00:00.000000000</td>\n", | |
" <td>COVENTRY POLICE DEPARTMENT</td>\n", | |
" <td>2016-03-16T00:00:00.000000000</td>\n", | |
" <td>1</td>\n", | |
" <td>003</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2016-03-30T00:00:00.000000000</td>\n", | |
" <td>COVENTRY</td>\n", | |
" <td>GPNOL</td>\n", | |
" <td>2016-03-30T00:00:00.000000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4623177</td>\n", | |
" <td>31-2016-03286</td>\n", | |
" <td>3D</td>\n", | |
" <td>M</td>\n", | |
" <td>2016-04-13T00:00:00.000000000</td>\n", | |
" <td>COVENTRY POLICE DEPARTMENT</td>\n", | |
" <td>NaT</td>\n", | |
" <td>1</td>\n", | |
" <td>003</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2016-04-13T00:00:00.000000000</td>\n", | |
" <td>COVENTRY</td>\n", | |
" <td>GPNOL</td>\n", | |
" <td>2016-04-27T00:00:00.000000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>4658064</td>\n", | |
" <td>31-2016-03298</td>\n", | |
" <td>3D</td>\n", | |
" <td>M</td>\n", | |
" <td>2016-04-18T00:00:00.000000000</td>\n", | |
" <td>COVENTRY POLICE DEPARTMENT</td>\n", | |
" <td>NaT</td>\n", | |
" <td>1</td>\n", | |
" <td>14383</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2016-04-18T00:00:00.000000000</td>\n", | |
" <td>COVENTRY</td>\n", | |
" <td>GPNOL</td>\n", | |
" <td>2016-04-18T00:00:00.000000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" IDENTIFIER CASE_NO CASE_LOCN CASE_TYPE \\\n", | |
"0 4680901 31-2016-02909 3D M \n", | |
"1 4551414 31-2016-02925 3D M \n", | |
"2 4550775 31-2016-03096 3D M \n", | |
"3 4623177 31-2016-03286 3D M \n", | |
"4 4658064 31-2016-03298 3D M \n", | |
"\n", | |
" CASE_FILING ARREST_AGENCY \\\n", | |
"0 2016-03-29T00:00:00.000000000 CRANSTON POLICE DEPARTMENT \n", | |
"1 2016-03-29T00:00:00.000000000 CRANSTON POLICE DEPARTMENT \n", | |
"2 2016-03-30T00:00:00.000000000 COVENTRY POLICE DEPARTMENT \n", | |
"3 2016-04-13T00:00:00.000000000 COVENTRY POLICE DEPARTMENT \n", | |
"4 2016-04-18T00:00:00.000000000 COVENTRY POLICE DEPARTMENT \n", | |
"\n", | |
" ARREST_DATE CHARGE_NUMBER CHARGE_CODE CHARGE_MAINT \\\n", | |
"0 2016-03-09T00:00:00.000000000 1 003 NaN \n", | |
"1 2016-03-09T00:00:00.000000000 1 003 NaN \n", | |
"2 2016-03-16T00:00:00.000000000 1 003 NaN \n", | |
"3 NaT 1 003 NaN \n", | |
"4 NaT 1 14383 NaN \n", | |
"\n", | |
" CHARGE_FILING CHARGE_CITY CHARGE_DISP \\\n", | |
"0 2016-03-29T00:00:00.000000000 CRANSTON GPNOL \n", | |
"1 2016-03-29T00:00:00.000000000 CRANSTON DMNON \n", | |
"2 2016-03-30T00:00:00.000000000 COVENTRY GPNOL \n", | |
"3 2016-04-13T00:00:00.000000000 COVENTRY GPNOL \n", | |
"4 2016-04-18T00:00:00.000000000 COVENTRY GPNOL \n", | |
"\n", | |
" CHARGE_DATE PLEA_DISP PLEA_MAINT PLEA_DATE PROSECUTOR \\\n", | |
"0 2016-03-29T00:00:00.000000000 NaN NaN NaT NaN \n", | |
"1 2016-03-30T00:00:00.000000000 NaN NaN NaT NaN \n", | |
"2 2016-03-30T00:00:00.000000000 NaN NaN NaT NaN \n", | |
"3 2016-04-27T00:00:00.000000000 NaN NaN NaT NaN \n", | |
"4 2016-04-18T00:00:00.000000000 NaN NaN NaT NaN \n", | |
"\n", | |
" ATTORNEY \n", | |
"0 NaN \n", | |
"1 NaN \n", | |
"2 NaN \n", | |
"3 NaN \n", | |
"4 NaN " | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Find all of the records that have some version of the desired charge codes" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/Users/kfor/.conda/envs/forecast-2-fork/lib/python2.7/site-packages/ipykernel_launcher.py:1: UserWarning: This pattern has match groups. To actually get the groups, use str.extract.\n", | |
" \"\"\"Entry point for launching an IPython kernel.\n" | |
] | |
} | |
], | |
"source": [ | |
"records = df.loc[df['CHARGE_CODE'].str.contains(charges)]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"_drop 11-41-10, 11-41-11, etc since we're only interested in 11-41-1[a-z]_" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array(['11-41-11F', '11-41-11.1M', '11-41-11M', '11-41-16.1F', '11-41-13',\n", | |
" '11-41-12', '11-41-14', '11-41-10', '11-41-19', '11-41-17'], dtype=object)" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"records.ix[records['CHARGE_CODE'].str.startswith('11-41-1'), 'CHARGE_CODE'].unique()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"records = records.loc[records['CHARGE_CODE'].isin(['11-41-11F', '11-41-11.1M', '11-41-11M', '11-41-16.1F', \n", | |
" '11-41-13', '11-41-12', '11-41-14', '11-41-10', \n", | |
" '11-41-19', '11-41-17']) == False]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>IDENTIFIER</th>\n", | |
" <th>CASE_NO</th>\n", | |
" <th>CASE_LOCN</th>\n", | |
" <th>CASE_TYPE</th>\n", | |
" <th>CASE_FILING</th>\n", | |
" <th>ARREST_AGENCY</th>\n", | |
" <th>ARREST_DATE</th>\n", | |
" <th>CHARGE_NUMBER</th>\n", | |
" <th>CHARGE_CODE</th>\n", | |
" <th>CHARGE_MAINT</th>\n", | |
" <th>CHARGE_FILING</th>\n", | |
" <th>CHARGE_CITY</th>\n", | |
" <th>CHARGE_DISP</th>\n", | |
" <th>CHARGE_DATE</th>\n", | |
" <th>PLEA_DISP</th>\n", | |
" <th>PLEA_MAINT</th>\n", | |
" <th>PLEA_DATE</th>\n", | |
" <th>PROSECUTOR</th>\n", | |
" <th>ATTORNEY</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>1717</th>\n", | |
" <td>4679963</td>\n", | |
" <td>31-2016-04011</td>\n", | |
" <td>3D</td>\n", | |
" <td>M</td>\n", | |
" <td>2016-04-25T00:00:00.000000000</td>\n", | |
" <td>WARWICK POLICE DEPARTMENT</td>\n", | |
" <td>NaT</td>\n", | |
" <td>2</td>\n", | |
" <td>14376</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2016-04-25T00:00:00.000000000</td>\n", | |
" <td>WARWICK</td>\n", | |
" <td>GPNOL</td>\n", | |
" <td>2016-04-25T00:00:00.000000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1722</th>\n", | |
" <td>4679963</td>\n", | |
" <td>31-2016-04011</td>\n", | |
" <td>3D</td>\n", | |
" <td>M</td>\n", | |
" <td>2016-04-25T00:00:00.000000000</td>\n", | |
" <td>WARWICK POLICE DEPARTMENT</td>\n", | |
" <td>NaT</td>\n", | |
" <td>3</td>\n", | |
" <td>14376</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2016-04-25T00:00:00.000000000</td>\n", | |
" <td>WARWICK</td>\n", | |
" <td>GPNOL</td>\n", | |
" <td>2016-04-25T00:00:00.000000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1791</th>\n", | |
" <td>4650239</td>\n", | |
" <td>31-2002-05380</td>\n", | |
" <td>3D</td>\n", | |
" <td>M</td>\n", | |
" <td>2002-11-29T00:00:00.000000000</td>\n", | |
" <td>COVENTRY POLICE DEPARTMENT</td>\n", | |
" <td>NaT</td>\n", | |
" <td>2</td>\n", | |
" <td>14376</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2002-11-29T00:00:00.000000000</td>\n", | |
" <td>COVENTRY</td>\n", | |
" <td>DM48A</td>\n", | |
" <td>2002-12-02T00:00:00.000000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2501</th>\n", | |
" <td>4650504</td>\n", | |
" <td>32-2002-04093</td>\n", | |
" <td>3D</td>\n", | |
" <td>F</td>\n", | |
" <td>2002-09-07T00:00:00.000000000</td>\n", | |
" <td>COVENTRY POLICE DEPARTMENT</td>\n", | |
" <td>2002-09-07T00:00:00.000000000</td>\n", | |
" <td>2</td>\n", | |
" <td>14586</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2002-09-07T00:00:00.000000000</td>\n", | |
" <td>COVENTRY</td>\n", | |
" <td>NCNIS</td>\n", | |
" <td>2002-10-24T00:00:00.000000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3532</th>\n", | |
" <td>4636013</td>\n", | |
" <td>P2-1999-1892A</td>\n", | |
" <td>P</td>\n", | |
" <td>F</td>\n", | |
" <td>1999-04-13T00:00:00.000000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>1999-04-13T00:00:00.000000000</td>\n", | |
" <td>101</td>\n", | |
" <td>14581</td>\n", | |
" <td>NaN</td>\n", | |
" <td>1999-04-13T00:00:00.000000000</td>\n", | |
" <td>BARRINGTON</td>\n", | |
" <td>CGINS</td>\n", | |
" <td>1999-05-03T00:00:00.000000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" IDENTIFIER CASE_NO CASE_LOCN CASE_TYPE \\\n", | |
"1717 4679963 31-2016-04011 3D M \n", | |
"1722 4679963 31-2016-04011 3D M \n", | |
"1791 4650239 31-2002-05380 3D M \n", | |
"2501 4650504 32-2002-04093 3D F \n", | |
"3532 4636013 P2-1999-1892A P F \n", | |
"\n", | |
" CASE_FILING ARREST_AGENCY \\\n", | |
"1717 2016-04-25T00:00:00.000000000 WARWICK POLICE DEPARTMENT \n", | |
"1722 2016-04-25T00:00:00.000000000 WARWICK POLICE DEPARTMENT \n", | |
"1791 2002-11-29T00:00:00.000000000 COVENTRY POLICE DEPARTMENT \n", | |
"2501 2002-09-07T00:00:00.000000000 COVENTRY POLICE DEPARTMENT \n", | |
"3532 1999-04-13T00:00:00.000000000 NaN \n", | |
"\n", | |
" ARREST_DATE CHARGE_NUMBER CHARGE_CODE CHARGE_MAINT \\\n", | |
"1717 NaT 2 14376 NaN \n", | |
"1722 NaT 3 14376 NaN \n", | |
"1791 NaT 2 14376 NaN \n", | |
"2501 2002-09-07T00:00:00.000000000 2 14586 NaN \n", | |
"3532 1999-04-13T00:00:00.000000000 101 14581 NaN \n", | |
"\n", | |
" CHARGE_FILING CHARGE_CITY CHARGE_DISP \\\n", | |
"1717 2016-04-25T00:00:00.000000000 WARWICK GPNOL \n", | |
"1722 2016-04-25T00:00:00.000000000 WARWICK GPNOL \n", | |
"1791 2002-11-29T00:00:00.000000000 COVENTRY DM48A \n", | |
"2501 2002-09-07T00:00:00.000000000 COVENTRY NCNIS \n", | |
"3532 1999-04-13T00:00:00.000000000 BARRINGTON CGINS \n", | |
"\n", | |
" CHARGE_DATE PLEA_DISP PLEA_MAINT PLEA_DATE PROSECUTOR \\\n", | |
"1717 2016-04-25T00:00:00.000000000 NaN NaN NaT NaN \n", | |
"1722 2016-04-25T00:00:00.000000000 NaN NaN NaT NaN \n", | |
"1791 2002-12-02T00:00:00.000000000 NaN NaN NaT NaN \n", | |
"2501 2002-10-24T00:00:00.000000000 NaN NaN NaT NaN \n", | |
"3532 1999-05-03T00:00:00.000000000 NaN NaN NaT NaN \n", | |
"\n", | |
" ATTORNEY \n", | |
"1717 NaN \n", | |
"1722 NaN \n", | |
"1791 NaN \n", | |
"2501 NaN \n", | |
"3532 NaN " | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"records.head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Count how many records there are" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"3695" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"records.shape[0]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Save unique case numbers" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"3281" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"unique_cases = records.drop_duplicates('CASE_NO')\n", | |
"unique_cases.shape[0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"unique_cases['CASE_NO'].to_csv('unique_cases.csv', index=False)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Save unique identifiers" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"2476" | |
] | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"unique_ids = records.drop_duplicates('IDENTIFIER')\n", | |
"unique_ids.shape[0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"unique_ids['IDENTIFIER'].to_csv('unique_identifier.csv', index=False)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Count up how many of each type we found" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>CHARGE_CODE</th>\n", | |
" <th>description</th>\n", | |
" <th>count</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>14581</td>\n", | |
" <td>Assault on Person > 60 Causing Bodily Injury</td>\n", | |
" <td>1338</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>14376</td>\n", | |
" <td>Larceny < 500 Person 65+</td>\n", | |
" <td>788</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>11-5-10A</td>\n", | |
" <td>Domestic Violence - Assault Person > 60 Causin...</td>\n", | |
" <td>403</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>14377</td>\n", | |
" <td>Larceny > 500 Person 65+</td>\n", | |
" <td>370</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>14586</td>\n", | |
" <td>Domestic Violence - Assault Person > 60 Causin...</td>\n", | |
" <td>166</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>14880</td>\n", | |
" <td>Exploitation of an Elder < 500</td>\n", | |
" <td>125</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>14585</td>\n", | |
" <td>Assault on Person > 60 Causing Serious Bodily ...</td>\n", | |
" <td>71</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>14582</td>\n", | |
" <td>Domestic Violence - Assault Person > 60 Causin...</td>\n", | |
" <td>22</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>14588</td>\n", | |
" <td>Assault on Person > 60 by Caretaker Causing Bo...</td>\n", | |
" <td>20</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>14590</td>\n", | |
" <td>Assault on Person > 60 by Caretaker Causing Se...</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>11-68-2</td>\n", | |
" <td>EXPLOITATION OF AN ELDER</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>14389</td>\n", | |
" <td>Larceny From the Person > 500 Person 65+</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>14881</td>\n", | |
" <td>Exploitation of an Elder > 500 < 100,000</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" CHARGE_CODE description count\n", | |
"0 14581 Assault on Person > 60 Causing Bodily Injury 1338\n", | |
"1 14376 Larceny < 500 Person 65+ 788\n", | |
"2 11-5-10A Domestic Violence - Assault Person > 60 Causin... 403\n", | |
"3 14377 Larceny > 500 Person 65+ 370\n", | |
"4 14586 Domestic Violence - Assault Person > 60 Causin... 166\n", | |
"5 14880 Exploitation of an Elder < 500 125\n", | |
"6 14585 Assault on Person > 60 Causing Serious Bodily ... 71\n", | |
"7 14582 Domestic Violence - Assault Person > 60 Causin... 22\n", | |
"8 14588 Assault on Person > 60 by Caretaker Causing Bo... 20\n", | |
"9 14590 Assault on Person > 60 by Caretaker Causing Se... 3\n", | |
"10 11-68-2 EXPLOITATION OF AN ELDER 1\n", | |
"11 14389 Larceny From the Person > 500 Person 65+ 1\n", | |
"12 14881 Exploitation of an Elder > 500 < 100,000 1" | |
] | |
}, | |
"execution_count": 17, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"counts = records.drop_duplicates(['CASE_NO','CHARGE_CODE'])\\\n", | |
" .groupby('CHARGE_CODE', as_index=False).count()\\\n", | |
" .merge(all_codes, left_on='CHARGE_CODE', right_on='odyssey', how='left')\\\n", | |
" [['CHARGE_CODE','description','IDENTIFIER']]\\\n", | |
" .sort_values('IDENTIFIER', ascending=False)\\\n", | |
" .rename(columns={'IDENTIFIER': 'count'})\\\n", | |
" .reset_index(drop=True)\n", | |
"counts" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"counts.to_csv('charge_counts.csv', index=False)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.12" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment