Created
November 20, 2017 22:11
-
-
Save kforeman/055ecfa1d2d32e0952e6a456bb5015aa to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"%matplotlib inline\n", | |
"import pandas as pd\n", | |
"import seaborn as sns\n", | |
"import os, sys" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Make a list of all the codes we're interested in\n", | |
"_i.e. first two pages of Asher's Excel spreadsheet_" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>level_1</th>\n", | |
" <th>addtl</th>\n", | |
" <th>description</th>\n", | |
" <th>odyssey</th>\n", | |
" <th>statute</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Assault on Person > 60 by Caretaker Causing Bo...</td>\n", | |
" <td>14588</td>\n", | |
" <td>11-5-10.3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Assault on Person > 60 by Caretaker Causing Bo...</td>\n", | |
" <td>14589</td>\n", | |
" <td>11-5-10.3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>2</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Assault on Person > 60 by Caretaker Causing Se...</td>\n", | |
" <td>14590</td>\n", | |
" <td>11-5-10.4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>3</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Assault on Person > 60 by Caretaker Causing Se...</td>\n", | |
" <td>14591</td>\n", | |
" <td>11-5-10.4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>4</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Assault on Person > 60 Causing Bodily Injury</td>\n", | |
" <td>11187</td>\n", | |
" <td>11-5-10</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" level_1 addtl description odyssey \\\n", | |
"0 0 NaN Assault on Person > 60 by Caretaker Causing Bo... 14588 \n", | |
"0 1 NaN Assault on Person > 60 by Caretaker Causing Bo... 14589 \n", | |
"0 2 NaN Assault on Person > 60 by Caretaker Causing Se... 14590 \n", | |
"0 3 NaN Assault on Person > 60 by Caretaker Causing Se... 14591 \n", | |
"0 4 NaN Assault on Person > 60 Causing Bodily Injury 11187 \n", | |
"\n", | |
" statute \n", | |
"0 11-5-10.3 \n", | |
"0 11-5-10.3 \n", | |
"0 11-5-10.4 \n", | |
"0 11-5-10.4 \n", | |
"0 11-5-10 " | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"all_codes = pd.concat(pd.read_excel('../data/raw/Elder Offense Codes FINAL.xlsx', [0,1], skiprows=1)).reset_index(True)\n", | |
"all_codes.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"statute = all_codes['statute'].dropna().unique().tolist()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"odyssey = all_codes['odyssey'].dropna().unique().tolist()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'11-5-10.3|11-5-10.4|11-5-10|11-5-10.1|11-5-10 and 12-29-2(a)(2)|11-5-10.1 and 12-29-2(a)(2)|11-68-2|11-68-2(a)(1)|11-68-2(a)(3)|11-68-2(a)(2)|11-41-1|11-41-7|11-39-1|11-5-10.2|11-5-11|11-5-12|23-17.8-1(a)(1)(i)to(iv)|11-5-12(a)|23-17.8-1(a)(1)(i) to (iv)|23-17.8-1A1i-iv|23-17.8-1E|23-17.8-1|40.1-27-1|40.1-27-1/M|11-41-5(b)|14588|14589|14590|14591|11187|14581|14585|11-5-10|11-5-10.3|11050100|11050101J|11-5-10A|14582|14583|14584|14586|11-68-2|14880|14882|14881|11061|14376|11062|14377|11068|14388|11069|14389|11-41-5(b)|11050104|11050101|11-5-10.1A|11-5-10.1|11050102|11050102J|11050110|11050120|11390010|11-5-10.2|11-5-11|11-5-12F|11-5-12M|11585|11605|14587|14592|14594|15403|23-17.8-1A1i-iv|23-17.8-1E|23-17.8-1F|23178011|23178012|23178013|40.1-27-1F|40.1-27-1M|401271A1'" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"charges = '|'.join([str(c) for c in statute + odyssey if c != ' '])\n", | |
"charges" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Load in the data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df = pd.read_hdf(os.path.join(os.pardir, 'data', 'clean', 'CRIM_CASE.h5'))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>IDENTIFIER</th>\n", | |
" <th>CASE_NO</th>\n", | |
" <th>CASE_LOCN</th>\n", | |
" <th>CASE_TYPE</th>\n", | |
" <th>CASE_FILING</th>\n", | |
" <th>ARREST_AGENCY</th>\n", | |
" <th>ARREST_DATE</th>\n", | |
" <th>CHARGE_NUMBER</th>\n", | |
" <th>CHARGE_CODE</th>\n", | |
" <th>CHARGE_MAINT</th>\n", | |
" <th>CHARGE_FILING</th>\n", | |
" <th>CHARGE_CITY</th>\n", | |
" <th>CHARGE_DISP</th>\n", | |
" <th>CHARGE_DATE</th>\n", | |
" <th>PLEA_DISP</th>\n", | |
" <th>PLEA_MAINT</th>\n", | |
" <th>PLEA_DATE</th>\n", | |
" <th>PROSECUTOR</th>\n", | |
" <th>ATTORNEY</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>4680901</td>\n", | |
" <td>31-2016-02909</td>\n", | |
" <td>3D</td>\n", | |
" <td>M</td>\n", | |
" <td>2016-03-29T00:00:00.000000000</td>\n", | |
" <td>CRANSTON POLICE DEPARTMENT</td>\n", | |
" <td>2016-03-09T00:00:00.000000000</td>\n", | |
" <td>1</td>\n", | |
" <td>003</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2016-03-29T00:00:00.000000000</td>\n", | |
" <td>CRANSTON</td>\n", | |
" <td>GPNOL</td>\n", | |
" <td>2016-03-29T00:00:00.000000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>4551414</td>\n", | |
" <td>31-2016-02925</td>\n", | |
" <td>3D</td>\n", | |
" <td>M</td>\n", | |
" <td>2016-03-29T00:00:00.000000000</td>\n", | |
" <td>CRANSTON POLICE DEPARTMENT</td>\n", | |
" <td>2016-03-09T00:00:00.000000000</td>\n", | |
" <td>1</td>\n", | |
" <td>003</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2016-03-29T00:00:00.000000000</td>\n", | |
" <td>CRANSTON</td>\n", | |
" <td>DMNON</td>\n", | |
" <td>2016-03-30T00:00:00.000000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>4550775</td>\n", | |
" <td>31-2016-03096</td>\n", | |
" <td>3D</td>\n", | |
" <td>M</td>\n", | |
" <td>2016-03-30T00:00:00.000000000</td>\n", | |
" <td>COVENTRY POLICE DEPARTMENT</td>\n", | |
" <td>2016-03-16T00:00:00.000000000</td>\n", | |
" <td>1</td>\n", | |
" <td>003</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2016-03-30T00:00:00.000000000</td>\n", | |
" <td>COVENTRY</td>\n", | |
" <td>GPNOL</td>\n", | |
" <td>2016-03-30T00:00:00.000000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4623177</td>\n", | |
" <td>31-2016-03286</td>\n", | |
" <td>3D</td>\n", | |
" <td>M</td>\n", | |
" <td>2016-04-13T00:00:00.000000000</td>\n", | |
" <td>COVENTRY POLICE DEPARTMENT</td>\n", | |
" <td>NaT</td>\n", | |
" <td>1</td>\n", | |
" <td>003</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2016-04-13T00:00:00.000000000</td>\n", | |
" <td>COVENTRY</td>\n", | |
" <td>GPNOL</td>\n", | |
" <td>2016-04-27T00:00:00.000000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>4658064</td>\n", | |
" <td>31-2016-03298</td>\n", | |
" <td>3D</td>\n", | |
" <td>M</td>\n", | |
" <td>2016-04-18T00:00:00.000000000</td>\n", | |
" <td>COVENTRY POLICE DEPARTMENT</td>\n", | |
" <td>NaT</td>\n", | |
" <td>1</td>\n", | |
" <td>14383</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2016-04-18T00:00:00.000000000</td>\n", | |
" <td>COVENTRY</td>\n", | |
" <td>GPNOL</td>\n", | |
" <td>2016-04-18T00:00:00.000000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" IDENTIFIER CASE_NO CASE_LOCN CASE_TYPE \\\n", | |
"0 4680901 31-2016-02909 3D M \n", | |
"1 4551414 31-2016-02925 3D M \n", | |
"2 4550775 31-2016-03096 3D M \n", | |
"3 4623177 31-2016-03286 3D M \n", | |
"4 4658064 31-2016-03298 3D M \n", | |
"\n", | |
" CASE_FILING ARREST_AGENCY \\\n", | |
"0 2016-03-29T00:00:00.000000000 CRANSTON POLICE DEPARTMENT \n", | |
"1 2016-03-29T00:00:00.000000000 CRANSTON POLICE DEPARTMENT \n", | |
"2 2016-03-30T00:00:00.000000000 COVENTRY POLICE DEPARTMENT \n", | |
"3 2016-04-13T00:00:00.000000000 COVENTRY POLICE DEPARTMENT \n", | |
"4 2016-04-18T00:00:00.000000000 COVENTRY POLICE DEPARTMENT \n", | |
"\n", | |
" ARREST_DATE CHARGE_NUMBER CHARGE_CODE CHARGE_MAINT \\\n", | |
"0 2016-03-09T00:00:00.000000000 1 003 NaN \n", | |
"1 2016-03-09T00:00:00.000000000 1 003 NaN \n", | |
"2 2016-03-16T00:00:00.000000000 1 003 NaN \n", | |
"3 NaT 1 003 NaN \n", | |
"4 NaT 1 14383 NaN \n", | |
"\n", | |
" CHARGE_FILING CHARGE_CITY CHARGE_DISP \\\n", | |
"0 2016-03-29T00:00:00.000000000 CRANSTON GPNOL \n", | |
"1 2016-03-29T00:00:00.000000000 CRANSTON DMNON \n", | |
"2 2016-03-30T00:00:00.000000000 COVENTRY GPNOL \n", | |
"3 2016-04-13T00:00:00.000000000 COVENTRY GPNOL \n", | |
"4 2016-04-18T00:00:00.000000000 COVENTRY GPNOL \n", | |
"\n", | |
" CHARGE_DATE PLEA_DISP PLEA_MAINT PLEA_DATE PROSECUTOR \\\n", | |
"0 2016-03-29T00:00:00.000000000 NaN NaN NaT NaN \n", | |
"1 2016-03-30T00:00:00.000000000 NaN NaN NaT NaN \n", | |
"2 2016-03-30T00:00:00.000000000 NaN NaN NaT NaN \n", | |
"3 2016-04-27T00:00:00.000000000 NaN NaN NaT NaN \n", | |
"4 2016-04-18T00:00:00.000000000 NaN NaN NaT NaN \n", | |
"\n", | |
" ATTORNEY \n", | |
"0 NaN \n", | |
"1 NaN \n", | |
"2 NaN \n", | |
"3 NaN \n", | |
"4 NaN " | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Find all of the records that have some version of the desired charge codes" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/Users/kfor/.conda/envs/forecast-2-fork/lib/python2.7/site-packages/ipykernel_launcher.py:1: UserWarning: This pattern has match groups. To actually get the groups, use str.extract.\n", | |
" \"\"\"Entry point for launching an IPython kernel.\n" | |
] | |
} | |
], | |
"source": [ | |
"records = df.loc[df['CHARGE_CODE'].str.contains(charges)]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"_drop 11-41-10, 11-41-11, etc since we're only interested in 11-41-1[a-z]_" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array(['11-41-11F', '11-41-11.1M', '11-41-11M', '11-41-16.1F', '11-41-13',\n", | |
" '11-41-12', '11-41-14', '11-41-10', '11-41-19', '11-41-17'], dtype=object)" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"records.ix[records['CHARGE_CODE'].str.startswith('11-41-1'), 'CHARGE_CODE'].unique()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"records = records.loc[records['CHARGE_CODE'].isin(['11-41-11F', '11-41-11.1M', '11-41-11M', '11-41-16.1F', \n", | |
" '11-41-13', '11-41-12', '11-41-14', '11-41-10', \n", | |
" '11-41-19', '11-41-17']) == False]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>IDENTIFIER</th>\n", | |
" <th>CASE_NO</th>\n", | |
" <th>CASE_LOCN</th>\n", | |
" <th>CASE_TYPE</th>\n", | |
" <th>CASE_FILING</th>\n", | |
" <th>ARREST_AGENCY</th>\n", | |
" <th>ARREST_DATE</th>\n", | |
" <th>CHARGE_NUMBER</th>\n", | |
" <th>CHARGE_CODE</th>\n", | |
" <th>CHARGE_MAINT</th>\n", | |
" <th>CHARGE_FILING</th>\n", | |
" <th>CHARGE_CITY</th>\n", | |
" <th>CHARGE_DISP</th>\n", | |
" <th>CHARGE_DATE</th>\n", | |
" <th>PLEA_DISP</th>\n", | |
" <th>PLEA_MAINT</th>\n", | |
" <th>PLEA_DATE</th>\n", | |
" <th>PROSECUTOR</th>\n", | |
" <th>ATTORNEY</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>1717</th>\n", | |
" <td>4679963</td>\n", | |
" <td>31-2016-04011</td>\n", | |
" <td>3D</td>\n", | |
" <td>M</td>\n", | |
" <td>2016-04-25T00:00:00.000000000</td>\n", | |
" <td>WARWICK POLICE DEPARTMENT</td>\n", | |
" <td>NaT</td>\n", | |
" <td>2</td>\n", | |
" <td>14376</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2016-04-25T00:00:00.000000000</td>\n", | |
" <td>WARWICK</td>\n", | |
" <td>GPNOL</td>\n", | |
" <td>2016-04-25T00:00:00.000000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1722</th>\n", | |
" <td>4679963</td>\n", | |
" <td>31-2016-04011</td>\n", | |
" <td>3D</td>\n", | |
" <td>M</td>\n", | |
" <td>2016-04-25T00:00:00.000000000</td>\n", | |
" <td>WARWICK POLICE DEPARTMENT</td>\n", | |
" <td>NaT</td>\n", | |
" <td>3</td>\n", | |
" <td>14376</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2016-04-25T00:00:00.000000000</td>\n", | |
" <td>WARWICK</td>\n", | |
" <td>GPNOL</td>\n", | |
" <td>2016-04-25T00:00:00.000000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1791</th>\n", | |
" <td>4650239</td>\n", | |
" <td>31-2002-05380</td>\n", | |
" <td>3D</td>\n", | |
" <td>M</td>\n", | |
" <td>2002-11-29T00:00:00.000000000</td>\n", | |
" <td>COVENTRY POLICE DEPARTMENT</td>\n", | |
" <td>NaT</td>\n", | |
" <td>2</td>\n", | |
" <td>14376</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2002-11-29T00:00:00.000000000</td>\n", | |
" <td>COVENTRY</td>\n", | |
" <td>DM48A</td>\n", | |
" <td>2002-12-02T00:00:00.000000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2257</th>\n", | |
" <td>4640772</td>\n", | |
" <td>32-1999-04942</td>\n", | |
" <td>3D</td>\n", | |
" <td>F</td>\n", | |
" <td>1999-10-15T00:00:00.000000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>1999-10-15T00:00:00.000000000</td>\n", | |
" <td>1</td>\n", | |
" <td>11390010</td>\n", | |
" <td>NaN</td>\n", | |
" <td>1999-10-15T00:00:00.000000000</td>\n", | |
" <td>COVENTRY</td>\n", | |
" <td>NCNIS</td>\n", | |
" <td>1999-11-29T00:00:00.000000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2501</th>\n", | |
" <td>4650504</td>\n", | |
" <td>32-2002-04093</td>\n", | |
" <td>3D</td>\n", | |
" <td>F</td>\n", | |
" <td>2002-09-07T00:00:00.000000000</td>\n", | |
" <td>COVENTRY POLICE DEPARTMENT</td>\n", | |
" <td>2002-09-07T00:00:00.000000000</td>\n", | |
" <td>2</td>\n", | |
" <td>14586</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2002-09-07T00:00:00.000000000</td>\n", | |
" <td>COVENTRY</td>\n", | |
" <td>NCNIS</td>\n", | |
" <td>2002-10-24T00:00:00.000000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" IDENTIFIER CASE_NO CASE_LOCN CASE_TYPE \\\n", | |
"1717 4679963 31-2016-04011 3D M \n", | |
"1722 4679963 31-2016-04011 3D M \n", | |
"1791 4650239 31-2002-05380 3D M \n", | |
"2257 4640772 32-1999-04942 3D F \n", | |
"2501 4650504 32-2002-04093 3D F \n", | |
"\n", | |
" CASE_FILING ARREST_AGENCY \\\n", | |
"1717 2016-04-25T00:00:00.000000000 WARWICK POLICE DEPARTMENT \n", | |
"1722 2016-04-25T00:00:00.000000000 WARWICK POLICE DEPARTMENT \n", | |
"1791 2002-11-29T00:00:00.000000000 COVENTRY POLICE DEPARTMENT \n", | |
"2257 1999-10-15T00:00:00.000000000 NaN \n", | |
"2501 2002-09-07T00:00:00.000000000 COVENTRY POLICE DEPARTMENT \n", | |
"\n", | |
" ARREST_DATE CHARGE_NUMBER CHARGE_CODE CHARGE_MAINT \\\n", | |
"1717 NaT 2 14376 NaN \n", | |
"1722 NaT 3 14376 NaN \n", | |
"1791 NaT 2 14376 NaN \n", | |
"2257 1999-10-15T00:00:00.000000000 1 11390010 NaN \n", | |
"2501 2002-09-07T00:00:00.000000000 2 14586 NaN \n", | |
"\n", | |
" CHARGE_FILING CHARGE_CITY CHARGE_DISP \\\n", | |
"1717 2016-04-25T00:00:00.000000000 WARWICK GPNOL \n", | |
"1722 2016-04-25T00:00:00.000000000 WARWICK GPNOL \n", | |
"1791 2002-11-29T00:00:00.000000000 COVENTRY DM48A \n", | |
"2257 1999-10-15T00:00:00.000000000 COVENTRY NCNIS \n", | |
"2501 2002-09-07T00:00:00.000000000 COVENTRY NCNIS \n", | |
"\n", | |
" CHARGE_DATE PLEA_DISP PLEA_MAINT PLEA_DATE PROSECUTOR \\\n", | |
"1717 2016-04-25T00:00:00.000000000 NaN NaN NaT NaN \n", | |
"1722 2016-04-25T00:00:00.000000000 NaN NaN NaT NaN \n", | |
"1791 2002-12-02T00:00:00.000000000 NaN NaN NaT NaN \n", | |
"2257 1999-11-29T00:00:00.000000000 NaN NaN NaT NaN \n", | |
"2501 2002-10-24T00:00:00.000000000 NaN NaN NaT NaN \n", | |
"\n", | |
" ATTORNEY \n", | |
"1717 NaN \n", | |
"1722 NaN \n", | |
"1791 NaN \n", | |
"2257 NaN \n", | |
"2501 NaN " | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"records.head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Count how many records there are" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"10459" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"records.shape[0]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Save unique case numbers" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"7912" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"unique_cases = records.drop_duplicates('CASE_NO')\n", | |
"unique_cases.shape[0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"unique_cases['CASE_NO'].to_csv('unique_cases.csv', index=False)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Count up how many of each type we found" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>CHARGE_CODE</th>\n", | |
" <th>description</th>\n", | |
" <th>count</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>11390010</td>\n", | |
" <td>NaN</td>\n", | |
" <td>4294</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>14581</td>\n", | |
" <td>Assault on Person > 60 Causing Bodily Injury</td>\n", | |
" <td>1338</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>14376</td>\n", | |
" <td>Larceny < 500 Person 65+</td>\n", | |
" <td>788</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>11-5-10A</td>\n", | |
" <td>Domestic Violence - Assault Person > 60 Causin...</td>\n", | |
" <td>403</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>14377</td>\n", | |
" <td>Larceny > 500 Person 65+</td>\n", | |
" <td>370</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>14592</td>\n", | |
" <td>Assault on Person With Severe Impairments</td>\n", | |
" <td>169</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>14586</td>\n", | |
" <td>Domestic Violence - Assault Person > 60 Causin...</td>\n", | |
" <td>166</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>14880</td>\n", | |
" <td>Exploitation of an Elder < 500</td>\n", | |
" <td>125</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>14585</td>\n", | |
" <td>Assault on Person > 60 Causing Serious Bodily ...</td>\n", | |
" <td>71</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>40.1-27-1F</td>\n", | |
" <td>PATIENT ABUSE/FELONY</td>\n", | |
" <td>57</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10</th>\n", | |
" <td>23-17.8-1F</td>\n", | |
" <td>PATIENT ABUSE/FELONY</td>\n", | |
" <td>56</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>11</th>\n", | |
" <td>11-39-1A1D</td>\n", | |
" <td>NaN</td>\n", | |
" <td>54</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>11-39-1A2D</td>\n", | |
" <td>NaN</td>\n", | |
" <td>50</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>14594</td>\n", | |
" <td>Abuse, Neglect, and/or Exploitation of Adults ...</td>\n", | |
" <td>28</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>14582</td>\n", | |
" <td>Domestic Violence - Assault Person > 60 Causin...</td>\n", | |
" <td>22</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>14588</td>\n", | |
" <td>Assault on Person > 60 by Caretaker Causing Bo...</td>\n", | |
" <td>20</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>14587</td>\n", | |
" <td>Assault on Person With Severe Impairments Caus...</td>\n", | |
" <td>15</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>17</th>\n", | |
" <td>23-17.8-1M</td>\n", | |
" <td>NaN</td>\n", | |
" <td>7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>18</th>\n", | |
" <td>23-17.8-1E</td>\n", | |
" <td>PATIENT MISTREATMENT</td>\n", | |
" <td>5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>19</th>\n", | |
" <td>14590</td>\n", | |
" <td>Assault on Person > 60 by Caretaker Causing Se...</td>\n", | |
" <td>3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>20</th>\n", | |
" <td>14389</td>\n", | |
" <td>Larceny From the Person > 500 Person 65+</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>21</th>\n", | |
" <td>14881</td>\n", | |
" <td>Exploitation of an Elder > 500 < 100,000</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>22</th>\n", | |
" <td>11-68-2</td>\n", | |
" <td>EXPLOITATION OF AN ELDER</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>23</th>\n", | |
" <td>40.1-27-10</td>\n", | |
" <td>NaN</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" CHARGE_CODE description count\n", | |
"0 11390010 NaN 4294\n", | |
"1 14581 Assault on Person > 60 Causing Bodily Injury 1338\n", | |
"2 14376 Larceny < 500 Person 65+ 788\n", | |
"3 11-5-10A Domestic Violence - Assault Person > 60 Causin... 403\n", | |
"4 14377 Larceny > 500 Person 65+ 370\n", | |
"5 14592 Assault on Person With Severe Impairments 169\n", | |
"6 14586 Domestic Violence - Assault Person > 60 Causin... 166\n", | |
"7 14880 Exploitation of an Elder < 500 125\n", | |
"8 14585 Assault on Person > 60 Causing Serious Bodily ... 71\n", | |
"9 40.1-27-1F PATIENT ABUSE/FELONY 57\n", | |
"10 23-17.8-1F PATIENT ABUSE/FELONY 56\n", | |
"11 11-39-1A1D NaN 54\n", | |
"12 11-39-1A2D NaN 50\n", | |
"13 14594 Abuse, Neglect, and/or Exploitation of Adults ... 28\n", | |
"14 14582 Domestic Violence - Assault Person > 60 Causin... 22\n", | |
"15 14588 Assault on Person > 60 by Caretaker Causing Bo... 20\n", | |
"16 14587 Assault on Person With Severe Impairments Caus... 15\n", | |
"17 23-17.8-1M NaN 7\n", | |
"18 23-17.8-1E PATIENT MISTREATMENT 5\n", | |
"19 14590 Assault on Person > 60 by Caretaker Causing Se... 3\n", | |
"20 14389 Larceny From the Person > 500 Person 65+ 1\n", | |
"21 14881 Exploitation of an Elder > 500 < 100,000 1\n", | |
"22 11-68-2 EXPLOITATION OF AN ELDER 1\n", | |
"23 40.1-27-10 NaN 1" | |
] | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"counts = records.drop_duplicates(['CASE_NO','CHARGE_CODE'])\\\n", | |
" .groupby('CHARGE_CODE', as_index=False).count()\\\n", | |
" .merge(all_codes, left_on='CHARGE_CODE', right_on='odyssey', how='left')\\\n", | |
" [['CHARGE_CODE','description','IDENTIFIER']]\\\n", | |
" .sort_values('IDENTIFIER', ascending=False)\\\n", | |
" .rename(columns={'IDENTIFIER': 'count'})\\\n", | |
" .reset_index(drop=True)\n", | |
"counts" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"counts.to_csv('charge_counts.csv', index=False)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.12" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment