Created
November 20, 2017 07:30
-
-
Save kforeman/bd1cbbc2172ef53c8cb72b32653c6b2d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"%matplotlib inline\n", | |
"import pandas as pd\n", | |
"import seaborn as sns\n", | |
"import re\n", | |
"import os, sys" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Make a list of all the codes we're interested in\n", | |
"_i.e. the first two sheets of Asher's Excel spreadsheet_" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th>addtl</th>\n", | |
" <th>descr</th>\n", | |
" <th>description</th>\n", | |
" <th>odyssey</th>\n", | |
" <th>statute</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th rowspan=\"5\" valign=\"top\">0</th>\n", | |
" <th>0</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Assault on Person > 60 by Caretaker Causing Bo...</td>\n", | |
" <td>14588</td>\n", | |
" <td>11-5-10.3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Assault on Person > 60 by Caretaker Causing Bo...</td>\n", | |
" <td>14589</td>\n", | |
" <td>11-5-10.3</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Assault on Person > 60 by Caretaker Causing Se...</td>\n", | |
" <td>14590</td>\n", | |
" <td>11-5-10.4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Assault on Person > 60 by Caretaker Causing Se...</td>\n", | |
" <td>14591</td>\n", | |
" <td>11-5-10.4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Assault on Person > 60 Causing Bodily Injury</td>\n", | |
" <td>11187</td>\n", | |
" <td>11-5-10</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" addtl descr description odyssey \\\n", | |
"0 0 NaN NaN Assault on Person > 60 by Caretaker Causing Bo... 14588 \n", | |
" 1 NaN NaN Assault on Person > 60 by Caretaker Causing Bo... 14589 \n", | |
" 2 NaN NaN Assault on Person > 60 by Caretaker Causing Se... 14590 \n", | |
" 3 NaN NaN Assault on Person > 60 by Caretaker Causing Se... 14591 \n", | |
" 4 NaN NaN Assault on Person > 60 Causing Bodily Injury 11187 \n", | |
"\n", | |
" statute \n", | |
"0 0 11-5-10.3 \n", | |
" 1 11-5-10.3 \n", | |
" 2 11-5-10.4 \n", | |
" 3 11-5-10.4 \n", | |
" 4 11-5-10 " | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Read the first two worksheets (positions 0 and 1), skipping the first row of each,\n", | |
"# then stack them; concat labels every row with the position of the sheet it came from.\n", | |
"code_sheets = pd.read_excel('../data/raw/Elder Offense Codes FINAL.xlsx', [0,1], skiprows=1)\n", | |
"all_codes = pd.concat(code_sheets)\n", | |
"all_codes.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Distinct non-missing statute citations, in order of first appearance.\n", | |
"statute = all_codes['statute'].dropna().drop_duplicates().tolist()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Distinct non-missing values of the 'odyssey' column, in order of first appearance.\n", | |
"odyssey = all_codes['odyssey'].dropna().drop_duplicates().tolist()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'11-5-10.3|11-5-10.4|11-5-10|11-5-10.1|11-5-10 and 12-29-2(a)(2)|11-5-10.1 and 12-29-2(a)(2)|11-8-2.3 and 12-29-2(a)(13)|11-68-2|11-68-2(a)(1)|11-68-2(a)(3)|11-68-2(a)(2)|11-41-1|11-41-7|11-39-1|11-5-10.2|11-5-11|11-5-12|23-17.8-1(a)(1)(i)to(iv)|11-5-12(a)|23-17.8-1(a)(1)(i) to (iv)|23-17.8-1A1i-iv|23-17.8-1E|23-17.8-1|40.1-27-1|40.1-27-1/M|11-41-5(b)|14588|14589|14590|14591|11187|14581|14585|11-5-10|11-5-10.3|11050100|11050101J|11-5-10A|14582|14583|14584|14586|14472|14473|11-68-2|14880|14882|14881|11061|14376|11062|14377|11068|14388|11069|14389|11-41-5(b)|11050104|11050101|11-5-10.1A|11-5-10.1|11050102|11050102J|11050110|11050120|11390010|11-5-10.2|11-5-11|11-5-12F|11-5-12M|11585|11605|14587|14592|14594|15403|23-17.8-1A1i-iv|23-17.8-1E|23-17.8-1F|23178011|23178012|23178013|40.1-27-1F|40.1-27-1M|401271A1'" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Build one alternation pattern for str.contains out of every statute and odyssey code.\n", | |
"# Each code is escaped so regex metacharacters in the statutes (the parentheses in\n", | |
"# '11-41-5(b)', the dot in '11-5-10.3') are matched literally instead of being read as\n", | |
"# groups / wildcards. Surrounding whitespace is stripped and blank entries are dropped.\n", | |
"charge_codes = [str(c).strip() for c in statute + odyssey]\n", | |
"charges = '|'.join(re.escape(code) for code in charge_codes if code)\n", | |
"charges" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Load in the data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Case table stored under ../data/clean, resolved against the current working directory.\n", | |
"crim_case_path = os.path.join(os.pardir, 'data', 'clean', 'CRIM_CASE.h5')\n", | |
"df = pd.read_hdf(crim_case_path)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>IDENTIFIER</th>\n", | |
" <th>CASE_NO</th>\n", | |
" <th>CASE_LOCN</th>\n", | |
" <th>CASE_TYPE</th>\n", | |
" <th>CASE_FILING</th>\n", | |
" <th>ARREST_AGENCY</th>\n", | |
" <th>ARREST_DATE</th>\n", | |
" <th>CHARGE_NUMBER</th>\n", | |
" <th>CHARGE_CODE</th>\n", | |
" <th>CHARGE_MAINT</th>\n", | |
" <th>CHARGE_FILING</th>\n", | |
" <th>CHARGE_CITY</th>\n", | |
" <th>CHARGE_DISP</th>\n", | |
" <th>CHARGE_DATE</th>\n", | |
" <th>PLEA_DISP</th>\n", | |
" <th>PLEA_MAINT</th>\n", | |
" <th>PLEA_DATE</th>\n", | |
" <th>PROSECUTOR</th>\n", | |
" <th>ATTORNEY</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>4680901</td>\n", | |
" <td>31-2016-02909</td>\n", | |
" <td>3D</td>\n", | |
" <td>M</td>\n", | |
" <td>2016-03-29T00:00:00.000000000</td>\n", | |
" <td>CRANSTON POLICE DEPARTMENT</td>\n", | |
" <td>2016-03-09T00:00:00.000000000</td>\n", | |
" <td>1</td>\n", | |
" <td>003</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2016-03-29T00:00:00.000000000</td>\n", | |
" <td>CRANSTON</td>\n", | |
" <td>GPNOL</td>\n", | |
" <td>2016-03-29T00:00:00.000000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>4551414</td>\n", | |
" <td>31-2016-02925</td>\n", | |
" <td>3D</td>\n", | |
" <td>M</td>\n", | |
" <td>2016-03-29T00:00:00.000000000</td>\n", | |
" <td>CRANSTON POLICE DEPARTMENT</td>\n", | |
" <td>2016-03-09T00:00:00.000000000</td>\n", | |
" <td>1</td>\n", | |
" <td>003</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2016-03-29T00:00:00.000000000</td>\n", | |
" <td>CRANSTON</td>\n", | |
" <td>DMNON</td>\n", | |
" <td>2016-03-30T00:00:00.000000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>4550775</td>\n", | |
" <td>31-2016-03096</td>\n", | |
" <td>3D</td>\n", | |
" <td>M</td>\n", | |
" <td>2016-03-30T00:00:00.000000000</td>\n", | |
" <td>COVENTRY POLICE DEPARTMENT</td>\n", | |
" <td>2016-03-16T00:00:00.000000000</td>\n", | |
" <td>1</td>\n", | |
" <td>003</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2016-03-30T00:00:00.000000000</td>\n", | |
" <td>COVENTRY</td>\n", | |
" <td>GPNOL</td>\n", | |
" <td>2016-03-30T00:00:00.000000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4623177</td>\n", | |
" <td>31-2016-03286</td>\n", | |
" <td>3D</td>\n", | |
" <td>M</td>\n", | |
" <td>2016-04-13T00:00:00.000000000</td>\n", | |
" <td>COVENTRY POLICE DEPARTMENT</td>\n", | |
" <td>NaT</td>\n", | |
" <td>1</td>\n", | |
" <td>003</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2016-04-13T00:00:00.000000000</td>\n", | |
" <td>COVENTRY</td>\n", | |
" <td>GPNOL</td>\n", | |
" <td>2016-04-27T00:00:00.000000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>4658064</td>\n", | |
" <td>31-2016-03298</td>\n", | |
" <td>3D</td>\n", | |
" <td>M</td>\n", | |
" <td>2016-04-18T00:00:00.000000000</td>\n", | |
" <td>COVENTRY POLICE DEPARTMENT</td>\n", | |
" <td>NaT</td>\n", | |
" <td>1</td>\n", | |
" <td>14383</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2016-04-18T00:00:00.000000000</td>\n", | |
" <td>COVENTRY</td>\n", | |
" <td>GPNOL</td>\n", | |
" <td>2016-04-18T00:00:00.000000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" IDENTIFIER CASE_NO CASE_LOCN CASE_TYPE \\\n", | |
"0 4680901 31-2016-02909 3D M \n", | |
"1 4551414 31-2016-02925 3D M \n", | |
"2 4550775 31-2016-03096 3D M \n", | |
"3 4623177 31-2016-03286 3D M \n", | |
"4 4658064 31-2016-03298 3D M \n", | |
"\n", | |
" CASE_FILING ARREST_AGENCY \\\n", | |
"0 2016-03-29T00:00:00.000000000 CRANSTON POLICE DEPARTMENT \n", | |
"1 2016-03-29T00:00:00.000000000 CRANSTON POLICE DEPARTMENT \n", | |
"2 2016-03-30T00:00:00.000000000 COVENTRY POLICE DEPARTMENT \n", | |
"3 2016-04-13T00:00:00.000000000 COVENTRY POLICE DEPARTMENT \n", | |
"4 2016-04-18T00:00:00.000000000 COVENTRY POLICE DEPARTMENT \n", | |
"\n", | |
" ARREST_DATE CHARGE_NUMBER CHARGE_CODE CHARGE_MAINT \\\n", | |
"0 2016-03-09T00:00:00.000000000 1 003 NaN \n", | |
"1 2016-03-09T00:00:00.000000000 1 003 NaN \n", | |
"2 2016-03-16T00:00:00.000000000 1 003 NaN \n", | |
"3 NaT 1 003 NaN \n", | |
"4 NaT 1 14383 NaN \n", | |
"\n", | |
" CHARGE_FILING CHARGE_CITY CHARGE_DISP \\\n", | |
"0 2016-03-29T00:00:00.000000000 CRANSTON GPNOL \n", | |
"1 2016-03-29T00:00:00.000000000 CRANSTON DMNON \n", | |
"2 2016-03-30T00:00:00.000000000 COVENTRY GPNOL \n", | |
"3 2016-04-13T00:00:00.000000000 COVENTRY GPNOL \n", | |
"4 2016-04-18T00:00:00.000000000 COVENTRY GPNOL \n", | |
"\n", | |
" CHARGE_DATE PLEA_DISP PLEA_MAINT PLEA_DATE PROSECUTOR \\\n", | |
"0 2016-03-29T00:00:00.000000000 NaN NaN NaT NaN \n", | |
"1 2016-03-30T00:00:00.000000000 NaN NaN NaT NaN \n", | |
"2 2016-03-30T00:00:00.000000000 NaN NaN NaT NaN \n", | |
"3 2016-04-27T00:00:00.000000000 NaN NaN NaT NaN \n", | |
"4 2016-04-18T00:00:00.000000000 NaN NaN NaT NaN \n", | |
"\n", | |
" ATTORNEY \n", | |
"0 NaN \n", | |
"1 NaN \n", | |
"2 NaN \n", | |
"3 NaN \n", | |
"4 NaN " | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df.head()" | |
] | |
}, | |
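{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Find all of the records whose charge code contains any of the desired codes\n", | |
"Matching is by substring, so a listed code such as `11-41-1` also picks up longer codes such as `11-41-11F`." | |
] | |
}, | |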
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/Users/kfor/.conda/envs/forecast-2-fork/lib/python2.7/site-packages/ipykernel_launcher.py:1: UserWarning: This pattern has match groups. To actually get the groups, use str.extract.\n", | |
" \"\"\"Entry point for launching an IPython kernel.\n" | |
] | |
} | |
], | |
"source": [ | |
"# Keep every row whose CHARGE_CODE contains a match for the `charges` pattern.\n", | |
"# na=False: a missing CHARGE_CODE counts as a non-match; without it str.contains yields\n", | |
"# NaN for that row and the resulting mask can no longer be used to index with .loc.\n", | |
"records = df.loc[df['CHARGE_CODE'].str.contains(charges, na=False)]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>IDENTIFIER</th>\n", | |
" <th>CASE_NO</th>\n", | |
" <th>CASE_LOCN</th>\n", | |
" <th>CASE_TYPE</th>\n", | |
" <th>CASE_FILING</th>\n", | |
" <th>ARREST_AGENCY</th>\n", | |
" <th>ARREST_DATE</th>\n", | |
" <th>CHARGE_NUMBER</th>\n", | |
" <th>CHARGE_CODE</th>\n", | |
" <th>CHARGE_MAINT</th>\n", | |
" <th>CHARGE_FILING</th>\n", | |
" <th>CHARGE_CITY</th>\n", | |
" <th>CHARGE_DISP</th>\n", | |
" <th>CHARGE_DATE</th>\n", | |
" <th>PLEA_DISP</th>\n", | |
" <th>PLEA_MAINT</th>\n", | |
" <th>PLEA_DATE</th>\n", | |
" <th>PROSECUTOR</th>\n", | |
" <th>ATTORNEY</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>337</th>\n", | |
" <td>4791168</td>\n", | |
" <td>32-1997-06002</td>\n", | |
" <td>3D</td>\n", | |
" <td>F</td>\n", | |
" <td>1997-12-12T00:00:00.000000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>1997-12-12T00:00:00.000000000</td>\n", | |
" <td>1</td>\n", | |
" <td>11-41-11F</td>\n", | |
" <td>NaN</td>\n", | |
" <td>1997-12-12T00:00:00.000000000</td>\n", | |
" <td>WARWICK</td>\n", | |
" <td>CGINS</td>\n", | |
" <td>1998-01-12T00:00:00.000000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>697</th>\n", | |
" <td>4791169</td>\n", | |
" <td>32-1997-06003</td>\n", | |
" <td>3D</td>\n", | |
" <td>F</td>\n", | |
" <td>1997-12-12T00:00:00.000000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>1997-12-12T00:00:00.000000000</td>\n", | |
" <td>1</td>\n", | |
" <td>11-41-11F</td>\n", | |
" <td>NaN</td>\n", | |
" <td>1997-12-12T00:00:00.000000000</td>\n", | |
" <td>WARWICK</td>\n", | |
" <td>CGINS</td>\n", | |
" <td>1998-01-12T00:00:00.000000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1365</th>\n", | |
" <td>4641670</td>\n", | |
" <td>32-1999-05699</td>\n", | |
" <td>3D</td>\n", | |
" <td>F</td>\n", | |
" <td>1999-12-04T00:00:00.000000000</td>\n", | |
" <td>WARWICK POLICE DEPARTMENT</td>\n", | |
" <td>1999-12-04T00:00:00.000000000</td>\n", | |
" <td>2</td>\n", | |
" <td>11-41-11.1M</td>\n", | |
" <td>NaN</td>\n", | |
" <td>1999-12-04T00:00:00.000000000</td>\n", | |
" <td>WARWICK</td>\n", | |
" <td>CGINA</td>\n", | |
" <td>1999-12-17T00:00:00.000000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1717</th>\n", | |
" <td>4679963</td>\n", | |
" <td>31-2016-04011</td>\n", | |
" <td>3D</td>\n", | |
" <td>M</td>\n", | |
" <td>2016-04-25T00:00:00.000000000</td>\n", | |
" <td>WARWICK POLICE DEPARTMENT</td>\n", | |
" <td>NaT</td>\n", | |
" <td>2</td>\n", | |
" <td>14376</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2016-04-25T00:00:00.000000000</td>\n", | |
" <td>WARWICK</td>\n", | |
" <td>GPNOL</td>\n", | |
" <td>2016-04-25T00:00:00.000000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1722</th>\n", | |
" <td>4679963</td>\n", | |
" <td>31-2016-04011</td>\n", | |
" <td>3D</td>\n", | |
" <td>M</td>\n", | |
" <td>2016-04-25T00:00:00.000000000</td>\n", | |
" <td>WARWICK POLICE DEPARTMENT</td>\n", | |
" <td>NaT</td>\n", | |
" <td>3</td>\n", | |
" <td>14376</td>\n", | |
" <td>NaN</td>\n", | |
" <td>2016-04-25T00:00:00.000000000</td>\n", | |
" <td>WARWICK</td>\n", | |
" <td>GPNOL</td>\n", | |
" <td>2016-04-25T00:00:00.000000000</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaT</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" IDENTIFIER CASE_NO CASE_LOCN CASE_TYPE \\\n", | |
"337 4791168 32-1997-06002 3D F \n", | |
"697 4791169 32-1997-06003 3D F \n", | |
"1365 4641670 32-1999-05699 3D F \n", | |
"1717 4679963 31-2016-04011 3D M \n", | |
"1722 4679963 31-2016-04011 3D M \n", | |
"\n", | |
" CASE_FILING ARREST_AGENCY \\\n", | |
"337 1997-12-12T00:00:00.000000000 NaN \n", | |
"697 1997-12-12T00:00:00.000000000 NaN \n", | |
"1365 1999-12-04T00:00:00.000000000 WARWICK POLICE DEPARTMENT \n", | |
"1717 2016-04-25T00:00:00.000000000 WARWICK POLICE DEPARTMENT \n", | |
"1722 2016-04-25T00:00:00.000000000 WARWICK POLICE DEPARTMENT \n", | |
"\n", | |
" ARREST_DATE CHARGE_NUMBER CHARGE_CODE CHARGE_MAINT \\\n", | |
"337 1997-12-12T00:00:00.000000000 1 11-41-11F NaN \n", | |
"697 1997-12-12T00:00:00.000000000 1 11-41-11F NaN \n", | |
"1365 1999-12-04T00:00:00.000000000 2 11-41-11.1M NaN \n", | |
"1717 NaT 2 14376 NaN \n", | |
"1722 NaT 3 14376 NaN \n", | |
"\n", | |
" CHARGE_FILING CHARGE_CITY CHARGE_DISP \\\n", | |
"337 1997-12-12T00:00:00.000000000 WARWICK CGINS \n", | |
"697 1997-12-12T00:00:00.000000000 WARWICK CGINS \n", | |
"1365 1999-12-04T00:00:00.000000000 WARWICK CGINA \n", | |
"1717 2016-04-25T00:00:00.000000000 WARWICK GPNOL \n", | |
"1722 2016-04-25T00:00:00.000000000 WARWICK GPNOL \n", | |
"\n", | |
" CHARGE_DATE PLEA_DISP PLEA_MAINT PLEA_DATE PROSECUTOR \\\n", | |
"337 1998-01-12T00:00:00.000000000 NaN NaN NaT NaN \n", | |
"697 1998-01-12T00:00:00.000000000 NaN NaN NaT NaN \n", | |
"1365 1999-12-17T00:00:00.000000000 NaN NaN NaT NaN \n", | |
"1717 2016-04-25T00:00:00.000000000 NaN NaN NaT NaN \n", | |
"1722 2016-04-25T00:00:00.000000000 NaN NaN NaT NaN \n", | |
"\n", | |
" ATTORNEY \n", | |
"337 NaN \n", | |
"697 NaN \n", | |
"1365 NaN \n", | |
"1717 NaN \n", | |
"1722 NaN " | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"records.head()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Count how many records there are" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"12718" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Number of matching charge rows.\n", | |
"len(records)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Count how many there are after removing duplicate case numbers" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"9479" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Keep the first row for each CASE_NO, then count what is left.\n", | |
"len(records.drop_duplicates(subset='CASE_NO'))" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.12" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment