Skip to content

Instantly share code, notes, and snippets.

@nipunbatra
Created August 10, 2014 23:01
Show Gist options
  • Save nipunbatra/360ddc273e5cdf6f756e to your computer and use it in GitHub Desktop.
Save nipunbatra/360ddc273e5cdf6f756e to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": "",
"signature": "sha256:e8fc1e93642193fe04e3c33701dc7dbccf87d36d917fc25d60699fdf96dbc620"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"from sklearn import svm\n",
"from datetime import datetime, timedelta\n",
"from cStringIO import StringIO\n",
"from datetime import datetime, timedelta\n",
"from cStringIO import StringIO"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cdf = pd.read_csv('crime.csv', header=0)\n",
"#nbrhd_data = pd.read_csv ('/home/prasad/Desktop/pythonProj/census_neighborhood_demographics_2010.csv', header = 0)\n",
"#data = pd.read_fwf(\"/home/prasad/Desktop/pythonProj/wonder.txt\")"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cdf.head()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>INCIDENT_ID</th>\n",
" <th>OFFENSE_ID</th>\n",
" <th>OFFENSE_CODE</th>\n",
" <th>OFFENSE_CODE_EXTENSION</th>\n",
" <th>OFFENSE_TYPE_ID</th>\n",
" <th>OFFENSE_CATEGORY_ID</th>\n",
" <th>FIRST_OCCURRENCE_DATE</th>\n",
" <th>LAST_OCCURRENCE_DATE</th>\n",
" <th>REPORTED_DATE</th>\n",
" <th>INCIDENT_ADDRESS</th>\n",
" <th>GEO_X</th>\n",
" <th>GEO_Y</th>\n",
" <th>GEO_LON</th>\n",
" <th>GEO_LAT</th>\n",
" <th>DISTRICT_ID</th>\n",
" <th>PRECINCT_ID</th>\n",
" <th>NEIGHBORHOOD_ID</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 201413028</td>\n",
" <td> 201413028549900</td>\n",
" <td> 5499</td>\n",
" <td> 0</td>\n",
" <td> traf-other</td>\n",
" <td> all-other-crimes</td>\n",
" <td> 2014-01-08 19:40:00</td>\n",
" <td> NaN</td>\n",
" <td> 2014-01-08 20:22:00</td>\n",
" <td> NaN</td>\n",
" <td> 3130484</td>\n",
" <td> 1683392</td>\n",
" <td>-105.036215</td>\n",
" <td> 39.708798</td>\n",
" <td> 4</td>\n",
" <td> 412</td>\n",
" <td> westwood</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 201412608</td>\n",
" <td> 201412608230300</td>\n",
" <td> 2303</td>\n",
" <td> 0</td>\n",
" <td> theft-shoplift</td>\n",
" <td> larceny</td>\n",
" <td> 2014-01-08 15:23:00</td>\n",
" <td> NaN</td>\n",
" <td> 2014-01-08 17:00:00</td>\n",
" <td> NaN</td>\n",
" <td> 3165524</td>\n",
" <td> 1680769</td>\n",
" <td>-104.911735</td>\n",
" <td> 39.701040</td>\n",
" <td> 3</td>\n",
" <td> 322</td>\n",
" <td> washington-virginia-vale</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 201412199</td>\n",
" <td> 201412199131500</td>\n",
" <td> 1315</td>\n",
" <td> 0</td>\n",
" <td> aggravated-assault</td>\n",
" <td> aggravated-assault</td>\n",
" <td> 2014-01-08 10:30:00</td>\n",
" <td> NaN</td>\n",
" <td> 2014-01-08 10:57:00</td>\n",
" <td> 1101 N VERBENA ST</td>\n",
" <td> 3171282</td>\n",
" <td> 1692904</td>\n",
" <td>-104.890982</td>\n",
" <td> 39.734249</td>\n",
" <td> 2</td>\n",
" <td> 223</td>\n",
" <td> east-colfax</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td> 201412522</td>\n",
" <td> 201412522357200</td>\n",
" <td> 3572</td>\n",
" <td> 0</td>\n",
" <td> drug-methampetamine-possess</td>\n",
" <td> drug-alcohol</td>\n",
" <td> 2014-01-08 14:10:00</td>\n",
" <td> NaN</td>\n",
" <td> 2014-01-08 15:34:59</td>\n",
" <td> N SPEER BLVD / STOUT ST</td>\n",
" <td> 3140933</td>\n",
" <td> 1695324</td>\n",
" <td>-104.998843</td>\n",
" <td> 39.741401</td>\n",
" <td> 1</td>\n",
" <td> 123</td>\n",
" <td> cbd</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td> 201412745</td>\n",
" <td> 201412745410400</td>\n",
" <td> 4104</td>\n",
" <td> 0</td>\n",
" <td> liquor-possession</td>\n",
" <td> drug-alcohol</td>\n",
" <td> 2014-01-08 16:40:00</td>\n",
" <td> NaN</td>\n",
" <td> 2014-01-08 16:54:00</td>\n",
" <td> E COLFAX AVE / N FRANKLIN ST</td>\n",
" <td> 3149479</td>\n",
" <td> 1694866</td>\n",
" <td>-104.968463</td>\n",
" <td> 39.740011</td>\n",
" <td> 6</td>\n",
" <td> 622</td>\n",
" <td> cheesman-park</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 14,
"text": [
" INCIDENT_ID OFFENSE_ID OFFENSE_CODE OFFENSE_CODE_EXTENSION \\\n",
"0 201413028 201413028549900 5499 0 \n",
"1 201412608 201412608230300 2303 0 \n",
"2 201412199 201412199131500 1315 0 \n",
"3 201412522 201412522357200 3572 0 \n",
"4 201412745 201412745410400 4104 0 \n",
"\n",
" OFFENSE_TYPE_ID OFFENSE_CATEGORY_ID FIRST_OCCURRENCE_DATE \\\n",
"0 traf-other all-other-crimes 2014-01-08 19:40:00 \n",
"1 theft-shoplift larceny 2014-01-08 15:23:00 \n",
"2 aggravated-assault aggravated-assault 2014-01-08 10:30:00 \n",
"3 drug-methampetamine-possess drug-alcohol 2014-01-08 14:10:00 \n",
"4 liquor-possession drug-alcohol 2014-01-08 16:40:00 \n",
"\n",
" LAST_OCCURRENCE_DATE REPORTED_DATE INCIDENT_ADDRESS \\\n",
"0 NaN 2014-01-08 20:22:00 NaN \n",
"1 NaN 2014-01-08 17:00:00 NaN \n",
"2 NaN 2014-01-08 10:57:00 1101 N VERBENA ST \n",
"3 NaN 2014-01-08 15:34:59 N SPEER BLVD / STOUT ST \n",
"4 NaN 2014-01-08 16:54:00 E COLFAX AVE / N FRANKLIN ST \n",
"\n",
" GEO_X GEO_Y GEO_LON GEO_LAT DISTRICT_ID PRECINCT_ID \\\n",
"0 3130484 1683392 -105.036215 39.708798 4 412 \n",
"1 3165524 1680769 -104.911735 39.701040 3 322 \n",
"2 3171282 1692904 -104.890982 39.734249 2 223 \n",
"3 3140933 1695324 -104.998843 39.741401 1 123 \n",
"4 3149479 1694866 -104.968463 39.740011 6 622 \n",
"\n",
" NEIGHBORHOOD_ID \n",
"0 westwood \n",
"1 washington-virginia-vale \n",
"2 east-colfax \n",
"3 cbd \n",
"4 cheesman-park "
]
}
],
"prompt_number": 14
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Relabel the NEIGHBORHOOD_ID column as NBRHD_NAME; all other columns keep their names.\n",
"cdf = cdf.rename(columns={'NEIGHBORHOOD_ID': 'NBRHD_NAME'})\n"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 15
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"cdf.columns"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 16,
"text": [
"Index([u'INCIDENT_ID', u'OFFENSE_ID', u'OFFENSE_CODE', u'OFFENSE_CODE_EXTENSION', u'OFFENSE_TYPE_ID', u'OFFENSE_CATEGORY_ID', u'FIRST_OCCURRENCE_DATE', u'LAST_OCCURRENCE_DATE', u'REPORTED_DATE', u'INCIDENT_ADDRESS', u'GEO_X', u'GEO_Y', u'GEO_LON', u'GEO_LAT', u'DISTRICT_ID', u'PRECINCT_ID', u'NBRHD_NAME'], dtype='object')"
]
}
],
"prompt_number": 16
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Tally rows per DISTRICT_ID; value_counts() returns a Series indexed by district id,\n",
"# ordered from the most frequent district to the least.\n",
"dccdf = cdf['DISTRICT_ID'].value_counts()\n",
"# Call form of print: same output under Python 2, and still valid syntax under Python 3.\n",
"print(dccdf)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"3 54270\n",
"6 46249\n",
"1 41627\n",
"4 37007\n",
"2 36867\n",
"5 21032\n",
"7 1788\n",
"dtype: int64\n"
]
}
],
"prompt_number": 17
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"type(dccdf)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 18,
"text": [
"pandas.core.series.Series"
]
}
],
"prompt_number": 18
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"dccdf = pd.DataFrame(dccdf)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 20
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"dccdf"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3</th>\n",
" <td> 54270</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td> 46249</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 41627</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td> 37007</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 36867</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td> 21032</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td> 1788</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 21,
"text": [
" 0\n",
"3 54270\n",
"6 46249\n",
"1 41627\n",
"4 37007\n",
"2 36867\n",
"5 21032\n",
"7 1788"
]
}
],
"prompt_number": 21
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"dccdf.index"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 22,
"text": [
"Int64Index([3, 6, 1, 4, 2, 5, 7], dtype='int64')"
]
}
],
"prompt_number": 22
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"dccdf['counts'] = dccdf.index"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 23
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"dccdf"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>counts</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3</th>\n",
" <td> 54270</td>\n",
" <td> 3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td> 46249</td>\n",
" <td> 6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 41627</td>\n",
" <td> 1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td> 37007</td>\n",
" <td> 4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 36867</td>\n",
" <td> 2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td> 21032</td>\n",
" <td> 5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td> 1788</td>\n",
" <td> 7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 24,
"text": [
" 0 counts\n",
"3 54270 3\n",
"6 46249 6\n",
"1 41627 1\n",
"4 37007 4\n",
"2 36867 2\n",
"5 21032 5\n",
"7 1788 7"
]
}
],
"prompt_number": 24
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Swap the district-id index for plain positional labels 0..n-1.\n",
"dccdf.index = range(len(dccdf))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 26
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"dccdf"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>counts</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 54270</td>\n",
" <td> 3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 46249</td>\n",
" <td> 6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 41627</td>\n",
" <td> 1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td> 37007</td>\n",
" <td> 4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td> 36867</td>\n",
" <td> 2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td> 21032</td>\n",
" <td> 5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td> 1788</td>\n",
" <td> 7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 27,
"text": [
" 0 counts\n",
"0 54270 3\n",
"1 46249 6\n",
"2 41627 1\n",
"3 37007 4\n",
"4 36867 2\n",
"5 21032 5\n",
"6 1788 7"
]
}
],
"prompt_number": 27
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Column order matters here: the first column holds the value_counts() tallies and the\n",
"# second is the copy of the old index (the district ids), so label them in that order.\n",
"dccdf.columns = ['Count', 'DISTRICT_ID']"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 28
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"dccdf"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Count</th>\n",
" <th>DISTRICT_ID</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> 54270</td>\n",
" <td> 3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 46249</td>\n",
" <td> 6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 41627</td>\n",
" <td> 1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td> 37007</td>\n",
" <td> 4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td> 36867</td>\n",
" <td> 2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td> 21032</td>\n",
" <td> 5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td> 1788</td>\n",
" <td> 7</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 29,
"text": [
" Count DISTRICT_ID\n",
"0 54270 3\n",
"1 46249 6\n",
"2 41627 1\n",
"3 37007 4\n",
"4 36867 2\n",
"5 21032 5\n",
"6 1788 7"
]
}
],
"prompt_number": 29
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment