source: https://www.opendataphilly.org/dataset/philadelphia-police-part-one-crime-incidents
Download json files and need to do small transforms:
$ mkdir phillycrime
$ cd phillycrime
$ mkdir 2006
$ cd 2006
$ wget http://data.phl.opendata.arcgis.com/datasets/6b910edd9ca74577b50eab71564772f4_0.geojson -O $(basename $(pwd))
# remove first five lines
$ sed -i -e '1,5d' $(basename $(pwd)).json
# remove last 2 lines
$ head -n -2 $(basename $(pwd)).json > temp.json && mv temp.json $(basename $(pwd)).json
# remove commas after each line
$ sed -i 's/,$//g' $(basename $(pwd)).json
Show JSON Data
$ head -1 ~/projects/phillycrime/2014/2014.json | python -m json.tool
{
"geometry": {
"coordinates": [
-75.131897070999969,
40.021612864000076
],
"type": "Point"
},
"properties": {
"DC_DIST": "25",
"DC_KEY": "201425044265",
"DISPATCH_DATE": "2014-06-01",
"DISPATCH_DATE_TIME": "2014-06-01T02:52:00.000Z",
"DISPATCH_TIME": "02:52:00",
"GlobalID": "6c2bbf3b-2cbc-4ccc-9c3e-bc0634a3f4fb",
"HOUR": null,
"LOCATION_BLOCK": "400 BLOCK W LURAY ST",
"OBJECTID": 3001,
"POINT_X": -75.131896999999995,
"POINT_Y": 40.021613000000002,
"SECTOR": "2",
"TEXT_GENERAL_CODE": "Recovered Stolen Motor Vehicle",
"UCR_GENERAL": "700"
},
"type": "Feature"
}
Create view from JSON
create or replace view maprfs.cmatta.philly_crime as select
c.properties.GobalID as `global_id`,
CAST(CONCAT(c.properties.DISPATCH_DATE, ' ',c.properties.DISPATCH_TIME) as TIMESTAMP) as `dispatch_datetime`,
c.properties.TEXT_GENERAL_CODE as `general_code`,
c.properties.LOCATION_BLOCK as `location_block`,
c.properties.DC_DIST as `dc_district`,
c.properties.SECTOR as `sector`,
c.geometry.coordinates[0] as `latitude`,
c.geometry.coordinates[1] as `longitude`
from maprfs.cmatta.`phillycrime` c;
Data Exploration
select
`general_code`,
count(1) as `count`
FROM `philly_crime`
GROUP BY `general_code`
ORDER BY `count` DESC limit 10;
select
`location_block`,
`general_code`,
count(1) as `count`
FROM `philly_crime`
GROUP BY `location_block`, `general_code`
ORDER BY `count` DESC limit 10;
select
`dc_district`,
count(1) as `count`
FROM `philly_crime`
GROUP BY `dc_district`
ORDER BY `count` DESC limit 10;
SELECT
to_char(`dispatch_datetime`, 'E') as `weekday`,
COUNT(1) as `count`
FROM `philly_crime`
GROUP BY to_char(`dispatch_datetime`, 'E')
ORDER BY `count` DESC;
SELECT
DATE_PART('HOUR', `dispatch_datetime`) as `Hour`,
COUNT(1) as `count`
FROM `philly_crime`
GROUP BY DATE_PART('HOUR', `dispatch_datetime`)
ORDER BY `count` DESC;