Skip to content

Instantly share code, notes, and snippets.

@cjmatta
Last active August 29, 2015 14:22
Show Gist options
  • Save cjmatta/5ead4b4ac36052e7ec90 to your computer and use it in GitHub Desktop.
Save cjmatta/5ead4b4ac36052e7ec90 to your computer and use it in GitHub Desktop.
Drill Demo - Philly Crime JSON

Philly crime database - Drill Exploration

source: https://www.opendataphilly.org/dataset/philadelphia-police-part-one-crime-incidents

Download json files and need to do small transforms:

$ mkdir phillycrime
$ cd phillycrime
$ mkdir 2006
$ cd 2006
$ wget http://data.phl.opendata.arcgis.com/datasets/6b910edd9ca74577b50eab71564772f4_0.geojson -O $(basename $(pwd))
# remove first five lines
$ sed -i -e '1,5d' $(basename $(pwd)).json
# remove last 2 lines
$ head -n -2 $(basename $(pwd)).json > temp.json && mv temp.json $(basename $(pwd)).json
# remove commas after each line
$ sed -i 's/,$//g' $(basename $(pwd)).json

Show JSON Data

$ head -1 ~/projects/phillycrime/2014/2014.json | python -m json.tool
{
    "geometry": {
        "coordinates": [
            -75.131897070999969,
            40.021612864000076
        ],
        "type": "Point"
    },
    "properties": {
        "DC_DIST": "25",
        "DC_KEY": "201425044265",
        "DISPATCH_DATE": "2014-06-01",
        "DISPATCH_DATE_TIME": "2014-06-01T02:52:00.000Z",
        "DISPATCH_TIME": "02:52:00",
        "GlobalID": "6c2bbf3b-2cbc-4ccc-9c3e-bc0634a3f4fb",
        "HOUR": null,
        "LOCATION_BLOCK": "400 BLOCK W LURAY ST",
        "OBJECTID": 3001,
        "POINT_X": -75.131896999999995,
        "POINT_Y": 40.021613000000002,
        "SECTOR": "2",
        "TEXT_GENERAL_CODE": "Recovered Stolen Motor Vehicle",
        "UCR_GENERAL": "700"
    },
    "type": "Feature"
}

Create view from JSON

create or replace view maprfs.cmatta.philly_crime as select
    c.properties.GobalID as `global_id`,
    CAST(CONCAT(c.properties.DISPATCH_DATE, ' ',c.properties.DISPATCH_TIME) as TIMESTAMP) as `dispatch_datetime`,
    c.properties.TEXT_GENERAL_CODE as `general_code`,
    c.properties.LOCATION_BLOCK as `location_block`,
    c.properties.DC_DIST as `dc_district`,
    c.properties.SECTOR as `sector`,
    c.geometry.coordinates[0] as `latitude`,
    c.geometry.coordinates[1] as `longitude`
from maprfs.cmatta.`phillycrime` c;

Data Exploration

Common Crimes

select
    `general_code`,
    count(1) as `count`
FROM `philly_crime`
GROUP BY `general_code`
ORDER BY `count` DESC limit 10;

High crime areas and the crimes

select 
    `location_block`,
    `general_code`,
    count(1) as `count`
FROM `philly_crime`
GROUP BY `location_block`, `general_code`
ORDER BY `count` DESC limit 10;

High crime districts

select
    `dc_district`,
    count(1) as `count`
FROM `philly_crime`
GROUP BY `dc_district`
ORDER BY `count` DESC limit 10;

Crimes by day of the week

SELECT
    to_char(`dispatch_datetime`, 'E') as `weekday`,
    COUNT(1) as `count`
FROM `philly_crime`
GROUP BY to_char(`dispatch_datetime`, 'E')
ORDER BY `count` DESC;

Crimes by hour

SELECT
    DATE_PART('HOUR', `dispatch_datetime`) as `Hour`,
    COUNT(1) as `count`
FROM `philly_crime`
GROUP BY DATE_PART('HOUR', `dispatch_datetime`)
ORDER BY `count` DESC;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment