Created
August 3, 2015 05:04
-
-
Save tuttinator/72f7753d44772d7963d0 to your computer and use it in GitHub Desktop.
Process NZ MOJ convictions
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# CRAN packages this script depends on. `grid` is deliberately absent from
# this list: it ships with base R and cannot be installed from CRAN.
required_packages <- c(
  "dplyr", "ggplot2", "readr", "ggthemes", "stringr", "lubridate", "reshape2"
)

# Install only the packages that are missing, so re-running the script does
# not download and reinstall everything each time.
is_installed <- vapply(
  required_packages,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
missing_packages <- required_packages[!is_installed]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

# Attach everything the rest of the script uses
library(grid)
library(dplyr)
library(reshape2)
library(ggplot2)
library(readr)
library(ggthemes)
library(lubridate)
library(stringr)
# Load the convictions extract from disk into a data frame
convictions <- read_csv("convicted-offenders-02-10-2013.csv")
# Lower-case the column names so later code can refer to them uniformly
names(convictions) <- tolower(names(convictions))
# Exploratory look at the data: each call below prints the column names or
# the distinct values of one column; the output recorded at the time is kept
# in the comments underneath.
colnames(convictions)
# [1] "year" "offence" "sentence" "courtcluster" "agegroup" "gender" "ethnicity" "count"
unique(convictions[["year"]])
# [1] "2003/04" "2004/05" "2005/06" "2006/07" "2007/08" "2008/09" "2009/10" "2010/11" "2011/12" "2012/13"
unique(convictions[["offence"]])
# [1] "Abduction, harassment and other offences against the person"
# [2] "Acts intended to cause injury"
# [3] "Dangerous or negligent acts endangering persons"
# [4] "Fraud, deception and related offences"
# [5] "Homicide and related offences"
# [6] "Illicit drug offences"
# [7] "Miscellaneous offences"
# [8] "Offences against justice procedures, government security and government operations"
# [9] "Prohibited and regulated weapons and explosives offences"
# [10] "Property damage and environmental pollution"
# [11] "Public order offences"
# [12] "Robbery, extortion and related offences"
# [13] "Sexual assault and related offences"
# [14] "Theft and related offences"
# [15] "Traffic and vehicle regulatory offences"
# [16] "Unlawful entry with intent/burglary, break and enter"
unique(convictions[["sentence"]])
# [1] "Community work, Corrections" "Supervision by Community Corrections" "Conviction and discharge"
# [4] "Imprisonment" "Preventive Detention" "Fine"
# [7] "Reparation/Restitution" "Other" "Disqualification from driving"
# [10] "Community Detention" "Intensive Supervision" "Home Detention"
# [13] "Life imprisonment"
unique(convictions[["courtcluster"]])
# [1] "Northland" "Auckland" "Manukau" "Waikato"
# [5] "Bay of Plenty/Coromandel" "Central North Island" "East Coast/Hawke's Bay" "Taranaki/Wanganui"
# [9] "Manawatu/Wairarapa" "Wellington" "Nelson/Marlborough/West Coast" "Canterbury"
# [13] "Otago/South Canterbury" "Southland/Central Otago"
unique(convictions[["agegroup"]])
# [1] "17 to 19" "20 to 24" "25 to 29" "30 to 39" "40+" "Unknown"
unique(convictions[["gender"]])
# [1] "Female" "Male" "Unknown"
unique(convictions[["ethnicity"]])
# [1] "Maori" "European" "Pacific Peoples" "Other" "Unknown"
# Normalise the "YYYY/YY" year labels (e.g. "2003/04") into a pair of dates:
# split each label on "/" into a two-column character matrix
out <- str_split_fixed(convictions$year, "/", 2)
# Name the two halves of the split
colnames(out) <- c("start_date", "end_date")
# Prepend "30/06/" to the four-digit year in the first column
# NOTE(review): a label such as "2003/04" presumably denotes a July-to-June
# year, in which case the 30/06 and 01/07 anchors look swapped between the
# start and end columns -- confirm before relying on start_date.
out[, 1] <- paste0("30/06/", out[, 1])
# Prepend "01/07/" and the century "20" to the two-digit year in the second
# column
out[, 2] <- paste0("01/07/20", out[, 2])
# Put the two new date columns in front of the existing convictions columns
convictions <- cbind(out, convictions)
# Coerce the start_date strings to Date
convictions$start_date <- as.Date(convictions$start_date, format = "%d/%m/%Y")
# Coerce the end_date strings to Date. This must read from end_date itself:
# reading from start_date would make both columns hold the same dates.
convictions$end_date <- as.Date(convictions$end_date, format = "%d/%m/%Y")
# Offences we are interested in; plotting all 16 categories on one chart may
# be too many
interesting_offences <- c(
  "Fraud, deception and related offences",
  "Homicide and related offences",
  "Illicit drug offences",
  "Robbery, extortion and related offences",
  "Sexual assault and related offences",
  "Theft and related offences",
  "Unlawful entry with intent/burglary, break and enter"
)
# Total convictions that resulted in imprisonment, per end_date and offence,
# restricted to the offences listed above. Every step is joined with %>% so
# that the column selection and the aggregation are applied to the filtered
# rows rather than being evaluated as separate statements.
imprisonment_offences <- convictions %>%
  filter(sentence == "Imprisonment") %>%
  filter(offence %in% interesting_offences) %>%
  select(end_date, offence, count) %>%
  group_by(end_date, offence) %>%
  summarise(amount = sum(count)) %>%
  # drop the grouping left over from summarise()
  ungroup()
# Line chart of imprisonment convictions over time, one coloured line per
# offence
imprisionment_plot <- ggplot(
  imprisonment_offences,
  aes(x = end_date, y = amount, colour = offence)
) +
  geom_line(size = 1) +
  # y axis runs from zero up to the largest summed amount
  ylim(0, max(imprisonment_offences$amount)) +
  theme_solarized(light = TRUE) +
  scale_colour_solarized("red", name = "Offences") +
  theme(
    axis.text = element_text(size = 18),
    axis.title = element_text(size = 18),
    axis.title.y = element_text(vjust = 1.4),
    legend.position = "right",
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 10),
    plot.title = element_text(size = 20),
    plot.margin = unit(rep(10, 4), "mm")
  ) +
  labs(
    title = "Convictions resulting in imprisonment by offence since 2003",
    x = "Year",
    y = "Number of convictions"
  )
# Evaluating the object at top level displays the chart
imprisionment_plot
# Court clusters to include in the regional chart; comment or uncomment
# entries to change which regions are kept
interesting_regions <- c(
  # "Northland",
  # "Bay of Plenty/Coromandel",
  # "Manawatu/Wairarapa",
  # "Otago/South Canterbury",
  "Auckland",
  # "Central North Island",
  "Wellington",
  # "Southland/Central Otago",
  "Manukau",
  # "East Coast/Hawke's Bay",
  # "Nelson/Marlborough/West Coast",
  # "Waikato",
  # "Taranaki/Wanganui",
  "Canterbury"
)
# Total "Property damage and environmental pollution" convictions per
# end_date and court cluster, restricted to the regions selected above
environmental_convictions <- convictions %>%
  filter(offence == "Property damage and environmental pollution") %>%
  filter(courtcluster %in% interesting_regions) %>%
  select(end_date, courtcluster, count) %>%
  group_by(end_date, courtcluster) %>%
  summarise(amount = sum(count)) %>%
  # drop the grouping left over from summarise()
  ungroup()
# Line chart of property damage / environmental pollution convictions over
# time, one coloured line per court cluster
environmental_plot <- ggplot(
  environmental_convictions,
  aes(x = end_date, y = amount, colour = courtcluster)
) +
  geom_line(size = 1) +
  # y axis runs from zero up to the largest summed amount
  ylim(0, max(environmental_convictions$amount)) +
  theme_solarized(light = TRUE) +
  scale_colour_solarized("red", name = "Court district") +
  theme(
    axis.text = element_text(size = 18),
    axis.title = element_text(size = 18),
    axis.title.y = element_text(vjust = 1.4),
    legend.position = "right",
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 10),
    plot.title = element_text(size = 20),
    plot.margin = unit(rep(10, 4), "mm")
  ) +
  labs(
    title = "Environmental pollution and property damage convictions by court region since 2003",
    x = "Year",
    y = "Number of convictions"
  )
# Evaluating the object at top level displays the chart
environmental_plot
# Total "Illicit drug offences" convictions per end_date and ethnicity
drug_convictions <- convictions %>%
  filter(offence == "Illicit drug offences") %>%
  select(end_date, ethnicity, count) %>%
  group_by(end_date, ethnicity) %>%
  summarise(amount = sum(count)) %>%
  # drop the grouping left over from summarise()
  ungroup()
# Line chart of illicit drug convictions over time, one coloured line per
# ethnicity
drug_plot <- ggplot(
  drug_convictions,
  aes(x = end_date, y = amount, colour = ethnicity)
) +
  geom_line(size = 1) +
  # y axis runs from zero up to the largest summed amount
  ylim(0, max(drug_convictions$amount)) +
  theme_solarized(light = TRUE) +
  scale_colour_solarized("red", name = "Ethnicities") +
  theme(
    axis.text = element_text(size = 18),
    axis.title = element_text(size = 18),
    axis.title.y = element_text(vjust = 1.4),
    legend.position = "right",
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 10),
    plot.title = element_text(size = 20),
    plot.margin = unit(rep(10, 4), "mm")
  ) +
  labs(
    title = "Drug convictions by ethnicity since 2003",
    x = "Year",
    y = "Number of convictions"
  )
# Evaluating the object at top level displays the chart
drug_plot
# Sentence types to include in the chart; comment or uncomment entries to
# change which sentences are kept
interesting_sentences <- c(
  # "Life imprisonment",
  "Community work, Corrections",
  # "Supervision by Community Corrections",
  # "Conviction and discharge",
  "Imprisonment",
  # "Preventive Detention",
  "Fine",
  # "Reparation/Restitution",
  # "Other",
  # "Disqualification from driving",
  "Community Detention",
  # "Intensive Supervision",
  "Home Detention"
)
# Total "Illicit drug offences" convictions per end_date and sentence type,
# restricted to the sentences selected above
drug_sentences <- convictions %>%
  filter(offence == "Illicit drug offences") %>%
  filter(sentence %in% interesting_sentences) %>%
  select(end_date, sentence, count) %>%
  group_by(end_date, sentence) %>%
  summarise(amount = sum(count)) %>%
  # drop the grouping left over from summarise()
  ungroup()
# Line chart of illicit drug convictions over time, one coloured line per
# sentence type. This assigns to `drug_plot`, the same name used for the
# ethnicity chart.
drug_plot <- ggplot(
  drug_sentences,
  aes(x = end_date, y = amount, colour = sentence)
) +
  geom_line(size = 1) +
  # y axis runs from zero up to the largest summed amount
  ylim(0, max(drug_sentences$amount)) +
  theme_solarized(light = TRUE) +
  scale_colour_solarized("red", name = "Sentence type") +
  theme(
    axis.text = element_text(size = 18),
    axis.title = element_text(size = 18),
    axis.title.y = element_text(vjust = 1.4),
    legend.position = "right",
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 10),
    plot.title = element_text(size = 20),
    plot.margin = unit(rep(10, 4), "mm")
  ) +
  labs(
    # title worded like the other charts ("... since 2003")
    title = "Drug convictions by sentence since 2003",
    x = "Year",
    y = "Number of convictions"
  )
# Evaluating the object at top level displays the chart
drug_plot
# Output a CSV of all drug convictions by sentence.
# First total the "Illicit drug offences" convictions per end_date and
# sentence type (all sentence types, no filtering).
all_drug_sentences <- convictions %>%
  filter(offence == "Illicit drug offences") %>%
  select(end_date, sentence, count) %>%
  group_by(end_date, sentence) %>%
  summarise(amount = sum(count)) %>%
  # drop the grouping left over from summarise()
  ungroup()
# Reshape from long to wide: one row per end_date, one column per sentence
# type, each cell holding that pair's amount. The value column is named
# explicitly instead of being guessed by dcast().
all_drug_sentences <- all_drug_sentences %>%
  as.data.frame() %>%
  dcast(end_date ~ sentence, value.var = "amount")
# Date/sentence combinations with no rows are written as empty cells
write.csv(
  all_drug_sentences,
  file = "drug_sentences.csv",
  na = "",
  row.names = FALSE
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment