Skip to content

Instantly share code, notes, and snippets.

@tuttinator
Created August 3, 2015 05:04
Show Gist options
  • Save tuttinator/72f7753d44772d7963d0 to your computer and use it in GitHub Desktop.
Save tuttinator/72f7753d44772d7963d0 to your computer and use it in GitHub Desktop.
Process NZ MOJ convictions
# Install only the CRAN packages that are not already available, so that
# re-running the script does not re-download every dependency each time.
# 'grid' is intentionally not listed: it ships with base R and cannot be
# installed from CRAN.
required_packages <- c(
  "dplyr", "ggplot2", "readr", "ggthemes", "stringr", "lubridate", "reshape2"
)
is_installed <- vapply(
  required_packages, requireNamespace, logical(1), quietly = TRUE
)
missing_packages <- required_packages[!is_installed]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}
library(grid)
library(dplyr)
library(reshape2)
library(ggplot2)
library(readr)
library(ggthemes)
library(lubridate)
library(stringr)
# Load the convictions CSV into a data frame
convictions <- read_csv("convicted-offenders-02-10-2013.csv")
# Lower-case every column name so later code can use one naming style
names(convictions) <- tolower(names(convictions))
# Show the resulting column names
names(convictions)
# [1] "year" "offence" "sentence" "courtcluster" "agegroup" "gender" "ethnicity" "count"
# Inspect the distinct values of each descriptive column; the commented
# output below each call records what was printed for this data set.

# Reporting years
unique(convictions[["year"]])
# [1] "2003/04" "2004/05" "2005/06" "2006/07" "2007/08" "2008/09" "2009/10" "2010/11" "2011/12" "2012/13"

# Offence categories
unique(convictions[["offence"]])
# [1] "Abduction, harassment and other offences against the person"
# [2] "Acts intended to cause injury"
# [3] "Dangerous or negligent acts endangering persons"
# [4] "Fraud, deception and related offences"
# [5] "Homicide and related offences"
# [6] "Illicit drug offences"
# [7] "Miscellaneous offences"
# [8] "Offences against justice procedures, government security and government operations"
# [9] "Prohibited and regulated weapons and explosives offences"
# [10] "Property damage and environmental pollution"
# [11] "Public order offences"
# [12] "Robbery, extortion and related offences"
# [13] "Sexual assault and related offences"
# [14] "Theft and related offences"
# [15] "Traffic and vehicle regulatory offences"
# [16] "Unlawful entry with intent/burglary, break and enter"

# Sentence types
unique(convictions[["sentence"]])
# [1] "Community work, Corrections" "Supervision by Community Corrections" "Conviction and discharge"
# [4] "Imprisonment" "Preventive Detention" "Fine"
# [7] "Reparation/Restitution" "Other" "Disqualification from driving"
# [10] "Community Detention" "Intensive Supervision" "Home Detention"
# [13] "Life imprisonment"

# Court clusters
unique(convictions[["courtcluster"]])
# [1] "Northland" "Auckland" "Manukau" "Waikato"
# [5] "Bay of Plenty/Coromandel" "Central North Island" "East Coast/Hawke's Bay" "Taranaki/Wanganui"
# [9] "Manawatu/Wairarapa" "Wellington" "Nelson/Marlborough/West Coast" "Canterbury"
# [13] "Otago/South Canterbury" "Southland/Central Otago"

# Age groups
unique(convictions[["agegroup"]])
# [1] "17 to 19" "20 to 24" "25 to 29" "30 to 39" "40+" "Unknown"

# Genders
unique(convictions[["gender"]])
# [1] "Female" "Male" "Unknown"

# Ethnicities
unique(convictions[["ethnicity"]])
# [1] "Maori" "European" "Pacific Peoples" "Other" "Unknown"
# Normalise the reporting-year labels (e.g. "2003/04") into two Date columns,
# start_date and end_date, placed in front of the existing columns.

# Split each label on "/" into its start-year and end-year text
year_parts <- str_split_fixed(convictions$year, "/", 2)
date_format <- "%d/%m/%Y"

# Each date is parsed from its own year text. Previously end_date was built
# from start_date, so the two columns were always identical and the parsed
# end year was thrown away.
# NOTE(review): the start uses 30 June and the end uses 1 July; confirm these
# are the intended period boundaries.
period_dates <- data.frame(
  # Prefix day and month to the four-digit start year, e.g. "30/06/2003"
  start_date = as.Date(
    paste0("30/06/", year_parts[, 1]),
    format = date_format
  ),
  # Prefix day, month and century to the two-digit end year, e.g. "01/07/2004"
  end_date = as.Date(
    paste0("01/07/20", year_parts[, 2]),
    format = date_format
  )
)

# Merge the normalised dates with the convictions data frame
convictions <- cbind(period_dates, convictions)
# Offences we are interested in; plotting all 16 categories may be too many
interesting_offences <- c(
  "Fraud, deception and related offences",
  "Homicide and related offences",
  "Illicit drug offences",
  "Robbery, extortion and related offences",
  "Sexual assault and related offences",
  "Theft and related offences",
  "Unlawful entry with intent/burglary, break and enter"
)

# Total imprisonment convictions per end_date and offence.
# Every step is joined with %>%: previously the pipe was missing after the
# offence filter, so select() ran on its own without any data and failed.
# The trailing no-argument subset() call did nothing and has been removed.
imprisonment_offences <- convictions %>%
  filter(
    sentence == "Imprisonment",
    offence %in% interesting_offences
  ) %>%
  select(end_date, offence, count) %>%
  group_by(end_date, offence) %>%
  summarise(amount = sum(count)) %>%
  # summarise() leaves the result grouped by end_date; drop that grouping so
  # later operations are not silently applied per group
  ungroup()
# Line chart of imprisonment convictions over time, one line per offence
imprisionment_plot <- ggplot(
  imprisonment_offences,
  aes(x = end_date, y = amount, colour = offence)
) +
  geom_line(size = 1) +
  # y axis runs from zero up to the largest summed amount
  ylim(0, max(imprisonment_offences$amount)) +
  theme_solarized(light = TRUE) +
  scale_colour_solarized("red", name = "Offences") +
  theme(
    axis.text = element_text(size = 18),
    axis.title = element_text(size = 18),
    axis.title.y = element_text(vjust = 1.4),
    legend.position = "right",
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 10),
    plot.title = element_text(size = 20),
    plot.margin = unit(c(10, 10, 10, 10), "mm")
  ) +
  labs(
    title = "Convictions resulting in imprisonment by offence since 2003",
    x = "Year",
    y = "Number of convictions"
  )
# Display the plot
imprisionment_plot
# Court clusters to include in the regional comparison; the commented-out
# entries are the remaining clusters, left here so they are easy to re-enable
interesting_regions <- c(
  # "Northland",
  # "Bay of Plenty/Coromandel",
  # "Manawatu/Wairarapa",
  # "Otago/South Canterbury",
  "Auckland",
  # "Central North Island",
  "Wellington",
  # "Southland/Central Otago",
  "Manukau",
  # "East Coast/Hawke's Bay",
  # "Nelson/Marlborough/West Coast",
  # "Waikato",
  # "Taranaki/Wanganui",
  "Canterbury"
)

# Total "Property damage and environmental pollution" convictions per
# end_date and court cluster, restricted to the clusters chosen above.
# The former trailing no-argument subset() call did nothing and is removed.
environmental_convictions <- convictions %>%
  filter(
    offence == "Property damage and environmental pollution",
    courtcluster %in% interesting_regions
  ) %>%
  select(end_date, courtcluster, count) %>%
  group_by(end_date, courtcluster) %>%
  summarise(amount = sum(count)) %>%
  # summarise() leaves the result grouped by end_date; drop that grouping
  ungroup()
# Line chart of property damage / environmental pollution convictions over
# time, one line per court cluster
environmental_plot <- ggplot(
  environmental_convictions,
  aes(x = end_date, y = amount, colour = courtcluster)
) +
  geom_line(size = 1) +
  # y axis runs from zero up to the largest summed amount
  ylim(0, max(environmental_convictions$amount)) +
  theme_solarized(light = TRUE) +
  scale_colour_solarized("red", name = "Court district") +
  theme(
    axis.text = element_text(size = 18),
    axis.title = element_text(size = 18),
    axis.title.y = element_text(vjust = 1.4),
    legend.position = "right",
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 10),
    plot.title = element_text(size = 20),
    plot.margin = unit(c(10, 10, 10, 10), "mm")
  ) +
  labs(
    title = "Environmental pollution and property damage convictions by court region since 2003",
    x = "Year",
    y = "Number of convictions"
  )
# Display the plot
environmental_plot
# Illicit drug convictions: keep the columns needed for the chart, then sum
# the counts for every end_date / ethnicity combination
drug_convictions <- convictions %>%
  filter(offence == "Illicit drug offences") %>%
  select(end_date, ethnicity, count) %>%
  group_by(end_date, ethnicity) %>%
  summarise(amount = sum(count))
# Line chart of illicit drug convictions over time, one line per ethnicity
drug_plot <- ggplot(
  drug_convictions,
  aes(x = end_date, y = amount, colour = ethnicity)
) +
  geom_line(size = 1) +
  # y axis runs from zero up to the largest summed amount
  ylim(0, max(drug_convictions$amount)) +
  theme_solarized(light = TRUE) +
  scale_colour_solarized("red", name = "Ethnicities") +
  theme(
    axis.text = element_text(size = 18),
    axis.title = element_text(size = 18),
    axis.title.y = element_text(vjust = 1.4),
    legend.position = "right",
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 10),
    plot.title = element_text(size = 20),
    plot.margin = unit(c(10, 10, 10, 10), "mm")
  ) +
  labs(
    title = "Drug convictions by ethnicity since 2003",
    x = "Year",
    y = "Number of convictions"
  )
# Display the plot
drug_plot
# Sentence types to compare for drug convictions; the commented-out entries
# are the remaining sentence types, kept so they are easy to re-enable
interesting_sentences <- c(
  # "Life imprisonment",
  "Community work, Corrections",
  # "Supervision by Community Corrections",
  # "Conviction and discharge",
  "Imprisonment",
  # "Preventive Detention",
  "Fine",
  # "Reparation/Restitution",
  # "Other",
  # "Disqualification from driving",
  "Community Detention",
  # "Intensive Supervision",
  "Home Detention"
)

# Illicit drug convictions for the chosen sentence types, with the counts
# summed for every end_date / sentence combination
drug_sentences <- convictions %>%
  filter(offence == "Illicit drug offences") %>%
  filter(sentence %in% interesting_sentences) %>%
  select(end_date, sentence, count) %>%
  group_by(end_date, sentence) %>%
  summarise(amount = sum(count))
# Line chart of illicit drug convictions over time, one line per sentence
# type. This reuses the name drug_plot, replacing the ethnicity plot object
# created earlier in the script.
drug_plot <- ggplot(
  drug_sentences,
  aes(x = end_date, y = amount, colour = sentence)
) +
  geom_line(size = 1) +
  # y axis runs from zero up to the largest summed amount
  ylim(0, max(drug_sentences$amount)) +
  theme_solarized(light = TRUE) +
  scale_colour_solarized("red", name = "Sentence type") +
  theme(
    axis.text = element_text(size = 18),
    axis.title = element_text(size = 18),
    axis.title.y = element_text(vjust = 1.4)
  ) +
  theme(
    legend.position = "right",
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 10)
  ) +
  theme(plot.title = element_text(size = 20)) +
  theme(plot.margin = unit(c(10, 10, 10, 10), "mm")) +
  # Title now reads "since 2003", matching the other charts; the previous
  # wording ("by sentence 2003") suggested the data covered a single year
  labs(title = "Drug convictions by sentence since 2003") +
  ylab("Number of convictions") +
  xlab("Year")
# Display the plot
drug_plot
# Export every illicit drug conviction total by sentence type as a CSV.
# Steps: keep the drug rows and the needed columns, sum the counts for each
# end_date / sentence pair, then reshape from long to wide so that each
# sentence type becomes its own column with one row per end_date.
all_drug_sentences <- convictions %>%
  filter(offence == "Illicit drug offences") %>%
  select(end_date, sentence, count) %>%
  group_by(end_date, sentence) %>%
  summarise(amount = sum(count)) %>%
  melt(id.vars = c("end_date", "sentence")) %>%
  dcast(end_date ~ sentence)

# Missing combinations are written as empty cells; row names are omitted
write.csv(
  all_drug_sentences,
  file = "drug_sentences.csv",
  na = "",
  row.names = FALSE
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment