tuttinator · August 3, 2015 05:04
diff --git a/convictions.R b/convictions.R
 install.packages('dplyr')
 install.packages('ggplot2')
 install.packages('readr')
 install.packages('ggthemes')
 install.packages('stringr')
 install.packages('lubridate')
 install.packages('reshape2')
 install.packages('grid')

 library(grid)
 library(dplyr)
 library(reshape2)
 library(ggplot2)
 library(readr)
 library(ggthemes)
 library(lubridate)
 library(stringr)

 # Load the raw convictions extract; the path is resolved against the current
 # working directory.
 convictions <- read_csv("convicted-offenders-02-10-2013.csv")

 # Use lower-case column names from here on.
 names(convictions) <- tolower(names(convictions))

 # Show the column names as a sanity check. Output recorded at the time:
 # [1] "year"         "offence"      "sentence"     "courtcluster"
 # [5] "agegroup"     "gender"       "ethnicity"    "count"
 names(convictions)

 # Exploratory look at the distinct values in each categorical column.
 # The comment under each call records the output seen when this was written.

 unique(convictions[["year"]])
 # [1] "2003/04" "2004/05" "2005/06" "2006/07" "2007/08"
 # [6] "2008/09" "2009/10" "2010/11" "2011/12" "2012/13"

 unique(convictions[["offence"]])
 #  [1] "Abduction, harassment and other offences against the person"
 #  [2] "Acts intended to cause injury"
 #  [3] "Dangerous or negligent acts endangering persons"
 #  [4] "Fraud, deception and related offences"
 #  [5] "Homicide and related offences"
 #  [6] "Illicit drug offences"
 #  [7] "Miscellaneous offences"
 #  [8] "Offences against justice procedures, government security and government operations"
 #  [9] "Prohibited and regulated weapons and explosives offences"
 # [10] "Property damage and environmental pollution"
 # [11] "Public order offences"
 # [12] "Robbery, extortion and related offences"
 # [13] "Sexual assault and related offences"
 # [14] "Theft and related offences"
 # [15] "Traffic and vehicle regulatory offences"
 # [16] "Unlawful entry with intent/burglary, break and enter"

 unique(convictions[["sentence"]])
 #  [1] "Community work, Corrections"
 #  [2] "Supervision by Community Corrections"
 #  [3] "Conviction and discharge"
 #  [4] "Imprisonment"
 #  [5] "Preventive Detention"
 #  [6] "Fine"
 #  [7] "Reparation/Restitution"
 #  [8] "Other"
 #  [9] "Disqualification from driving"
 # [10] "Community Detention"
 # [11] "Intensive Supervision"
 # [12] "Home Detention"
 # [13] "Life imprisonment"

 unique(convictions[["courtcluster"]])
 #  [1] "Northland"                     "Auckland"
 #  [3] "Manukau"                       "Waikato"
 #  [5] "Bay of Plenty/Coromandel"      "Central North Island"
 #  [7] "East Coast/Hawke's Bay"        "Taranaki/Wanganui"
 #  [9] "Manawatu/Wairarapa"            "Wellington"
 # [11] "Nelson/Marlborough/West Coast" "Canterbury"
 # [13] "Otago/South Canterbury"        "Southland/Central Otago"

 unique(convictions[["agegroup"]])
 # [1] "17 to 19" "20 to 24" "25 to 29" "30 to 39" "40+"      "Unknown"

 unique(convictions[["gender"]])
 # [1] "Female"  "Male"    "Unknown"

 unique(convictions[["ethnicity"]])
 # [1] "Maori"           "European"        "Pacific Peoples" "Other"
 # [5] "Unknown"

 # Normalise the year labels (e.g. "2003/04") into two Date columns.
 # Each label is split on "/" into its four-digit first half and its
 # two-digit second half.
 out <- str_split_fixed(convictions$year, "/", 2)

 # Name the two halves after the columns they will become.
 colnames(out) <- c("start_date", "end_date")

 # Build "dd/mm/yyyy" strings by prefixing a day and month. The second half
 # only carries two digits, so the century "20" is prefixed as well.
 # NOTE(review): 30 June is attached to the first year and 1 July to the
 # second; confirm this matches the period the labels are meant to describe.
 out[, "start_date"] <- paste0("30/06/", out[, "start_date"])
 out[, "end_date"] <- paste0("01/07/20", out[, "end_date"])

 # Put the two new columns in front of the existing convictions columns.
 convictions <- cbind(out, convictions)

 # Parse both columns into Date values. Each column must be parsed from its
 # own strings so that end_date stays distinct from start_date.
 convictions$start_date <- as.Date(convictions$start_date, format = "%d/%m/%Y")
 convictions$end_date <- as.Date(convictions$end_date, format = "%d/%m/%Y")

 # Offences of interest: all 16 categories may be too many for one plot.
 interesting_offences <- c(
   "Fraud, deception and related offences",
   "Homicide and related offences",
   "Illicit drug offences",
   "Robbery, extortion and related offences",
   "Sexual assault and related offences",
   "Theft and related offences",
   "Unlawful entry with intent/burglary, break and enter"
 )

 # Total imprisonment counts per end_date and offence.
 # Every step is chained with %>%: if the pipe after filter() is missing,
 # select() is evaluated as a separate statement with no data frame to work
 # on and stops the script with an error.
 imprisonment_offences <- convictions %>%
   filter(sentence == "Imprisonment", offence %in% interesting_offences) %>%
   select(end_date, offence, count) %>%
   group_by(end_date, offence) %>%
   summarise(amount = sum(count))



 # Line chart of imprisonment totals over time, one coloured line per offence.
 imprisionment_plot <- ggplot(
   imprisonment_offences,
   aes(x = end_date, y = amount, colour = offence)
 ) +
   geom_line(size = 1) +
   # Start the y axis at zero and end it at the largest total.
   ylim(0, max(imprisonment_offences$amount)) +
   theme_solarized(light = TRUE) +
   scale_colour_solarized("red", name = "Offences") +
   # Text sizes, legend placement and a 10 mm margin on every side.
   theme(
     axis.text = element_text(size = 18),
     axis.title = element_text(size = 18),
     axis.title.y = element_text(vjust = 1.4),
     legend.position = "right",
     legend.text = element_text(size = 10),
     legend.title = element_text(size = 10),
     plot.title = element_text(size = 20),
     plot.margin = unit(rep(10, 4), "mm")
   ) +
   labs(
     title = "Convictions resulting in imprisonment by offence since 2003",
     x = "Year",
     y = "Number of convictions"
   )
 imprisionment_plot

 # Court clusters to compare. The remaining clusters in the data (Northland,
 # Waikato, Bay of Plenty/Coromandel, Central North Island,
 # East Coast/Hawke's Bay, Taranaki/Wanganui, Manawatu/Wairarapa,
 # Nelson/Marlborough/West Coast, Otago/South Canterbury and
 # Southland/Central Otago) are deliberately left out.
 interesting_regions <- c("Auckland", "Wellington", "Manukau", "Canterbury")

 # Total property damage / environmental pollution counts per end_date and
 # court cluster, restricted to the clusters chosen above.
 environmental_convictions <- convictions %>%
   filter(
     offence == "Property damage and environmental pollution",
     courtcluster %in% interesting_regions
   ) %>%
   select(end_date, courtcluster, count) %>%
   group_by(end_date, courtcluster) %>%
   summarise(amount = sum(count))

 # Line chart of the environmental totals, one coloured line per court cluster.
 environmental_plot <- ggplot(
   environmental_convictions,
   aes(x = end_date, y = amount, colour = courtcluster)
 ) +
   geom_line(size = 1) +
   # Start the y axis at zero and end it at the largest total.
   ylim(0, max(environmental_convictions$amount)) +
   theme_solarized(light = TRUE) +
   scale_colour_solarized("red", name = "Court district") +
   # Text sizes, legend placement and a 10 mm margin on every side.
   theme(
     axis.text = element_text(size = 18),
     axis.title = element_text(size = 18),
     axis.title.y = element_text(vjust = 1.4),
     legend.position = "right",
     legend.text = element_text(size = 10),
     legend.title = element_text(size = 10),
     plot.title = element_text(size = 20),
     plot.margin = unit(rep(10, 4), "mm")
   ) +
   labs(
     title = "Environmental pollution and property damage convictions by court region since 2003",
     x = "Year",
     y = "Number of convictions"
   )
 environmental_plot

 # Total illicit-drug counts per end_date and ethnicity, built in one pipeline.
 drug_convictions <- convictions %>%
   filter(offence == "Illicit drug offences") %>%
   select(end_date, ethnicity, count) %>%
   group_by(end_date, ethnicity) %>%
   summarise(amount = sum(count))

 # Line chart of the drug totals, one coloured line per ethnicity.
 drug_plot <- ggplot(
   drug_convictions,
   aes(x = end_date, y = amount, colour = ethnicity)
 ) +
   geom_line(size = 1) +
   # Start the y axis at zero and end it at the largest total.
   ylim(0, max(drug_convictions$amount)) +
   theme_solarized(light = TRUE) +
   scale_colour_solarized("red", name = "Ethnicities") +
   # Text sizes, legend placement and a 10 mm margin on every side.
   theme(
     axis.text = element_text(size = 18),
     axis.title = element_text(size = 18),
     axis.title.y = element_text(vjust = 1.4),
     legend.position = "right",
     legend.text = element_text(size = 10),
     legend.title = element_text(size = 10),
     plot.title = element_text(size = 20),
     plot.margin = unit(rep(10, 4), "mm")
   ) +
   labs(
     title = "Drug convictions by ethnicity since 2003",
     x = "Year",
     y = "Number of convictions"
   )
 drug_plot

 # Sentence types to compare. The remaining types in the data (Life
 # imprisonment, Supervision by Community Corrections, Conviction and
 # discharge, Preventive Detention, Reparation/Restitution, Other,
 # Disqualification from driving and Intensive Supervision) are deliberately
 # left out.
 interesting_sentences <- c(
   "Community work, Corrections",
   "Imprisonment",
   "Fine",
   "Community Detention",
   "Home Detention"
 )

 # Total illicit-drug counts per end_date and sentence type, restricted to the
 # sentence types chosen above.
 drug_sentences <- convictions %>%
   filter(
     offence == "Illicit drug offences",
     sentence %in% interesting_sentences
   ) %>%
   select(end_date, sentence, count) %>%
   group_by(end_date, sentence) %>%
   summarise(amount = sum(count))

 # Line chart of the drug totals, one coloured line per sentence type.
 # This reuses the name drug_plot, replacing any earlier plot stored under it.
 drug_plot <- ggplot(
   drug_sentences,
   aes(x = end_date, y = amount, colour = sentence)
 ) +
   geom_line(size = 1) +
   # Start the y axis at zero and end it at the largest total.
   ylim(0, max(drug_sentences$amount)) +
   theme_solarized(light = TRUE) +
   scale_colour_solarized("red", name = "Sentence type") +
   # Text sizes, legend placement and a 10 mm margin on every side.
   theme(
     axis.text = element_text(size = 18),
     axis.title = element_text(size = 18),
     axis.title.y = element_text(vjust = 1.4),
     legend.position = "right",
     legend.text = element_text(size = 10),
     legend.title = element_text(size = 10),
     plot.title = element_text(size = 20),
     plot.margin = unit(rep(10, 4), "mm")
   ) +
   # The title says "since 2003", like the other plots: the series covers
   # several years, not 2003 alone.
   labs(
     title = "Drug convictions by sentence since 2003",
     x = "Year",
     y = "Number of convictions"
   )
 drug_plot


 # Export illicit-drug totals for every sentence type (no shortlist) as a
 # wide CSV: one row per end_date and one column per sentence type.
 all_drug_sentences <- convictions %>%
   filter(offence == "Illicit drug offences") %>%
   select(end_date, sentence, count) %>%
   # Sum the counts within each end_date / sentence pair.
   group_by(end_date, sentence) %>%
   summarise(amount = sum(count)) %>%
   # Reshape from long to wide.
   melt(id.vars = c("end_date", "sentence")) %>%
   dcast(end_date ~ sentence)

 # Missing combinations are written as empty cells; row names are omitted.
 write.csv(
   all_drug_sentences,
   file = 'drug_sentences.csv',
   na = "",
   row.names = FALSE
 )
	install.packages('dplyr')
	install.packages('ggplot2')
	install.packages('readr')
	install.packages('ggthemes')
	install.packages('stringr')
	install.packages('lubridate')
	install.packages('reshape2')
	install.packages('grid')

	library(grid)
	library(dplyr)
	library(reshape2)
	library(ggplot2)
	library(readr)
	library(ggthemes)
	library(lubridate)
	library(stringr)

	# Load the raw convictions extract; the path is resolved against the current
	# working directory.
	convictions <- read_csv("convicted-offenders-02-10-2013.csv")

	# Use lower-case column names from here on.
	names(convictions) <- tolower(names(convictions))

	# Show the column names as a sanity check. Output recorded at the time:
	# [1] "year"         "offence"      "sentence"     "courtcluster"
	# [5] "agegroup"     "gender"       "ethnicity"    "count"
	names(convictions)

	# Exploratory look at the distinct values in each categorical column.
	# The comment under each call records the output seen when this was written.

	unique(convictions[["year"]])
	# [1] "2003/04" "2004/05" "2005/06" "2006/07" "2007/08"
	# [6] "2008/09" "2009/10" "2010/11" "2011/12" "2012/13"

	unique(convictions[["offence"]])
	#  [1] "Abduction, harassment and other offences against the person"
	#  [2] "Acts intended to cause injury"
	#  [3] "Dangerous or negligent acts endangering persons"
	#  [4] "Fraud, deception and related offences"
	#  [5] "Homicide and related offences"
	#  [6] "Illicit drug offences"
	#  [7] "Miscellaneous offences"
	#  [8] "Offences against justice procedures, government security and government operations"
	#  [9] "Prohibited and regulated weapons and explosives offences"
	# [10] "Property damage and environmental pollution"
	# [11] "Public order offences"
	# [12] "Robbery, extortion and related offences"
	# [13] "Sexual assault and related offences"
	# [14] "Theft and related offences"
	# [15] "Traffic and vehicle regulatory offences"
	# [16] "Unlawful entry with intent/burglary, break and enter"

	unique(convictions[["sentence"]])
	#  [1] "Community work, Corrections"
	#  [2] "Supervision by Community Corrections"
	#  [3] "Conviction and discharge"
	#  [4] "Imprisonment"
	#  [5] "Preventive Detention"
	#  [6] "Fine"
	#  [7] "Reparation/Restitution"
	#  [8] "Other"
	#  [9] "Disqualification from driving"
	# [10] "Community Detention"
	# [11] "Intensive Supervision"
	# [12] "Home Detention"
	# [13] "Life imprisonment"

	unique(convictions[["courtcluster"]])
	#  [1] "Northland"                     "Auckland"
	#  [3] "Manukau"                       "Waikato"
	#  [5] "Bay of Plenty/Coromandel"      "Central North Island"
	#  [7] "East Coast/Hawke's Bay"        "Taranaki/Wanganui"
	#  [9] "Manawatu/Wairarapa"            "Wellington"
	# [11] "Nelson/Marlborough/West Coast" "Canterbury"
	# [13] "Otago/South Canterbury"        "Southland/Central Otago"

	unique(convictions[["agegroup"]])
	# [1] "17 to 19" "20 to 24" "25 to 29" "30 to 39" "40+"      "Unknown"

	unique(convictions[["gender"]])
	# [1] "Female"  "Male"    "Unknown"

	unique(convictions[["ethnicity"]])
	# [1] "Maori"           "European"        "Pacific Peoples" "Other"
	# [5] "Unknown"

	# Normalise the year labels (e.g. "2003/04") into two Date columns.
	# Each label is split on "/" into its four-digit first half and its
	# two-digit second half.
	out <- str_split_fixed(convictions$year, "/", 2)

	# Name the two halves after the columns they will become.
	colnames(out) <- c("start_date", "end_date")

	# Build "dd/mm/yyyy" strings by prefixing a day and month. The second half
	# only carries two digits, so the century "20" is prefixed as well.
	# NOTE(review): 30 June is attached to the first year and 1 July to the
	# second; confirm this matches the period the labels are meant to describe.
	out[, "start_date"] <- paste0("30/06/", out[, "start_date"])
	out[, "end_date"] <- paste0("01/07/20", out[, "end_date"])

	# Put the two new columns in front of the existing convictions columns.
	convictions <- cbind(out, convictions)

	# Parse both columns into Date values. Each column must be parsed from its
	# own strings so that end_date stays distinct from start_date.
	convictions$start_date <- as.Date(convictions$start_date, format = "%d/%m/%Y")
	convictions$end_date <- as.Date(convictions$end_date, format = "%d/%m/%Y")

	# Offences of interest: all 16 categories may be too many for one plot.
	interesting_offences <- c(
	  "Fraud, deception and related offences",
	  "Homicide and related offences",
	  "Illicit drug offences",
	  "Robbery, extortion and related offences",
	  "Sexual assault and related offences",
	  "Theft and related offences",
	  "Unlawful entry with intent/burglary, break and enter"
	)

	# Total imprisonment counts per end_date and offence.
	# Every step is chained with %>%: if the pipe after filter() is missing,
	# select() is evaluated as a separate statement with no data frame to work
	# on and stops the script with an error.
	imprisonment_offences <- convictions %>%
	  filter(sentence == "Imprisonment", offence %in% interesting_offences) %>%
	  select(end_date, offence, count) %>%
	  group_by(end_date, offence) %>%
	  summarise(amount = sum(count))



	# Line chart of imprisonment totals over time, one coloured line per offence.
	imprisionment_plot <- ggplot(
	  imprisonment_offences,
	  aes(x = end_date, y = amount, colour = offence)
	) +
	  geom_line(size = 1) +
	  # Start the y axis at zero and end it at the largest total.
	  ylim(0, max(imprisonment_offences$amount)) +
	  theme_solarized(light = TRUE) +
	  scale_colour_solarized("red", name = "Offences") +
	  # Text sizes, legend placement and a 10 mm margin on every side.
	  theme(
	    axis.text = element_text(size = 18),
	    axis.title = element_text(size = 18),
	    axis.title.y = element_text(vjust = 1.4),
	    legend.position = "right",
	    legend.text = element_text(size = 10),
	    legend.title = element_text(size = 10),
	    plot.title = element_text(size = 20),
	    plot.margin = unit(rep(10, 4), "mm")
	  ) +
	  labs(
	    title = "Convictions resulting in imprisonment by offence since 2003",
	    x = "Year",
	    y = "Number of convictions"
	  )
	imprisionment_plot

	# Court clusters to compare. The remaining clusters in the data (Northland,
	# Waikato, Bay of Plenty/Coromandel, Central North Island,
	# East Coast/Hawke's Bay, Taranaki/Wanganui, Manawatu/Wairarapa,
	# Nelson/Marlborough/West Coast, Otago/South Canterbury and
	# Southland/Central Otago) are deliberately left out.
	interesting_regions <- c("Auckland", "Wellington", "Manukau", "Canterbury")

	# Total property damage / environmental pollution counts per end_date and
	# court cluster, restricted to the clusters chosen above.
	environmental_convictions <- convictions %>%
	  filter(
	    offence == "Property damage and environmental pollution",
	    courtcluster %in% interesting_regions
	  ) %>%
	  select(end_date, courtcluster, count) %>%
	  group_by(end_date, courtcluster) %>%
	  summarise(amount = sum(count))

	# Line chart of the environmental totals, one coloured line per court cluster.
	environmental_plot <- ggplot(
	  environmental_convictions,
	  aes(x = end_date, y = amount, colour = courtcluster)
	) +
	  geom_line(size = 1) +
	  # Start the y axis at zero and end it at the largest total.
	  ylim(0, max(environmental_convictions$amount)) +
	  theme_solarized(light = TRUE) +
	  scale_colour_solarized("red", name = "Court district") +
	  # Text sizes, legend placement and a 10 mm margin on every side.
	  theme(
	    axis.text = element_text(size = 18),
	    axis.title = element_text(size = 18),
	    axis.title.y = element_text(vjust = 1.4),
	    legend.position = "right",
	    legend.text = element_text(size = 10),
	    legend.title = element_text(size = 10),
	    plot.title = element_text(size = 20),
	    plot.margin = unit(rep(10, 4), "mm")
	  ) +
	  labs(
	    title = "Environmental pollution and property damage convictions by court region since 2003",
	    x = "Year",
	    y = "Number of convictions"
	  )
	environmental_plot

	# Total illicit-drug counts per end_date and ethnicity, built in one pipeline.
	drug_convictions <- convictions %>%
	  filter(offence == "Illicit drug offences") %>%
	  select(end_date, ethnicity, count) %>%
	  group_by(end_date, ethnicity) %>%
	  summarise(amount = sum(count))

	# Line chart of the drug totals, one coloured line per ethnicity.
	drug_plot <- ggplot(
	  drug_convictions,
	  aes(x = end_date, y = amount, colour = ethnicity)
	) +
	  geom_line(size = 1) +
	  # Start the y axis at zero and end it at the largest total.
	  ylim(0, max(drug_convictions$amount)) +
	  theme_solarized(light = TRUE) +
	  scale_colour_solarized("red", name = "Ethnicities") +
	  # Text sizes, legend placement and a 10 mm margin on every side.
	  theme(
	    axis.text = element_text(size = 18),
	    axis.title = element_text(size = 18),
	    axis.title.y = element_text(vjust = 1.4),
	    legend.position = "right",
	    legend.text = element_text(size = 10),
	    legend.title = element_text(size = 10),
	    plot.title = element_text(size = 20),
	    plot.margin = unit(rep(10, 4), "mm")
	  ) +
	  labs(
	    title = "Drug convictions by ethnicity since 2003",
	    x = "Year",
	    y = "Number of convictions"
	  )
	drug_plot

	# Sentence types to compare. The remaining types in the data (Life
	# imprisonment, Supervision by Community Corrections, Conviction and
	# discharge, Preventive Detention, Reparation/Restitution, Other,
	# Disqualification from driving and Intensive Supervision) are deliberately
	# left out.
	interesting_sentences <- c(
	  "Community work, Corrections",
	  "Imprisonment",
	  "Fine",
	  "Community Detention",
	  "Home Detention"
	)

	# Total illicit-drug counts per end_date and sentence type, restricted to the
	# sentence types chosen above.
	drug_sentences <- convictions %>%
	  filter(
	    offence == "Illicit drug offences",
	    sentence %in% interesting_sentences
	  ) %>%
	  select(end_date, sentence, count) %>%
	  group_by(end_date, sentence) %>%
	  summarise(amount = sum(count))

	# Line chart of the drug totals, one coloured line per sentence type.
	# This reuses the name drug_plot, replacing any earlier plot stored under it.
	drug_plot <- ggplot(
	  drug_sentences,
	  aes(x = end_date, y = amount, colour = sentence)
	) +
	  geom_line(size = 1) +
	  # Start the y axis at zero and end it at the largest total.
	  ylim(0, max(drug_sentences$amount)) +
	  theme_solarized(light = TRUE) +
	  scale_colour_solarized("red", name = "Sentence type") +
	  # Text sizes, legend placement and a 10 mm margin on every side.
	  theme(
	    axis.text = element_text(size = 18),
	    axis.title = element_text(size = 18),
	    axis.title.y = element_text(vjust = 1.4),
	    legend.position = "right",
	    legend.text = element_text(size = 10),
	    legend.title = element_text(size = 10),
	    plot.title = element_text(size = 20),
	    plot.margin = unit(rep(10, 4), "mm")
	  ) +
	  # The title says "since 2003", like the other plots: the series covers
	  # several years, not 2003 alone.
	  labs(
	    title = "Drug convictions by sentence since 2003",
	    x = "Year",
	    y = "Number of convictions"
	  )
	drug_plot


	# Export illicit-drug totals for every sentence type (no shortlist) as a
	# wide CSV: one row per end_date and one column per sentence type.
	all_drug_sentences <- convictions %>%
	  filter(offence == "Illicit drug offences") %>%
	  select(end_date, sentence, count) %>%
	  # Sum the counts within each end_date / sentence pair.
	  group_by(end_date, sentence) %>%
	  summarise(amount = sum(count)) %>%
	  # Reshape from long to wide.
	  melt(id.vars = c("end_date", "sentence")) %>%
	  dcast(end_date ~ sentence)

	# Missing combinations are written as empty cells; row names are omitted.
	write.csv(
	  all_drug_sentences,
	  file = 'drug_sentences.csv',
	  na = "",
	  row.names = FALSE
	)