Skip to content

Instantly share code, notes, and snippets.

@bohdanszymanik
Created August 1, 2025 04:08
Show Gist options
  • Save bohdanszymanik/4e0a57cf4a2a1973cafdbcd4273ee066 to your computer and use it in GitHub Desktop.
Save bohdanszymanik/4e0a57cf4a2a1973cafdbcd4273ee066 to your computer and use it in GitHub Desktop.
Wellington CCTV crime comparison cleaner
library(tidyverse)
library(sf)
library(tictoc)
library(future)
library(units)
#library(remotes)
#install_github("yonghah/esri2sf")
library("esri2sf")
library(dplyr)
library(purrr)
data <- tibble(
x = 1:3,
y = 4:6,
z = 7:9
)
# Define a function that takes multiple arguments
my_function <- function(arg1, arg2) {
arg1 * arg2
}
# Apply the function row-wise using rowwise() and pmap()
result <- data %>%
rowwise() %>%
mutate(a = pmap_dbl(list(x, y), my_function)) %>%
ungroup() # Ungroup to remove the rowwise attribute
print(result)
#
# 1. First we get the CCTV camera locations, and more importantly, the viewsheds
#
sf_use_s2(TRUE)
wgtn_cctv_url <- "https://services1.arcgis.com/CPYspmTk3abe6d7i/arcgis/rest/services/CCTV_Viewsheds_(View_layer)/FeatureServer/0"
wgtn_cctv <- esri2sf(wgtn_cctv_url)
glimpse(wgtn_cctv)
# take a peek at the geometry column - normal glimpse won't show me enough
sf::st_as_text(wgtn_cctv$geoms)
wgtn_cctv |> slice_head(n = 2) |> mutate(wkt = st_as_text(geoms))
ggplot(wgtn_cctv) +
geom_sf() +
coord_sf(xlim = c(174.77, 174.78), ylim = c(-41.275, -41.30)) +
geom_sf_label(aes(label = Camera_Name), size = 2)
#
# 2. Next we load in the meshblocks 2013 for wellington from https://datafinder.stats.govt.nz/layer/8347-meshblock-2013/
# I suggest cropping down to just the Wellington region first otherwise you're just wasting memory and compute
#
mb2013_wgtn <- st_read("C:\\Users\\...\\wd\\Areas\\statsnz-meshblock-2013-SHP-wgtn\\meshblock-2013.shp")
ggplot(data = mb2013_wgtn) +
geom_sf() +
theme_minimal()
# I could clip meshblocks to the coastline... but actually I'm focussed on inner city areas where that's not a significant issue for me
# however, for the sake of completeness... let's do it. We use https://data.linz.govt.nz/layer/51153-nz-coastlines-and-islands-polygons-topo-150k/
# (it needs to be the polygons dataset not one with polylines!)
# I strongly suggest clipping that down inside the Koordinates viewer to eg the Wellington Region TA
nz_coastline <- st_read("C:\\Users\\...\\Downloads\\lds-nz-coastlines-and-islands-polygons-topo-150k-SHP\\nz-coastlines-and-islands-polygons-topo-150k.shp")
# clip to wellington region
wgtn_bbox <- st_as_sfc(mb2013_wgtn |> st_bbox())
# ggplot(data = wgtn_bbox) +
# geom_sf() +
# theme_minimal()
tic()
nz_coastline_wgtn <- st_intersection(wgtn_bbox, nz_coastline)
toc()
ggplot(data = nz_coastline_wgtn) +
geom_sf() +
theme_minimal()
# the meshblocks come as multipolygons - for handling lakes etc?
# but this slows down geometric operations so if we can get away with
# polygons then this will speed things up - actually based on numbers here
# probably not necessary
# 1.08s
tic()
mb2013_wgtn_p <- st_make_valid(st_cast(mb2013_wgtn, "POLYGON"))
toc()
# 5.15 sec
tic()
mb2013_wgtn_p_clip <- st_intersection(mb2013_wgtn_p, nz_coastline_wgtn)
toc()
ggplot(data = mb2013_wgtn_p_clip) +
geom_sf() +
theme_minimal()
# 5.77 sec
tic()
mb2013_wgtn_clip <- st_intersection(mb2013_wgtn, nz_coastline_wgtn)
toc()
ggplot(data = mb2013_wgtn_clip) +
geom_sf() +
theme_minimal()
# it appears we have some LandAreaSQ measures that don't tally with what is calculated by st_area() so let's add a calculated area
mb2013_wgtn_clip$LandAreaSQ_calc <- mb2013_wgtn_clip |> st_area()
# let's union together all those overlaying viewsheds to simplify the geometry
wgtn_cctv_union <- wgtn_cctv |> st_union()
ggplot(data = wgtn_cctv_union) +
geom_sf() +
theme_minimal()
# and now remove the viewsheds from the meshblocks
wgtn_mbs_less_viewsheds <- st_difference(mb2013_wgtn_clip, wgtn_cctv_union) # a tiny bit faster with mb2013_wgtn_small
# won't be able to see the impact at the regional level so zoom into central wellington
# where Lambton Quay meets Willis St
ggplot(data = wgtn_mbs_less_viewsheds) +
geom_sf() +
coord_sf(xlim = c(174.772, 174.778), ylim = c(-41.281, -41.285)) +
theme_minimal()
#################################################################
# Next task is to figure out by how much the area of a meshblock has decreased as a result of
wgtn_mbs_less_viewsheds$AreaMinusViewshed <- st_area(wgtn_mbs_less_viewsheds)
wgtn_mbs_less_viewsheds$ChangeInArea <- wgtn_mbs_less_viewsheds$LandAreaSQ_calc - wgtn_mbs_less_viewsheds$AreaMinusViewshed
wgtn_mbs_less_viewsheds$ChangeInAreaRatio <- 1 - drop_units(wgtn_mbs_less_viewsheds$AreaMinusViewshed / wgtn_mbs_less_viewsheds$LandAreaSQ_calc)
#################################################################
# and now need to join to the crime data
vtp <- read_csv("C:\\Users\\...\\wd\\VictimisationByMeshblock\\vtp_jul2025.csv",
col_types = cols(
`Meshblock` = col_character()
))
glimpse(vtp)
vtp$year <- as.numeric( vtp$`Month Year` |> str_sub(-4) )
vtp_year_mb <-
vtp |>
group_by(year, meshblock = Meshblock) |>
summarise( sum_v = sum(Victimisations))
glimpse(vtp_year_mb)
wgtn_mbs_less_viewsheds_vtp <-
wgtn_mbs_less_viewsheds |>
select(MeshblockN, ChangeInArea, ChangeInAreaRatio, AreaMinusViewshed, LandAreaSQ_calc) |>
left_join(vtp_year_mb, join_by(MeshblockN == meshblock))
glimpse(wgtn_mbs_less_viewsheds_vtp)
# the vtp data only goes back to june 2021 - that sux as it's impacted by covid and it's a midyear value
wgtn_mbs_less_viewsheds_vtp_2022_vrs_2024 <-
wgtn_mbs_less_viewsheds_vtp |> filter(year %in% c(2022, 2024)) |>
pivot_wider(
names_from = year,
values_from = sum_v
)
wgtn_mbs_less_viewsheds_vtp_2022_vrs_2024$v_2022_2024 <- wgtn_mbs_less_viewsheds_vtp_2022_vrs_2024$`2024` - wgtn_mbs_less_viewsheds_vtp_2022_vrs_2024$`2022`
# not sure there's a significant movement below the 0 line
ggplot(data = wgtn_mbs_less_viewsheds_vtp_2022_vrs_2024) +
geom_point(mapping = aes(x = ChangeInAreaRatio, y = v_2022_2024)) +
labs(x = "Proportion of Meshblock in Viewshed", y = "Victimisations 2024 minus victimisations 2022")
## using the ANZSOC Division as an asthetic to colour the points?
vtp_year_mb_anzsocd <-
vtp |>
group_by(year, meshblock = Meshblock, anzsoc_division = `ANZSOC Division`) |>
summarise( sum_v = sum(Victimisations))
glimpse(vtp_year_mb)
wgtn_mbs_less_viewsheds_vtp_anzsocd <-
wgtn_mbs_less_viewsheds |>
select(MeshblockN, ChangeInArea, ChangeInAreaRatio, AreaMinusViewshed, LandAreaSQ_calc) |>
left_join(vtp_year_mb_anzsocd, join_by(MeshblockN == meshblock))
wgtn_mbs_less_viewsheds_vtp_anzsocd_2022_vrs_2024 <-
wgtn_mbs_less_viewsheds_vtp_anzsocd |> filter(year %in% c(2022, 2024)) |>
pivot_wider(
names_from = year,
values_from = sum_v
)
wgtn_mbs_less_viewsheds_vtp_anzsocd_2022_vrs_2024$v_2022_2024 <- wgtn_mbs_less_viewsheds_vtp_anzsocd_2022_vrs_2024$`2024` - wgtn_mbs_less_viewsheds_vtp_anzsocd_2022_vrs_2024$`2022`
# the ANZSOC Division colours again don't show a significant distribution one way or the other
ggplot(data = wgtn_mbs_less_viewsheds_vtp_anzsocd_2022_vrs_2024) +
geom_point(mapping = aes(x = ChangeInAreaRatio, y = v_2022_2024, colour = anzsoc_division)) +
labs(x = "Proportion of Meshblock in Viewshed", y = "Victimisations 2024 minus victimisations 2022")
# another way to look at the data is the absolute change in victimisations
# from 2022 to 2024
# split by there being a noticeable camera deployment versus no deployment
# using an area change of 0.5% as a measure of CCTV deployment
# (remember these viewsheds are only a small part of a meshblock's area)
wgtn_mbs_less_viewsheds_vtp_2022_2024_abs <-
wgtn_mbs_less_viewsheds_vtp |>
filter(year %in% c(2022, 2024)) |>
group_by(CameraImpact = ChangeInAreaRatio > 0.01, year) |>
summarise(sum_sum_v = sum(sum_v))
wgtn_mbs_less_viewsheds_vtp_2022_2024_abs |>
st_drop_geometry() |>
pivot_wider(
names_from = year,
values_from = sum_sum_v
) |>
mutate(change_ratio = (`2024` - `2022`)/`2022`)
# CameraImpact `2022` `2024` change_ratio
# <lgl> <dbl> <dbl> <dbl>
# 1 FALSE 20050 21815 0.0880
# 2 TRUE 4606 5022 0.0903
# seems very strange that there is no benefit at all from the CCTV deployment?
# Let's look at the different ANZSOC Divisions and see if that shows anything
wgtn_mbs_less_viewsheds_vtp_2022_2024_anzsocd_abs <-
wgtn_mbs_less_viewsheds_vtp_anzsocd |>
filter(year %in% c(2022, 2024)) |>
group_by(CameraImpact = ChangeInAreaRatio > 0.05, year, anzsoc_division) |>
summarise(sum_sum_v = sum(sum_v))
wgtn_mbs_less_viewsheds_vtp_2022_2024_anzsocd_abs |>
st_drop_geometry() |>
pivot_wider(
names_from = year,
values_from = sum_sum_v
) |>
mutate(change_ratio = (`2024` - `2022`)/`2022`) |>
arrange(anzsoc_division)
# # Groups: CameraImpact [2]
# CameraImpact anzsoc_division `2022` `2024` change_ratio
# <lgl> <chr> <dbl> <dbl> <dbl>
# 1 FALSE Abduction, Harassment and Other Related Offences Against a Person 22 18 -0.182
# 2 TRUE Abduction, Harassment and Other Related Offences Against a Person 4 3 -0.25
# 3 FALSE Acts Intended to Cause Injury 1784 2119 0.188
# 4 TRUE Acts Intended to Cause Injury 680 731 0.075
# 5 FALSE Robbery, Extortion and Related Offences 194 224 0.155
# 6 TRUE Robbery, Extortion and Related Offences 31 55 0.774
# 7 FALSE Sexual Assault and Related Offences 144 165 0.146
# 8 TRUE Sexual Assault and Related Offences 47 57 0.213
# 9 FALSE Theft and Related Offences 14945 16040 0.0733
# 10 TRUE Theft and Related Offences 2455 2785 0.134
# 11 FALSE Unlawful Entry With Intent/Burglary, Break and Enter 3924 4292 0.0938
# 12 TRUE Unlawful Entry With Intent/Burglary, Break and Enter 426 348 -0.183
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment