Created
August 1, 2025 04:08
-
-
Save bohdanszymanik/4e0a57cf4a2a1973cafdbcd4273ee066 to your computer and use it in GitHub Desktop.
Wellington CCTV crime comparison cleaner
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(sf) | |
library(tictoc) | |
library(future) | |
library(units) | |
#library(remotes) | |
#install_github("yonghah/esri2sf") | |
library("esri2sf") | |
library(dplyr) | |
library(purrr) | |
data <- tibble( | |
x = 1:3, | |
y = 4:6, | |
z = 7:9 | |
) | |
# Define a function that takes multiple arguments | |
my_function <- function(arg1, arg2) { | |
arg1 * arg2 | |
} | |
# Apply the function row-wise using rowwise() and pmap() | |
result <- data %>% | |
rowwise() %>% | |
mutate(a = pmap_dbl(list(x, y), my_function)) %>% | |
ungroup() # Ungroup to remove the rowwise attribute | |
print(result) | |
# | |
# 1. First we get the CCTV camera locations, and more importantly, the viewsheds | |
# | |
sf_use_s2(TRUE) | |
wgtn_cctv_url <- "https://services1.arcgis.com/CPYspmTk3abe6d7i/arcgis/rest/services/CCTV_Viewsheds_(View_layer)/FeatureServer/0" | |
wgtn_cctv <- esri2sf(wgtn_cctv_url) | |
glimpse(wgtn_cctv) | |
# take a peek at the geometry column - normal glimpse won't show me enough | |
sf::st_as_text(wgtn_cctv$geoms) | |
wgtn_cctv |> slice_head(n = 2) |> mutate(wkt = st_as_text(geoms)) | |
ggplot(wgtn_cctv) + | |
geom_sf() + | |
coord_sf(xlim = c(174.77, 174.78), ylim = c(-41.275, -41.30)) + | |
geom_sf_label(aes(label = Camera_Name), size = 2) | |
# | |
# 2. Next we load in the meshblocks 2013 for wellington from https://datafinder.stats.govt.nz/layer/8347-meshblock-2013/ | |
# I suggest cropping down to just the Wellington region first otherwise you're just wasting memory and compute | |
# | |
mb2013_wgtn <- st_read("C:\\Users\\...\\wd\\Areas\\statsnz-meshblock-2013-SHP-wgtn\\meshblock-2013.shp") | |
ggplot(data = mb2013_wgtn) + | |
geom_sf() + | |
theme_minimal() | |
# I could clip meshblocks to the coastline... but actually I'm focussed on inner city areas where that's not a significant issue for me | |
# however, for the sake of completeness... let's do it. We use https://data.linz.govt.nz/layer/51153-nz-coastlines-and-islands-polygons-topo-150k/ | |
# (it needs to be the polygons dataset not one with polylines!) | |
# I strongly suggest clipping that down inside the Koordinates viewer to eg the Wellington Region TA | |
nz_coastline <- st_read("C:\\Users\\...\\Downloads\\lds-nz-coastlines-and-islands-polygons-topo-150k-SHP\\nz-coastlines-and-islands-polygons-topo-150k.shp") | |
# clip to wellington region | |
wgtn_bbox <- st_as_sfc(mb2013_wgtn |> st_bbox()) | |
# ggplot(data = wgtn_bbox) + | |
# geom_sf() + | |
# theme_minimal() | |
tic() | |
nz_coastline_wgtn <- st_intersection(wgtn_bbox, nz_coastline) | |
toc() | |
ggplot(data = nz_coastline_wgtn) + | |
geom_sf() + | |
theme_minimal() | |
# the meshblocks come as multipolygons - for handling lakes etc? | |
# but this slows down geometric operations so if we can get away with | |
# polygons then this will speed things up - actually based on numbers here | |
# probably not necessary | |
# 1.08s | |
tic() | |
mb2013_wgtn_p <- st_make_valid(st_cast(mb2013_wgtn, "POLYGON")) | |
toc() | |
# 5.15 sec | |
tic() | |
mb2013_wgtn_p_clip <- st_intersection(mb2013_wgtn_p, nz_coastline_wgtn) | |
toc() | |
ggplot(data = mb2013_wgtn_p_clip) + | |
geom_sf() + | |
theme_minimal() | |
# 5.77 sec | |
tic() | |
mb2013_wgtn_clip <- st_intersection(mb2013_wgtn, nz_coastline_wgtn) | |
toc() | |
ggplot(data = mb2013_wgtn_clip) + | |
geom_sf() + | |
theme_minimal() | |
# it appears we have some LandAreaSQ measures that don't tally with what is calculated by st_area() so let's add a calculated area | |
mb2013_wgtn_clip$LandAreaSQ_calc <- mb2013_wgtn_clip |> st_area() | |
# let's union together all those overlaying viewsheds to simplify the geometry | |
wgtn_cctv_union <- wgtn_cctv |> st_union() | |
ggplot(data = wgtn_cctv_union) + | |
geom_sf() + | |
theme_minimal() | |
# and now remove the viewsheds from the meshblocks | |
wgtn_mbs_less_viewsheds <- st_difference(mb2013_wgtn_clip, wgtn_cctv_union) # a tiny bit faster with mb2013_wgtn_small | |
# won't be able to see the impact at the regional level so zoom into central wellington | |
# where Lambton Quay meets Willis St | |
ggplot(data = wgtn_mbs_less_viewsheds) + | |
geom_sf() + | |
coord_sf(xlim = c(174.772, 174.778), ylim = c(-41.281, -41.285)) + | |
theme_minimal() | |
################################################################# | |
# Next task is to figure out by how much the area of a meshblock has decreased as a result of | |
wgtn_mbs_less_viewsheds$AreaMinusViewshed <- st_area(wgtn_mbs_less_viewsheds) | |
wgtn_mbs_less_viewsheds$ChangeInArea <- wgtn_mbs_less_viewsheds$LandAreaSQ_calc - wgtn_mbs_less_viewsheds$AreaMinusViewshed | |
wgtn_mbs_less_viewsheds$ChangeInAreaRatio <- 1 - drop_units(wgtn_mbs_less_viewsheds$AreaMinusViewshed / wgtn_mbs_less_viewsheds$LandAreaSQ_calc) | |
################################################################# | |
# and now need to join to the crime data | |
vtp <- read_csv("C:\\Users\\...\\wd\\VictimisationByMeshblock\\vtp_jul2025.csv", | |
col_types = cols( | |
`Meshblock` = col_character() | |
)) | |
glimpse(vtp) | |
vtp$year <- as.numeric( vtp$`Month Year` |> str_sub(-4) ) | |
vtp_year_mb <- | |
vtp |> | |
group_by(year, meshblock = Meshblock) |> | |
summarise( sum_v = sum(Victimisations)) | |
glimpse(vtp_year_mb) | |
wgtn_mbs_less_viewsheds_vtp <- | |
wgtn_mbs_less_viewsheds |> | |
select(MeshblockN, ChangeInArea, ChangeInAreaRatio, AreaMinusViewshed, LandAreaSQ_calc) |> | |
left_join(vtp_year_mb, join_by(MeshblockN == meshblock)) | |
glimpse(wgtn_mbs_less_viewsheds_vtp) | |
# the vtp data only goes back to june 2021 - that sux as it's impacted by covid and it's a midyear value | |
wgtn_mbs_less_viewsheds_vtp_2022_vrs_2024 <- | |
wgtn_mbs_less_viewsheds_vtp |> filter(year %in% c(2022, 2024)) |> | |
pivot_wider( | |
names_from = year, | |
values_from = sum_v | |
) | |
wgtn_mbs_less_viewsheds_vtp_2022_vrs_2024$v_2022_2024 <- wgtn_mbs_less_viewsheds_vtp_2022_vrs_2024$`2024` - wgtn_mbs_less_viewsheds_vtp_2022_vrs_2024$`2022` | |
# not sure there's a significant movement below the 0 line | |
ggplot(data = wgtn_mbs_less_viewsheds_vtp_2022_vrs_2024) + | |
geom_point(mapping = aes(x = ChangeInAreaRatio, y = v_2022_2024)) + | |
labs(x = "Proportion of Meshblock in Viewshed", y = "Victimisations 2024 minus victimisations 2022") | |
## using the ANZSOC Division as an asthetic to colour the points? | |
vtp_year_mb_anzsocd <- | |
vtp |> | |
group_by(year, meshblock = Meshblock, anzsoc_division = `ANZSOC Division`) |> | |
summarise( sum_v = sum(Victimisations)) | |
glimpse(vtp_year_mb) | |
wgtn_mbs_less_viewsheds_vtp_anzsocd <- | |
wgtn_mbs_less_viewsheds |> | |
select(MeshblockN, ChangeInArea, ChangeInAreaRatio, AreaMinusViewshed, LandAreaSQ_calc) |> | |
left_join(vtp_year_mb_anzsocd, join_by(MeshblockN == meshblock)) | |
wgtn_mbs_less_viewsheds_vtp_anzsocd_2022_vrs_2024 <- | |
wgtn_mbs_less_viewsheds_vtp_anzsocd |> filter(year %in% c(2022, 2024)) |> | |
pivot_wider( | |
names_from = year, | |
values_from = sum_v | |
) | |
wgtn_mbs_less_viewsheds_vtp_anzsocd_2022_vrs_2024$v_2022_2024 <- wgtn_mbs_less_viewsheds_vtp_anzsocd_2022_vrs_2024$`2024` - wgtn_mbs_less_viewsheds_vtp_anzsocd_2022_vrs_2024$`2022` | |
# the ANZSOC Division colours again don't show a significant distribution one way or the other | |
ggplot(data = wgtn_mbs_less_viewsheds_vtp_anzsocd_2022_vrs_2024) + | |
geom_point(mapping = aes(x = ChangeInAreaRatio, y = v_2022_2024, colour = anzsoc_division)) + | |
labs(x = "Proportion of Meshblock in Viewshed", y = "Victimisations 2024 minus victimisations 2022") | |
# another way to look at the data is the absolute change in victimisations | |
# from 2022 to 2024 | |
# split by there being a noticeable camera deployment versus no deployment | |
# using an area change of 0.5% as a measure of CCTV deployment | |
# (remember these viewsheds are only a small part of a meshblock's area) | |
wgtn_mbs_less_viewsheds_vtp_2022_2024_abs <- | |
wgtn_mbs_less_viewsheds_vtp |> | |
filter(year %in% c(2022, 2024)) |> | |
group_by(CameraImpact = ChangeInAreaRatio > 0.01, year) |> | |
summarise(sum_sum_v = sum(sum_v)) | |
wgtn_mbs_less_viewsheds_vtp_2022_2024_abs |> | |
st_drop_geometry() |> | |
pivot_wider( | |
names_from = year, | |
values_from = sum_sum_v | |
) |> | |
mutate(change_ratio = (`2024` - `2022`)/`2022`) | |
# CameraImpact `2022` `2024` change_ratio | |
# <lgl> <dbl> <dbl> <dbl> | |
# 1 FALSE 20050 21815 0.0880 | |
# 2 TRUE 4606 5022 0.0903 | |
# seems very strange that there is no benefit at all from the CCTV deployment? | |
# Let's look at the different ANZSOC Divisions and see if that shows anything | |
wgtn_mbs_less_viewsheds_vtp_2022_2024_anzsocd_abs <- | |
wgtn_mbs_less_viewsheds_vtp_anzsocd |> | |
filter(year %in% c(2022, 2024)) |> | |
group_by(CameraImpact = ChangeInAreaRatio > 0.05, year, anzsoc_division) |> | |
summarise(sum_sum_v = sum(sum_v)) | |
wgtn_mbs_less_viewsheds_vtp_2022_2024_anzsocd_abs |> | |
st_drop_geometry() |> | |
pivot_wider( | |
names_from = year, | |
values_from = sum_sum_v | |
) |> | |
mutate(change_ratio = (`2024` - `2022`)/`2022`) |> | |
arrange(anzsoc_division) | |
# # Groups: CameraImpact [2] | |
# CameraImpact anzsoc_division `2022` `2024` change_ratio | |
# <lgl> <chr> <dbl> <dbl> <dbl> | |
# 1 FALSE Abduction, Harassment and Other Related Offences Against a Person 22 18 -0.182 | |
# 2 TRUE Abduction, Harassment and Other Related Offences Against a Person 4 3 -0.25 | |
# 3 FALSE Acts Intended to Cause Injury 1784 2119 0.188 | |
# 4 TRUE Acts Intended to Cause Injury 680 731 0.075 | |
# 5 FALSE Robbery, Extortion and Related Offences 194 224 0.155 | |
# 6 TRUE Robbery, Extortion and Related Offences 31 55 0.774 | |
# 7 FALSE Sexual Assault and Related Offences 144 165 0.146 | |
# 8 TRUE Sexual Assault and Related Offences 47 57 0.213 | |
# 9 FALSE Theft and Related Offences 14945 16040 0.0733 | |
# 10 TRUE Theft and Related Offences 2455 2785 0.134 | |
# 11 FALSE Unlawful Entry With Intent/Burglary, Break and Enter 3924 4292 0.0938 | |
# 12 TRUE Unlawful Entry With Intent/Burglary, Break and Enter 426 348 -0.183 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment