Created
April 23, 2020 15:37
-
-
Save srvanderplas/9cb0268df99e97a9ce327bb1489f7046 to your computer and use it in GitHub Desktop.
R code to simulate Bunch & Murphy (2003) test of forensic identification of cartridge cases
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Bunch and Murphy simulation | |
library(tidyverse) | |
# Packet 1 contains 10 fires from the same weapon | |
# Packet 2 contains 1 from each of the other 9 glocks, plus one non-glock | |
# Packets 3-8 contain | |
# - 0, 1, or 2 non-glocks | |
# - 10, 9, or 8 randomly sampled from each of the other glocks | |
# After packet 2 is assembled, then, the following cartridges remain: | |
bunch_test_kits <- function() { | |
# Accounting for packet 2 | |
unknown_2 <- sample(c("S", "B"), size = 1) | |
unknown_left <- c(rep(unknown_2, 9), rep(setdiff(c("S", "B"), unknown_2), 10)) | |
# Number of unknowns | |
n_unk <- purrr::map_int(3:8, ~sample(0:2, 1)) | |
n_k <- 10 - n_unk | |
unk_order <- sample(size = 19, unknown_left, replace = F) | |
known_order <- sample(size = 81, rep(2:10, each = 9)) %>% as.character() | |
kits <- purrr::map(1:8, | |
function(x) { | |
if (x < 7) { | |
start_unk <- ifelse(x == 1, 1, sum(n_unk[1:(x-1)]) + 1) | |
idxs <- if(n_unk[x] > 0) start_unk:(start_unk + n_unk[x] - 1) else NULL | |
# print(idxs) | |
unk <- unk_order[idxs] | |
start_k <- ifelse(x == 1, 1, sum(n_k[1:(x-1)]) + 1) | |
idxk <- start_k:(start_k + n_k[x] - 1) | |
res <- c(unk, known_order[idxk]) | |
} else if (x == 7) { | |
res <- as.character(rep(1, times = 10)) | |
} else { | |
res <- c(2:10, unknown_2) | |
} | |
sample(res, size = 10) # randomly permute sample order | |
}) | |
} | |
prev_exists <- function(idx, x) { | |
# This function determines whether the ordered comparison has | |
# been performed previously in the examination | |
if (idx < 10) return(FALSE) | |
x[idx] %in% unique(x[1:(idx - 1)]) | |
} | |
fbi_elim_calc <- function(x, y) { | |
# FBI only allows elimination based on class characteristics | |
# This function returns 0 if there is a class characteristic match, | |
# and 1 if there is a class characteristic mismatch | |
x1 <- ifelse(x %in% c("S", "B"), x, "A") | |
y1 <- ifelse(y %in% c("S", "B"), y, "A") | |
x1 != y1 | |
} | |
bunch_eval <- function(set) { | |
# This function calculates out the number of comparisons necessary | |
# on each set, with and without deductive reasoning. | |
idx <- 1:length(set) | |
setf <- factor(set, levels = unique(set), ordered = T) | |
comparisons <- crossing(i1 = idx, i2 = idx) %>% | |
# This is the max number of comparisons | |
filter(i1 < i2) %>% | |
mutate(set1 = setf[i1], set2 = setf[i2], | |
f1 = as.numeric(set1), f2 = as.numeric(set2), | |
idx = row_number(), | |
match = set1 == set2, | |
fbi_elim = map2_lgl(set1, set2, fbi_elim_calc), | |
# Set of ordered pairwise comparisons (by source) | |
orderedf = map2_chr(as.numeric(set1), as.numeric(set2), | |
function(x, y) sort(unique(c(x, y))) %>% paste(collapse = ",")), | |
# Calculate the highest source-source match found | |
max_match = ifelse(match, f1, 0) %>% cummax %>% pmin(., i1 - 1), | |
# has the orderedf comparison been performed before? | |
compared_before = map_lgl(idx, partial(prev_exists, x = orderedf)) | |
) %>% | |
# Once a new source-source match is found, you still have to finish out the row | |
# with new comparisons, so group by row and take the min value | |
group_by(i1) %>% | |
mutate(min_match = min(max_match)) %>% | |
ungroup() %>% | |
mutate(full_row = pmin(f1, f2) <= min_match, | |
# If you've completed the full row of comparisons for that match | |
# and you have made the comparison before, you can infer the result | |
# from previous work | |
inferred = full_row * compared_before) | |
comparisons %>% | |
summarize(matches_all = sum(match), matches_min = sum(match[!inferred]), | |
nonmatches_all = sum(!match), nonmatches_min = sum(!match[!inferred]), | |
fbi_elim_all = sum(fbi_elim), fbi_elim_min = sum(fbi_elim[!inferred]), | |
optional = sum(inferred)) | |
} | |
res <- purrr::map_df(1:500000, function(i) { # With 500k iterations, this takes days to run on a relatively fast machine | |
tibble(kit = bunch_test_kits()) %>% | |
mutate(eval = parallel::mclapply(kit, bunch_eval)) | |
}) | |
res <- unnest(res, eval) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment