Skip to content

Instantly share code, notes, and snippets.

@gdbassett
Last active August 15, 2017 18:13
Show Gist options
  • Save gdbassett/fc5bdcbfec538d10982aeff0d3c4941f to your computer and use it in GitHub Desktop.
Save gdbassett/fc5bdcbfec538d10982aeff0d3c4941f to your computer and use it in GitHub Desktop.
Bayesian credible intervals on VERIS data
# Enumeration pattern to summarise; it is passed to dbirR::getenumCI() for
# both the priors and the interval estimates.
enum <- "action.*.variety"

# Incident subset under study. All conditions go to a single filter() call,
# which keeps only the rows where every one of them is TRUE (simpler than
# assembling a standard-evaluation filter_() expression).
df <- dplyr::filter(
  vcdb,
  plus.dbir_year == 2016,
  subset.2017dbir,
  attribute.confidentiality.data_disclosure.Yes,
  victim.industry2.92
)
# Prior parameters per enumeration value: p1 is the observed frequency and p2
# is its complement, so p1 + p2 == 1 on every row.
# NOTE(review): the stated intent is priors from the *previous* year, but this
# reads the same `df` that the current-year estimate uses -- confirm whether a
# separately filtered prior-year data frame was meant here.
priors <- df %>%
  dbirR::getenumCI(enum, short.names = FALSE) %>%
  dplyr::mutate(p1 = freq, p2 = 1 - p1) %>%
  dplyr::select(enum, p1, p2)
# Current estimate with 95% Wilson confidence intervals (kept as a reference
# next to the Bayesian intervals), with the prior columns p1/p2 attached to
# each enumeration value by a left join on `enum`.
chunk <- dplyr::left_join(
  dbirR::getenumCI(
    df,
    enum,
    ci.level = 0.95,
    ci.method = "wilson",
    short.names = FALSE
  ),
  priors,
  by = "enum"
)

# Keep only the rows that actually have a sample size.
chunk <- chunk[!is.na(chunk$n), , drop = FALSE]
# Compute a 95% highest-density Bayesian credible interval for every row and
# stack those rows on top of the Wilson rows, so `chunk` ends up holding both
# methods with the same seven columns.
# local() keeps the intermediate objects out of the calling environment.
chunk <- local({
  # Row-wise priors: prior.shape1/prior.shape2 come from the joined p1/p2.
  # NOTE(review): p1 or p2 can be 0 when freq is 0 or 1; binom.bayes()
  # presumably expects strictly positive shapes -- confirm this is intended.
  bayes_fit <- binom::binom.bayes(
    chunk$x,
    chunk$n,
    conf.level = 0.95,
    type = "highest",
    prior.shape1 = chunk$p1,
    prior.shape2 = chunk$p2
  )

  # Re-attach the enumeration labels and report the posterior mean as `freq`
  # so the columns line up with the Wilson rows.
  bayes_rows <- dplyr::select(
    cbind(dplyr::select(chunk, enum), bayes_fit),
    enum, x, n, freq = mean, method, lower, upper
  )

  wilson_rows <- dplyr::select(chunk, enum, x, n, freq, method, lower, upper)

  rbind(bayes_rows, wilson_rows)
})
# Plot the lower/upper interval bounds of both methods side by side for every
# enumeration value that was observed at least once (x != 0).
# ggplot2 functions are namespace-qualified like every other package call in
# this script, so the plot does not depend on ggplot2 being attached.
chunk %>%
  dplyr::filter(x != 0) %>%
  dplyr::mutate(
    # as.character() first so the comparison and replacement work on strings
    method = as.character(method),
    # fold rows labelled "bayes*" into "bayes" so they share one legend entry
    method = ifelse(method == "bayes*", "bayes", method)
  ) %>%
  # one row per bound: `loc` is "lower"/"upper", `value` is the bound itself
  tidyr::gather("loc", "value", lower, upper) %>%
  # one line per (enumeration, method) pair connecting its two bounds
  dplyr::mutate(group = paste(enum, method)) %>%
  ggplot2::ggplot(
    ggplot2::aes(x = value, y = enum, group = group, color = method)
  ) +
  ggplot2::geom_point(ggplot2::aes(shape = method), alpha = 0.5) +
  ggplot2::geom_line(alpha = 0.5) +
  ggthemes::scale_color_tableau()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment