Skip to content

Instantly share code, notes, and snippets.

@matt-dray
Last active August 15, 2025 13:42
Show Gist options
  • Select an option

  • Save matt-dray/f3063dd2f786053633191d6d8b7295e5 to your computer and use it in GitHub Desktop.

Select an option

Save matt-dray/f3063dd2f786053633191d6d8b7295e5 to your computer and use it in GitHub Desktop.
Assign abstracts to sifters as equally as possible so that each abstract is reviewed exactly n times, then add that content to spreadsheets that are unique to each sifter (pretty hacky, made quickly)
# Add abstract info and assessment columns to assignments
.prepare_sifter_outputs <- function(
assignments,
abstracts_df,
strip_abstract_info = TRUE,
add_assessment_columns = TRUE
) {
assignments <- lapply(
assignments,
\(x) abstracts_df[abstracts_df$abstract_id %in% x, ]
)
if (strip_abstract_info) {
columns_to_retain <- c(
"abstract_id",
"user_id",
"q9_abstract_title",
"q9_abstract",
"q7_domain_what",
"q7_domain_how",
"q7_domain_why"
)
assignments <- lapply(assignments, \(x) x[, columns_to_retain])
}
if (add_assessment_columns) {
assignments <- lapply(
assignments,
\(x) {
x["Contribution to decision making (1 to 6)"] <- NA_character_
x["Soundness of analytical methods (1 to 6)"] <- NA_character_
x["Reproducibility (1 to 6)"] <- NA_character_
x["Presentation and interest (1 to 6)"] <- NA_character_
x["Sifter comments"] <- NA_character_
return(x)
}
)
}
assignments
}
# Create spreadhseets with guidance and abstracts to assess
.generate_sifter_spreadsheets <- function(
assignments,
path = here::here("output"),
overwrite_file = TRUE
) {
for (i in seq_along(assignments)) {
sifter_name <- names(assignments[i])
wb <- openxlsx::createWorkbook()
# Styles ----
openxlsx::modifyBaseFont(
wb = wb,
fontSize = 11,
fontName = "Arial"
)
styles <- list(
title = openxlsx::createStyle(fontSize = 20, textDecoration = "bold"),
subtitle = openxlsx::createStyle(
fontSize = 14,
textDecoration = "bold",
valign = "bottom"
),
cell_wrap = openxlsx::createStyle(wrapText = TRUE),
cell_valign = openxlsx::createStyle(valign = "top")
)
# Cover ----
## Content ----
openxlsx::addWorksheet(wb, sheetName = "cover")
openxlsx::writeData(wb, sheet = "cover", x = "HACA Sifting 2024")
openxlsx::writeData(
wb,
sheet = "cover",
x = paste("Sifter:", sifter_name),
startRow = 2
)
openxlsx::writeData(
wb,
sheet = "cover",
x = "Deadline",
startRow = 3
)
openxlsx::writeData(
wb,
sheet = "cover",
x = "Please return by Wednesday 27 March at 11:00 to <email address>",
startRow = 4
)
openxlsx::writeData(
wb,
sheet = "cover",
x = "Domains",
startRow = 5
)
openxlsx::writeData(
wb,
sheet = "cover",
x = paste(
"This year's conference theme is 'by analysts, for analysts and with leaders' exploring the 'what, how and why of analytics':\n",
" • what health and care problems can analytics help address?\n",
" • how can these problems be addressed through analytical methods?\n",
" • why is high quality analysis needed to make impactful decisions?"
),
startRow = 6
)
openxlsx::writeData(
wb,
sheet = "cover",
x = "Scoring",
startRow = 7
)
openxlsx::writeData(
wb,
sheet = "cover",
x = paste(
"As a reviewer, please conduct a review of the abstracts assigned to you, scoring each category 1 to 6 (columns H to L in the 'abstracts' sheet).\n\n",
"You will be asked to rate each abstract on each of the following:\n",
" • Contribution to decision making (did this analysis inform strategy or decision making and how might this process be expected to influence policy and practice in the host organisation and beyond?)\n",
" • Soundness of analytical methods (novel or complex methods are not necessarily better, we are looking for the right analytical approach to the question)\n",
" • Reproducibility (are the methods well described and replicable?)\n",
" • Presentation and interest (does the abstract describe interesting work that is likely to provoke thought and develop practice within its area?)\n\n",
"The scores represent the following:\n",
" • 6 is outstanding (clear accept)\n",
" • 5 is excellent\n",
" • 4 is good\n",
" • 3 is satisfactory (borderline accept depending on the strength of other submissions)\n",
" • 2 is fair (may be accepted if other papers in this conference track are weak)\n",
" • 1 is reject (overall the abstract may be sound, but judged just on this score, a clear reject)\n\n",
"Further information:\n",
" • Submitters have provided a yes/no answer (columns E to G in the 'abstracts' sheet) to say whether their abstract fits the domains of the conference (see 'domains' section above), which may help in your decision making.",
" • Each abstract will be reviewed by two people and their scores considered together.\n",
" • Please note that the scores will not be averaged and none are essential for acceptance to the conference.\n",
" • Describe the abstract as accurately as you can within the scoring system.\n",
" • The final decision will rest on a consideration of each factor, and may differ by each parallel conference talk stream.\n",
" • Some weaker paper submissions may be accepted for a poster. If you wish to propose that a talk is accepted as a proposal please do so in the 'Comments' section, along with any other comments you might have."
),
startRow = 8
)
## Style ----
openxlsx::setColWidths(
wb,
sheet = "cover",
cols = 1,
widths = 100
)
openxlsx::addStyle(
wb,
sheet = "cover",
style = styles$cell_wrap,
rows = 1:8,
cols = 1,
gridExpand = TRUE,
stack = TRUE
)
openxlsx::addStyle(
wb,
sheet = "cover",
style = styles$title,
rows = 1,
cols = 1,
gridExpand = TRUE,
stack = TRUE
)
openxlsx::addStyle(
wb,
sheet = "cover",
style = styles$subtitle,
rows = c(3, 5, 7),
cols = 1,
gridExpand = TRUE,
stack = TRUE
)
openxlsx::setRowHeights(
wb = wb,
sheet = "cover",
rows = c(3, 5, 7),
heights = 34
)
# Abstracts ----
openxlsx::addWorksheet(wb, sheetName = "abstracts")
df <- assignments[[i]]
df_nrow <- nrow(df)
df_nrow_seq <- seq(df_nrow)
df_ncol <- ncol(df)
df_ncol_seq <- seq(df_ncol)
openxlsx::writeDataTable(
wb,
sheet = "abstracts",
x = df,
withFilter = FALSE
)
openxlsx::setColWidths(
wb,
sheet = "abstracts",
cols = df_ncol_seq,
widths = c(rep("auto", 2), 32, 64, rep("16", 7), 32)
)
openxlsx::addStyle(
wb,
sheet = "abstracts",
style = styles$cell_wrap,
rows = df_nrow_seq,
cols = 3:4,
gridExpand = TRUE,
stack = TRUE
)
openxlsx::addStyle(
wb,
sheet = "abstracts",
style = styles$cell_wrap,
rows = 1,
cols = df_ncol_seq,
gridExpand = TRUE,
stack = TRUE
)
openxlsx::addStyle(
wb,
sheet = "abstracts",
style = styles$cell_valign,
rows = df_nrow_seq,
cols = df_ncol_seq,
gridExpand = TRUE,
stack = TRUE
)
openxlsx::dataValidation(
wb,
sheet = "abstracts",
rows = df_nrow_seq + 1,
cols = 8:11,
type = "whole",
operator = "between",
value = c(1, 6)
)
# Save
sifter_name <- gsub("[[:punct:]]", "", tolower(names(assignments[i])))
sifter_name <- gsub("[[:space:]]", "-", sifter_name)
sifter_file <- paste0("haca-2024-sifting_", sifter_name, ".xlsx")
sifter_path <- file.path(path, sifter_file)
cat(paste0("> Writing ", basename(sifter_path), "... "))
openxlsx::saveWorkbook(wb, sifter_path, overwrite = overwrite_file)
cat("done.\n")
}
}
# As below, but checks for exact name and affiliation matches between
# sifters and abstracts.
#
# It's possible certain abstracts might not get assigned, especially given combos
# of sifters writing abstracts, abstracts from sifters' affiliations, assignment
# capping and sifter capping. Should probably report any abstracts that have
# <assignment_cap assignments. As a precaution, have built in a max_iterations arg
# in case of infinite looping, but I don't think that will come into play.
.resample <- function(x, ...) x[sample.int(length(x), ...)] # see ?sample
.assign_abstracts <- function(
abstracts, # c("First Last" = "Org")
sifters, # c("First Last" = "Org")
assignment_cap = 2,
sifter_caps = NULL, # c("First Last" = 5)
max_iterations = 1000 # just in case?
) {
n_abstracts <- length(abstracts)
seq_abstracts <- seq_len(n_abstracts)
sifter_assignments <- setNames(vector("list", length(sifters)), names(sifters))
assignment_counts <- rep(0, n_abstracts)
iter <- 0
repeat {
for (i in names(sifter_assignments)) {
# 1. Find the pool of abstracts available to this sifter (if any)
# a. Check if sifter cap has been met
sifter_has_cap <- i %in% names(sifter_caps)
if (sifter_has_cap) {
sifter_cap <- sifter_caps[[i]]
sifter_assignment_count <- length(sifter_assignments[[i]])
}
if (sifter_has_cap && sifter_assignment_count == sifter_cap) next
# b. Add abstracts to pool if they have <n assignments
abstracts_under_min_count <- which(assignment_counts < assignment_cap)
if (length(abstracts_under_min_count) == 0) next
# c. Remove abstracts that are already assigned to this sifter
already_assigned_to_sifter <- sifter_assignments[[i]]
abstracts_available <- abstracts_under_min_count[!abstracts_under_min_count %in% already_assigned_to_sifter]
if (length(abstracts_available) == 0) next
# d. Remove abstracts by the named sifter
abstracts_by_sifter <- which(i == names(abstracts[abstracts_available]))
if (length(abstracts_by_sifter) > 0) {
abstracts_available <- abstracts_available[-abstracts_by_sifter]
}
if (length(abstracts_available) == 0) next
# e. Remove abstracts with the same affiliation as the sifter
sifter_affiliation <- unname(sifters[i])
abstracts_by_same_affiliation <- which(sifter_affiliation == unname(abstracts[abstracts_available]))
if (length(abstracts_by_same_affiliation) > 0) {
abstracts_available <- abstracts_available[-abstracts_by_same_affiliation]
}
if (length(abstracts_available) == 0) next
# 2. Select from pool randomly and assign to sifter
abstract_selected <- .resample(abstracts_available, 1)
sifter_assignments[[i]] <- c(sifter_assignments[[i]], abstract_selected)
# 3. Increment count for sampled abstract
assignment_counts[abstract_selected] <- assignment_counts[abstract_selected] + 1
if (all(assignment_counts == assignment_cap)) break
}
# Reorder so sifter with fewest assignments gets next assignment
sifter_assignments <- sifter_assignments[order(lengths(sifter_assignments))]
iter <- iter + 1
if (all(assignment_counts == assignment_cap)) break
if (iter == max_iterations) {
cat("max_iterations reached")
break
}
}
sifter_assignments <- lapply(sifter_assignments, sort)
sifter_assignments[order(names(sifter_assignments))]
}
# Prepare demo sifter and abstract sets
set.seed(1)
n_sifters <- 3
sifter_set <- setNames(charlatan::ch_company(n_sifters), charlatan::ch_name(n_sifters))
sifter_names <- names(sifter_set)
sifter_companies <- unname(sifter_set)
abstract_name_pool <- c(sifter_names, charlatan::ch_name(10))
abstract_company_pool <- c(rep(sifter_companies, 2), charlatan::ch_company(7))
abstract_set <- setNames(abstract_company_pool, abstract_name_pool)
abstract_set <- sample(abstract_set, length(abstract_set))
.assign_abstracts(
abstracts = abstract_set,
sifters = sifter_set,
assignment_cap = 2,
sifter_caps = c("Channing Glover" = 3)
)
.resample <- function(x, ...) x[sample.int(length(x), ...)] # see ?sample
.assign_sifters <- function(
n_abstracts,
sifters,
sifter_caps,
n_assignments
) {
seq_abstracts <- seq_len(n_abstracts)
sifter_assignments <- setNames(vector("list", length(sifters)), sifters)
assignment_counts <- rep(0, n_abstracts)
repeat_iter <- 0
repeat {
for (i in names(sifter_assignments)) {
# 1. Find the pool of abstracts available to this sifter (if any)
# a. Check if sifter cap has been met
# b. Add abstracts to pool if they have <n assignments
# c. Remove abstracts that are already assigned to this sifter
sifter_cap <- sifter_caps[[i]]
sifter_assignment_count <- length(sifter_assignments[[i]])
if (!is.null(sifter_cap) && sifter_assignment_count == sifter_cap) next
abstracts_under_min_count <- which(assignment_counts < n_assignments)
if (length(abstracts_under_min_count) == 0) next
already_assigned_to_sifter <- sifter_assignments[[i]]
abstracts_available <- abstracts_under_min_count[!abstracts_under_min_count %in% already_assigned_to_sifter]
if (length(abstracts_available) == 0) next
# 2. Select from pool randomly and assign to sifter
abstract_selected <- .resample(abstracts_available, 1)
sifter_assignments[[i]] <- c(sifter_assignments[[i]], abstract_selected)
# 3. Increment count for sampled abstract
assignment_counts[abstract_selected] <- assignment_counts[abstract_selected] + 1
if (all(assignment_counts == n_assignments)) break
}
# Reorder so sifter with fewest assignments gets next assignment
sifter_assignments <- sifter_assignments[order(lengths(sifter_assignments))]
repeat_iter <- repeat_iter + 1
if (all(assignment_counts == n_assignments)) break
}
sifter_assignments <- lapply(sifter_assignments, sort)
sifter_assignments[order(names(sifter_assignments))]
}
reps <- 3
x <- .assign_sifters(
n_abstracts = 223,
sifters = LETTERS[1:5],
sifter_caps = list(A = 10, B = 20),
reps
)
lengths(x)
# A B C D E
# 10 20 139 139 138
unique(table(unlist(x))) == reps
# [1] TRUE
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment