Last active
August 15, 2025 13:42
-
-
Save matt-dray/f3063dd2f786053633191d6d8b7295e5 to your computer and use it in GitHub Desktop.
Assign abstracts to sifters as equally as possible so that each abstract is reviewed exactly n times, then add that content to spreadsheets that are unique to each sifter (pretty hacky, made quickly)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Add abstract info and assessment columns to assignments | |
| .prepare_sifter_outputs <- function( | |
| assignments, | |
| abstracts_df, | |
| strip_abstract_info = TRUE, | |
| add_assessment_columns = TRUE | |
| ) { | |
| assignments <- lapply( | |
| assignments, | |
| \(x) abstracts_df[abstracts_df$abstract_id %in% x, ] | |
| ) | |
| if (strip_abstract_info) { | |
| columns_to_retain <- c( | |
| "abstract_id", | |
| "user_id", | |
| "q9_abstract_title", | |
| "q9_abstract", | |
| "q7_domain_what", | |
| "q7_domain_how", | |
| "q7_domain_why" | |
| ) | |
| assignments <- lapply(assignments, \(x) x[, columns_to_retain]) | |
| } | |
| if (add_assessment_columns) { | |
| assignments <- lapply( | |
| assignments, | |
| \(x) { | |
| x["Contribution to decision making (1 to 6)"] <- NA_character_ | |
| x["Soundness of analytical methods (1 to 6)"] <- NA_character_ | |
| x["Reproducibility (1 to 6)"] <- NA_character_ | |
| x["Presentation and interest (1 to 6)"] <- NA_character_ | |
| x["Sifter comments"] <- NA_character_ | |
| return(x) | |
| } | |
| ) | |
| } | |
| assignments | |
| } | |
| # Create spreadhseets with guidance and abstracts to assess | |
| .generate_sifter_spreadsheets <- function( | |
| assignments, | |
| path = here::here("output"), | |
| overwrite_file = TRUE | |
| ) { | |
| for (i in seq_along(assignments)) { | |
| sifter_name <- names(assignments[i]) | |
| wb <- openxlsx::createWorkbook() | |
| # Styles ---- | |
| openxlsx::modifyBaseFont( | |
| wb = wb, | |
| fontSize = 11, | |
| fontName = "Arial" | |
| ) | |
| styles <- list( | |
| title = openxlsx::createStyle(fontSize = 20, textDecoration = "bold"), | |
| subtitle = openxlsx::createStyle( | |
| fontSize = 14, | |
| textDecoration = "bold", | |
| valign = "bottom" | |
| ), | |
| cell_wrap = openxlsx::createStyle(wrapText = TRUE), | |
| cell_valign = openxlsx::createStyle(valign = "top") | |
| ) | |
| # Cover ---- | |
| ## Content ---- | |
| openxlsx::addWorksheet(wb, sheetName = "cover") | |
| openxlsx::writeData(wb, sheet = "cover", x = "HACA Sifting 2024") | |
| openxlsx::writeData( | |
| wb, | |
| sheet = "cover", | |
| x = paste("Sifter:", sifter_name), | |
| startRow = 2 | |
| ) | |
| openxlsx::writeData( | |
| wb, | |
| sheet = "cover", | |
| x = "Deadline", | |
| startRow = 3 | |
| ) | |
| openxlsx::writeData( | |
| wb, | |
| sheet = "cover", | |
| x = "Please return by Wednesday 27 March at 11:00 to <email address>", | |
| startRow = 4 | |
| ) | |
| openxlsx::writeData( | |
| wb, | |
| sheet = "cover", | |
| x = "Domains", | |
| startRow = 5 | |
| ) | |
| openxlsx::writeData( | |
| wb, | |
| sheet = "cover", | |
| x = paste( | |
| "This year's conference theme is 'by analysts, for analysts and with leaders' exploring the 'what, how and why of analytics':\n", | |
| " • what health and care problems can analytics help address?\n", | |
| " • how can these problems be addressed through analytical methods?\n", | |
| " • why is high quality analysis needed to make impactful decisions?" | |
| ), | |
| startRow = 6 | |
| ) | |
| openxlsx::writeData( | |
| wb, | |
| sheet = "cover", | |
| x = "Scoring", | |
| startRow = 7 | |
| ) | |
| openxlsx::writeData( | |
| wb, | |
| sheet = "cover", | |
| x = paste( | |
| "As a reviewer, please conduct a review of the abstracts assigned to you, scoring each category 1 to 6 (columns H to L in the 'abstracts' sheet).\n\n", | |
| "You will be asked to rate each abstract on each of the following:\n", | |
| " • Contribution to decision making (did this analysis inform strategy or decision making and how might this process be expected to influence policy and practice in the host organisation and beyond?)\n", | |
| " • Soundness of analytical methods (novel or complex methods are not necessarily better, we are looking for the right analytical approach to the question)\n", | |
| " • Reproducibility (are the methods well described and replicable?)\n", | |
| " • Presentation and interest (does the abstract describe interesting work that is likely to provoke thought and develop practice within its area?)\n\n", | |
| "The scores represent the following:\n", | |
| " • 6 is outstanding (clear accept)\n", | |
| " • 5 is excellent\n", | |
| " • 4 is good\n", | |
| " • 3 is satisfactory (borderline accept depending on the strength of other submissions)\n", | |
| " • 2 is fair (may be accepted if other papers in this conference track are weak)\n", | |
| " • 1 is reject (overall the abstract may be sound, but judged just on this score, a clear reject)\n\n", | |
| "Further information:\n", | |
| " • Submitters have provided a yes/no answer (columns E to G in the 'abstracts' sheet) to say whether their abstract fits the domains of the conference (see 'domains' section above), which may help in your decision making.", | |
| " • Each abstract will be reviewed by two people and their scores considered together.\n", | |
| " • Please note that the scores will not be averaged and none are essential for acceptance to the conference.\n", | |
| " • Describe the abstract as accurately as you can within the scoring system.\n", | |
| " • The final decision will rest on a consideration of each factor, and may differ by each parallel conference talk stream.\n", | |
| " • Some weaker paper submissions may be accepted for a poster. If you wish to propose that a talk is accepted as a proposal please do so in the 'Comments' section, along with any other comments you might have." | |
| ), | |
| startRow = 8 | |
| ) | |
| ## Style ---- | |
| openxlsx::setColWidths( | |
| wb, | |
| sheet = "cover", | |
| cols = 1, | |
| widths = 100 | |
| ) | |
| openxlsx::addStyle( | |
| wb, | |
| sheet = "cover", | |
| style = styles$cell_wrap, | |
| rows = 1:8, | |
| cols = 1, | |
| gridExpand = TRUE, | |
| stack = TRUE | |
| ) | |
| openxlsx::addStyle( | |
| wb, | |
| sheet = "cover", | |
| style = styles$title, | |
| rows = 1, | |
| cols = 1, | |
| gridExpand = TRUE, | |
| stack = TRUE | |
| ) | |
| openxlsx::addStyle( | |
| wb, | |
| sheet = "cover", | |
| style = styles$subtitle, | |
| rows = c(3, 5, 7), | |
| cols = 1, | |
| gridExpand = TRUE, | |
| stack = TRUE | |
| ) | |
| openxlsx::setRowHeights( | |
| wb = wb, | |
| sheet = "cover", | |
| rows = c(3, 5, 7), | |
| heights = 34 | |
| ) | |
| # Abstracts ---- | |
| openxlsx::addWorksheet(wb, sheetName = "abstracts") | |
| df <- assignments[[i]] | |
| df_nrow <- nrow(df) | |
| df_nrow_seq <- seq(df_nrow) | |
| df_ncol <- ncol(df) | |
| df_ncol_seq <- seq(df_ncol) | |
| openxlsx::writeDataTable( | |
| wb, | |
| sheet = "abstracts", | |
| x = df, | |
| withFilter = FALSE | |
| ) | |
| openxlsx::setColWidths( | |
| wb, | |
| sheet = "abstracts", | |
| cols = df_ncol_seq, | |
| widths = c(rep("auto", 2), 32, 64, rep("16", 7), 32) | |
| ) | |
| openxlsx::addStyle( | |
| wb, | |
| sheet = "abstracts", | |
| style = styles$cell_wrap, | |
| rows = df_nrow_seq, | |
| cols = 3:4, | |
| gridExpand = TRUE, | |
| stack = TRUE | |
| ) | |
| openxlsx::addStyle( | |
| wb, | |
| sheet = "abstracts", | |
| style = styles$cell_wrap, | |
| rows = 1, | |
| cols = df_ncol_seq, | |
| gridExpand = TRUE, | |
| stack = TRUE | |
| ) | |
| openxlsx::addStyle( | |
| wb, | |
| sheet = "abstracts", | |
| style = styles$cell_valign, | |
| rows = df_nrow_seq, | |
| cols = df_ncol_seq, | |
| gridExpand = TRUE, | |
| stack = TRUE | |
| ) | |
| openxlsx::dataValidation( | |
| wb, | |
| sheet = "abstracts", | |
| rows = df_nrow_seq + 1, | |
| cols = 8:11, | |
| type = "whole", | |
| operator = "between", | |
| value = c(1, 6) | |
| ) | |
| # Save | |
| sifter_name <- gsub("[[:punct:]]", "", tolower(names(assignments[i]))) | |
| sifter_name <- gsub("[[:space:]]", "-", sifter_name) | |
| sifter_file <- paste0("haca-2024-sifting_", sifter_name, ".xlsx") | |
| sifter_path <- file.path(path, sifter_file) | |
| cat(paste0("> Writing ", basename(sifter_path), "... ")) | |
| openxlsx::saveWorkbook(wb, sifter_path, overwrite = overwrite_file) | |
| cat("done.\n") | |
| } | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # As below, but checks for exact name and affiliation matches between | |
| # sifters and abstracts. | |
| # | |
| # It's possible certain abstracts might not get assigned, especially given combos | |
| # of sifters writing abstracts, abstracts from sifters' affiliations, assignment | |
| # capping and sifter capping. Should probably report any abstracts that have | |
| # <assignment_cap assignments. As a precaution, have built in a max_iterations arg | |
| # in case of infinite looping, but I don't think that will come into play. | |
| .resample <- function(x, ...) x[sample.int(length(x), ...)] # see ?sample | |
| .assign_abstracts <- function( | |
| abstracts, # c("First Last" = "Org") | |
| sifters, # c("First Last" = "Org") | |
| assignment_cap = 2, | |
| sifter_caps = NULL, # c("First Last" = 5) | |
| max_iterations = 1000 # just in case? | |
| ) { | |
| n_abstracts <- length(abstracts) | |
| seq_abstracts <- seq_len(n_abstracts) | |
| sifter_assignments <- setNames(vector("list", length(sifters)), names(sifters)) | |
| assignment_counts <- rep(0, n_abstracts) | |
| iter <- 0 | |
| repeat { | |
| for (i in names(sifter_assignments)) { | |
| # 1. Find the pool of abstracts available to this sifter (if any) | |
| # a. Check if sifter cap has been met | |
| sifter_has_cap <- i %in% names(sifter_caps) | |
| if (sifter_has_cap) { | |
| sifter_cap <- sifter_caps[[i]] | |
| sifter_assignment_count <- length(sifter_assignments[[i]]) | |
| } | |
| if (sifter_has_cap && sifter_assignment_count == sifter_cap) next | |
| # b. Add abstracts to pool if they have <n assignments | |
| abstracts_under_min_count <- which(assignment_counts < assignment_cap) | |
| if (length(abstracts_under_min_count) == 0) next | |
| # c. Remove abstracts that are already assigned to this sifter | |
| already_assigned_to_sifter <- sifter_assignments[[i]] | |
| abstracts_available <- abstracts_under_min_count[!abstracts_under_min_count %in% already_assigned_to_sifter] | |
| if (length(abstracts_available) == 0) next | |
| # d. Remove abstracts by the named sifter | |
| abstracts_by_sifter <- which(i == names(abstracts[abstracts_available])) | |
| if (length(abstracts_by_sifter) > 0) { | |
| abstracts_available <- abstracts_available[-abstracts_by_sifter] | |
| } | |
| if (length(abstracts_available) == 0) next | |
| # e. Remove abstracts with the same affiliation as the sifter | |
| sifter_affiliation <- unname(sifters[i]) | |
| abstracts_by_same_affiliation <- which(sifter_affiliation == unname(abstracts[abstracts_available])) | |
| if (length(abstracts_by_same_affiliation) > 0) { | |
| abstracts_available <- abstracts_available[-abstracts_by_same_affiliation] | |
| } | |
| if (length(abstracts_available) == 0) next | |
| # 2. Select from pool randomly and assign to sifter | |
| abstract_selected <- .resample(abstracts_available, 1) | |
| sifter_assignments[[i]] <- c(sifter_assignments[[i]], abstract_selected) | |
| # 3. Increment count for sampled abstract | |
| assignment_counts[abstract_selected] <- assignment_counts[abstract_selected] + 1 | |
| if (all(assignment_counts == assignment_cap)) break | |
| } | |
| # Reorder so sifter with fewest assignments gets next assignment | |
| sifter_assignments <- sifter_assignments[order(lengths(sifter_assignments))] | |
| iter <- iter + 1 | |
| if (all(assignment_counts == assignment_cap)) break | |
| if (iter == max_iterations) { | |
| cat("max_iterations reached") | |
| break | |
| } | |
| } | |
| sifter_assignments <- lapply(sifter_assignments, sort) | |
| sifter_assignments[order(names(sifter_assignments))] | |
| } | |
| # Prepare demo sifter and abstract sets | |
| set.seed(1) | |
| n_sifters <- 3 | |
| sifter_set <- setNames(charlatan::ch_company(n_sifters), charlatan::ch_name(n_sifters)) | |
| sifter_names <- names(sifter_set) | |
| sifter_companies <- unname(sifter_set) | |
| abstract_name_pool <- c(sifter_names, charlatan::ch_name(10)) | |
| abstract_company_pool <- c(rep(sifter_companies, 2), charlatan::ch_company(7)) | |
| abstract_set <- setNames(abstract_company_pool, abstract_name_pool) | |
| abstract_set <- sample(abstract_set, length(abstract_set)) | |
| .assign_abstracts( | |
| abstracts = abstract_set, | |
| sifters = sifter_set, | |
| assignment_cap = 2, | |
| sifter_caps = c("Channing Glover" = 3) | |
| ) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| .resample <- function(x, ...) x[sample.int(length(x), ...)] # see ?sample | |
| .assign_sifters <- function( | |
| n_abstracts, | |
| sifters, | |
| sifter_caps, | |
| n_assignments | |
| ) { | |
| seq_abstracts <- seq_len(n_abstracts) | |
| sifter_assignments <- setNames(vector("list", length(sifters)), sifters) | |
| assignment_counts <- rep(0, n_abstracts) | |
| repeat_iter <- 0 | |
| repeat { | |
| for (i in names(sifter_assignments)) { | |
| # 1. Find the pool of abstracts available to this sifter (if any) | |
| # a. Check if sifter cap has been met | |
| # b. Add abstracts to pool if they have <n assignments | |
| # c. Remove abstracts that are already assigned to this sifter | |
| sifter_cap <- sifter_caps[[i]] | |
| sifter_assignment_count <- length(sifter_assignments[[i]]) | |
| if (!is.null(sifter_cap) && sifter_assignment_count == sifter_cap) next | |
| abstracts_under_min_count <- which(assignment_counts < n_assignments) | |
| if (length(abstracts_under_min_count) == 0) next | |
| already_assigned_to_sifter <- sifter_assignments[[i]] | |
| abstracts_available <- abstracts_under_min_count[!abstracts_under_min_count %in% already_assigned_to_sifter] | |
| if (length(abstracts_available) == 0) next | |
| # 2. Select from pool randomly and assign to sifter | |
| abstract_selected <- .resample(abstracts_available, 1) | |
| sifter_assignments[[i]] <- c(sifter_assignments[[i]], abstract_selected) | |
| # 3. Increment count for sampled abstract | |
| assignment_counts[abstract_selected] <- assignment_counts[abstract_selected] + 1 | |
| if (all(assignment_counts == n_assignments)) break | |
| } | |
| # Reorder so sifter with fewest assignments gets next assignment | |
| sifter_assignments <- sifter_assignments[order(lengths(sifter_assignments))] | |
| repeat_iter <- repeat_iter + 1 | |
| if (all(assignment_counts == n_assignments)) break | |
| } | |
| sifter_assignments <- lapply(sifter_assignments, sort) | |
| sifter_assignments[order(names(sifter_assignments))] | |
| } | |
| reps <- 3 | |
| x <- .assign_sifters( | |
| n_abstracts = 223, | |
| sifters = LETTERS[1:5], | |
| sifter_caps = list(A = 10, B = 20), | |
| reps | |
| ) | |
| lengths(x) | |
| # A B C D E | |
| # 10 20 139 139 138 | |
| unique(table(unlist(x))) == reps | |
| # [1] TRUE |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment