Skip to content

Instantly share code, notes, and snippets.

@jeffeaton
Last active December 16, 2022 05:04
Show Gist options
  • Save jeffeaton/099af98b46210b2b59b1a51942407ce2 to your computer and use it in GitHub Desktop.
Save jeffeaton/099af98b46210b2b59b1a51942407ce2 to your computer and use it in GitHub Desktop.
Function to update ANC data in EPP XML file within Spectrum PJNZ
library(xml2)
library(epp)
eppd_overwrite_matrix <- function (xm, mat) {
if (!xml_attr(xm, "class") %in% c("[D", "[I"))
stop("Tried to invoke .parse_matrix() on array node not of class '[D' or '[I'.")
m_rows <- as.integer(xml_attr(xm, "length"))
stopifnot(nrow(mat) == m_rows)
rows <- xml_children(xm)
idx <- as.integer(xml_attr(rows, "index")) + 1L
if ( !all(idx == seq_len(m_rows)) ) {
stop("Missing row indices. Function needs development to handle this case.")
}
rows <- xml_find_first(rows, "array")
for(i in idx) {
eppd_overwrite_array(rows[[i]], mat[i, ])
}
invisible()
}
eppd_overwrite_array <- function(x, arr_new) {
a_length <- as.integer(xml_attr(x, "length"))
a_mode <- switch(xml_attr(x, "class"),
int = "integer",
double = "numeric",
boolean = "logical",
java.lang.String = "character")
arr <- vector(a_mode, a_length)
elem <- xml_children(x)
idx <- as.integer(xml_attr(elem, "index")) + 1L
## `idx` contains indices of non-zero values in the existing array
## If there are any 0 values that are changed to non-zero values
## in the new array, a new XML node needs to be added to record
## the value
if ( length(arr_new[-idx]) > 0 &&
any(arr_new[-idx] != 0.0) ) {
new_idx <- seq_along(arr_new)[-idx]
new_idx <- new_idx[arr_new[new_idx] != 0.0]
for(ii in new_idx) {
## Copy first node -> insert in new location
xml_add_child(x, elem[[1]], .where = new_idx-1, .copy = TRUE)
new_node <- xml_children(x)[[new_idx]]
xml_attr(new_node, "index") <- new_idx-1 # 0-based index in XML
}
xml_attr(x, "length") <- as.character(xml_length(x))
idx <- sort(c(idx, new_idx))
}
## Write new values into XML array
elem <- xml_children(x)
xml_text(elem) <- as.character(arr_new)[idx]
invisible()
}
#' Update ANC testing data in EPP XML
#'
#' @param pjnz path to base PJNZ file
#' @param pjnz_new path to new PJNZ file
#' @param eppd_new new EPP data set; constructed by `epp::read_epp_data()`
#'
#' @details
#'
#' @examples
#'
#' ## Step 1: Read base EPP data
#' pjnz <- "~/Downloads/mwi-xml-test.pjnz"
#' eppd <- read_epp_data(pjnz)
#'
#' ## Step 2: Update the EPP ANC data
#' ## * Do not change array structure
#'
#' eppd_new <- eppd
#'
#' eppd_new[["Southern Region"]]$ancrtsite.prev <- 1.1 * eppd[["Southern Region"]]$ancrtsite.prev
#' eppd_new[["Southern Region"]]$ancrtsite.n <- floor(0.85 * eppd[["Southern Region"]]$ancrtsite.n)
#'
#' eppd_new[["Southern Region"]]$ancrtcens$prev <- 1.1 * eppd[["Southern Region"]]$ancrtcens$prev
#' eppd_new[["Southern Region"]]$ancrtcens$n <- floor(0.85 * eppd[["Southern Region"]]$ancrtcens$n)
#'
#' pjnz_new <- "~/Downloads/mwi-xml-test-copy.pjnz"
#'
#' ## Step 3: Write a new PJNZ file with updated ANC data
#' ## * Requires the base file
#'
#' epp_xml_update_anc("~/Downloads/mwi-xml-test.pjnz",
#' "~/Downloads/mwi-xml_updated-anc.pjnz",
#' eppd_new)
#'
epp_xml_update_anc <- function(pjnz, pjnz_new, eppd_new) {
pjnz <- normalizePath(pjnz, mustWork = TRUE)
pjnz_new <- normalizePath(pjnz_new, mustWork = FALSE)
if (pjnz == pjnz_new) {
stop("pjnz is same as pjnz_new. Specify new file name for edited PJNZ file.")
}
xmlfile <- grep(".xml", unzip(pjnz, list = TRUE)$Name, value = TRUE)
if (!length(xmlfile)) {
warning(paste0("No EPP .xml file found for ", basename(pjnz)))
return(NULL)
}
con <- unz(pjnz, xmlfile)
epp.xml <- read_xml(con)
r <- xml_children(xml_child(epp.xml))
names(r) <- xml_attr(r, "property")
obj <- xml_find_all(r, ".//object")
projsets <- obj[which(xml_attr(obj, "class") == "epp2011.core.sets.ProjectionSet")]
for(eppSet in projsets){
projset_id <- as.integer(gsub("[^0-9]", "", xml_attr(eppSet, "id")))
eppSet <- xml_children(eppSet)
names(eppSet) <- xml_attr(eppSet, "property")
eppName <- xml_text(eppSet[["name"]])
stopifnot(eppName %in% names(eppd_new))
## ANC data ##
if(exists("siteNames", eppSet)) {
siteNames <- epp:::.parse_array(xml_find_first(eppSet[["siteNames"]], "array"))
nsites <- length(siteNames)
## ANC site used
anc.used <- epp:::.parse_array(xml_find_first(eppSet[["siteSelected"]], "array"))
## ANC prevalence
anc.prev_new <- eppd_new[[eppName]]$anc.prev
anc.prev_new <- 100 * anc.prev_new
anc.prev_new[is.na(anc.prev_new)] <- -1
stopifnot(rownames(anc.prev_new) == siteNames)
eppd_overwrite_matrix(xml_find_first(eppSet[["survData"]], "array"), anc.prev_new)
## ANC sample sizes
anc.n_new <- eppd_new[[eppName]]$anc.n
anc.n_new[is.na(anc.n_new)] <- -1
stopifnot(rownames(anc.n_new) == siteNames)
eppd_overwrite_matrix(xml_find_first(eppSet[["survSampleSizes"]], "array"), anc.n_new)
## ANC-RT site level
if(length(eppSet[["dataInputMode"]]) &&
length(xml_find_first(eppSet[["dataInputMode"]], ".//string")))
input_mode <- xml_text(xml_find_first(eppSet[["dataInputMode"]], ".//string"))
if(length(eppSet[["PMTCTData"]]) && input_mode == "ANC") {
ancrtsite.prev_new <- eppd_new[[eppName]]$ancrtsite.prev
ancrtsite.prev_new <- 100 * ancrtsite.prev_new
ancrtsite.prev_new[is.na(ancrtsite.prev_new)] <- -1
stopifnot(rownames(ancrtsite.prev_new) == siteNames)
eppd_overwrite_matrix(xml_find_first(eppSet[["PMTCTData"]], "array"), ancrtsite.prev_new)
ancrtsite.n_new <- eppd_new[[eppName]]$ancrtsite.n
ancrtsite.n_new[is.na(ancrtsite.n_new)] <- -1
stopifnot(rownames(ancrtsite.n_new) == siteNames)
eppd_overwrite_matrix(xml_find_first(eppSet[["PMTCTSiteSampleSizes"]], "array"), ancrtsite.n_new)
}
}
## ANC-RT census level
if (length(eppSet[["censusPMTCTSurvData"]]) && input_mode == "ANC") {
## ANC-RT census prevalence
ancrtcens_prev_node <- xml_find_first(eppSet[["censusPMTCTSurvData"]], "array")
ancrtcens_len <- as.integer(xml_attr(ancrtcens_prev_node, "length"))
ancrtcens.prev_new <- rep(-1, ancrtcens_len)
idx_obs <- eppd_new[[eppName]]$ancrtcens$year - 1985 + 1
ancrtcens.prev_new[idx_obs] <- 100 * eppd_new[[eppName]]$ancrtcens$prev
eppd_overwrite_array(ancrtcens_prev_node, ancrtcens.prev_new)
## ANC-RT census sample size
ancrtcens_n_node <- xml_find_first(eppSet[["censusPMTCTSampleSizes"]], "array")
ancrtcens_len <- as.integer(xml_attr(ancrtcens_n_node, "length"))
ancrtcens.n_new <- rep(-1, ancrtcens_len)
idx_obs <- eppd_new[[eppName]]$ancrtcens$year - 1985 + 1
ancrtcens.n_new[idx_obs] <- eppd_new[[eppName]]$ancrtcens$n
eppd_overwrite_array(ancrtcens_n_node, ancrtcens.n_new)
}
}
## Save new PJNZ file and copy in new EPP XML
file.copy(pjnz, pjnz_new)
wd <- getwd()
on.exit(setwd(wd))
tmpd <- tempfile()
dir.create(tmpd)
setwd(tmpd)
if (!dir.exists(dirname(xmlfile))) {
dir.create(dirname(xmlfile), recursive = TRUE)
}
write_xml(epp.xml, xmlfile)
on.exit(file.remove(xmlfile))
utils::zip(pjnz_new, xmlfile)
pjnz_new
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment