Last active
December 16, 2022 05:04
-
-
Save jeffeaton/099af98b46210b2b59b1a51942407ce2 to your computer and use it in GitHub Desktop.
Function to update ANC data in EPP XML file within Spectrum PJNZ
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(xml2) | |
library(epp) | |
eppd_overwrite_matrix <- function (xm, mat) { | |
if (!xml_attr(xm, "class") %in% c("[D", "[I")) | |
stop("Tried to invoke .parse_matrix() on array node not of class '[D' or '[I'.") | |
m_rows <- as.integer(xml_attr(xm, "length")) | |
stopifnot(nrow(mat) == m_rows) | |
rows <- xml_children(xm) | |
idx <- as.integer(xml_attr(rows, "index")) + 1L | |
if ( !all(idx == seq_len(m_rows)) ) { | |
stop("Missing row indices. Function needs development to handle this case.") | |
} | |
rows <- xml_find_first(rows, "array") | |
for(i in idx) { | |
eppd_overwrite_array(rows[[i]], mat[i, ]) | |
} | |
invisible() | |
} | |
eppd_overwrite_array <- function(x, arr_new) { | |
a_length <- as.integer(xml_attr(x, "length")) | |
a_mode <- switch(xml_attr(x, "class"), | |
int = "integer", | |
double = "numeric", | |
boolean = "logical", | |
java.lang.String = "character") | |
arr <- vector(a_mode, a_length) | |
elem <- xml_children(x) | |
idx <- as.integer(xml_attr(elem, "index")) + 1L | |
## `idx` contains indices of non-zero values in the existing array | |
## If there are any 0 values that are changed to non-zero values | |
## in the new array, a new XML node needs to be added to record | |
## the value | |
if ( length(arr_new[-idx]) > 0 && | |
any(arr_new[-idx] != 0.0) ) { | |
new_idx <- seq_along(arr_new)[-idx] | |
new_idx <- new_idx[arr_new[new_idx] != 0.0] | |
for(ii in new_idx) { | |
## Copy first node -> insert in new location | |
xml_add_child(x, elem[[1]], .where = new_idx-1, .copy = TRUE) | |
new_node <- xml_children(x)[[new_idx]] | |
xml_attr(new_node, "index") <- new_idx-1 # 0-based index in XML | |
} | |
xml_attr(x, "length") <- as.character(xml_length(x)) | |
idx <- sort(c(idx, new_idx)) | |
} | |
## Write new values into XML array | |
elem <- xml_children(x) | |
xml_text(elem) <- as.character(arr_new)[idx] | |
invisible() | |
} | |
#' Update ANC testing data in EPP XML | |
#' | |
#' @param pjnz path to base PJNZ file | |
#' @param pjnz_new path to new PJNZ file | |
#' @param eppd_new new EPP data set; constructed by `epp::read_epp_data()` | |
#' | |
#' @details | |
#' | |
#' @examples | |
#' | |
#' ## Step 1: Read base EPP data | |
#' pjnz <- "~/Downloads/mwi-xml-test.pjnz" | |
#' eppd <- read_epp_data(pjnz) | |
#' | |
#' ## Step 2: Update the EPP ANC data | |
#' ## * Do not change array structure | |
#' | |
#' eppd_new <- eppd | |
#' | |
#' eppd_new[["Southern Region"]]$ancrtsite.prev <- 1.1 * eppd[["Southern Region"]]$ancrtsite.prev | |
#' eppd_new[["Southern Region"]]$ancrtsite.n <- floor(0.85 * eppd[["Southern Region"]]$ancrtsite.n) | |
#' | |
#' eppd_new[["Southern Region"]]$ancrtcens$prev <- 1.1 * eppd[["Southern Region"]]$ancrtcens$prev | |
#' eppd_new[["Southern Region"]]$ancrtcens$n <- floor(0.85 * eppd[["Southern Region"]]$ancrtcens$n) | |
#' | |
#' pjnz_new <- "~/Downloads/mwi-xml-test-copy.pjnz" | |
#' | |
#' ## Step 3: Write a new PJNZ file with updated ANC data | |
#' ## * Requires the base file | |
#' | |
#' epp_xml_update_anc("~/Downloads/mwi-xml-test.pjnz", | |
#' "~/Downloads/mwi-xml_updated-anc.pjnz", | |
#' eppd_new) | |
#' | |
epp_xml_update_anc <- function(pjnz, pjnz_new, eppd_new) { | |
pjnz <- normalizePath(pjnz, mustWork = TRUE) | |
pjnz_new <- normalizePath(pjnz_new, mustWork = FALSE) | |
if (pjnz == pjnz_new) { | |
stop("pjnz is same as pjnz_new. Specify new file name for edited PJNZ file.") | |
} | |
xmlfile <- grep(".xml", unzip(pjnz, list = TRUE)$Name, value = TRUE) | |
if (!length(xmlfile)) { | |
warning(paste0("No EPP .xml file found for ", basename(pjnz))) | |
return(NULL) | |
} | |
con <- unz(pjnz, xmlfile) | |
epp.xml <- read_xml(con) | |
r <- xml_children(xml_child(epp.xml)) | |
names(r) <- xml_attr(r, "property") | |
obj <- xml_find_all(r, ".//object") | |
projsets <- obj[which(xml_attr(obj, "class") == "epp2011.core.sets.ProjectionSet")] | |
for(eppSet in projsets){ | |
projset_id <- as.integer(gsub("[^0-9]", "", xml_attr(eppSet, "id"))) | |
eppSet <- xml_children(eppSet) | |
names(eppSet) <- xml_attr(eppSet, "property") | |
eppName <- xml_text(eppSet[["name"]]) | |
stopifnot(eppName %in% names(eppd_new)) | |
## ANC data ## | |
if(exists("siteNames", eppSet)) { | |
siteNames <- epp:::.parse_array(xml_find_first(eppSet[["siteNames"]], "array")) | |
nsites <- length(siteNames) | |
## ANC site used | |
anc.used <- epp:::.parse_array(xml_find_first(eppSet[["siteSelected"]], "array")) | |
## ANC prevalence | |
anc.prev_new <- eppd_new[[eppName]]$anc.prev | |
anc.prev_new <- 100 * anc.prev_new | |
anc.prev_new[is.na(anc.prev_new)] <- -1 | |
stopifnot(rownames(anc.prev_new) == siteNames) | |
eppd_overwrite_matrix(xml_find_first(eppSet[["survData"]], "array"), anc.prev_new) | |
## ANC sample sizes | |
anc.n_new <- eppd_new[[eppName]]$anc.n | |
anc.n_new[is.na(anc.n_new)] <- -1 | |
stopifnot(rownames(anc.n_new) == siteNames) | |
eppd_overwrite_matrix(xml_find_first(eppSet[["survSampleSizes"]], "array"), anc.n_new) | |
## ANC-RT site level | |
if(length(eppSet[["dataInputMode"]]) && | |
length(xml_find_first(eppSet[["dataInputMode"]], ".//string"))) | |
input_mode <- xml_text(xml_find_first(eppSet[["dataInputMode"]], ".//string")) | |
if(length(eppSet[["PMTCTData"]]) && input_mode == "ANC") { | |
ancrtsite.prev_new <- eppd_new[[eppName]]$ancrtsite.prev | |
ancrtsite.prev_new <- 100 * ancrtsite.prev_new | |
ancrtsite.prev_new[is.na(ancrtsite.prev_new)] <- -1 | |
stopifnot(rownames(ancrtsite.prev_new) == siteNames) | |
eppd_overwrite_matrix(xml_find_first(eppSet[["PMTCTData"]], "array"), ancrtsite.prev_new) | |
ancrtsite.n_new <- eppd_new[[eppName]]$ancrtsite.n | |
ancrtsite.n_new[is.na(ancrtsite.n_new)] <- -1 | |
stopifnot(rownames(ancrtsite.n_new) == siteNames) | |
eppd_overwrite_matrix(xml_find_first(eppSet[["PMTCTSiteSampleSizes"]], "array"), ancrtsite.n_new) | |
} | |
} | |
## ANC-RT census level | |
if (length(eppSet[["censusPMTCTSurvData"]]) && input_mode == "ANC") { | |
## ANC-RT census prevalence | |
ancrtcens_prev_node <- xml_find_first(eppSet[["censusPMTCTSurvData"]], "array") | |
ancrtcens_len <- as.integer(xml_attr(ancrtcens_prev_node, "length")) | |
ancrtcens.prev_new <- rep(-1, ancrtcens_len) | |
idx_obs <- eppd_new[[eppName]]$ancrtcens$year - 1985 + 1 | |
ancrtcens.prev_new[idx_obs] <- 100 * eppd_new[[eppName]]$ancrtcens$prev | |
eppd_overwrite_array(ancrtcens_prev_node, ancrtcens.prev_new) | |
## ANC-RT census sample size | |
ancrtcens_n_node <- xml_find_first(eppSet[["censusPMTCTSampleSizes"]], "array") | |
ancrtcens_len <- as.integer(xml_attr(ancrtcens_n_node, "length")) | |
ancrtcens.n_new <- rep(-1, ancrtcens_len) | |
idx_obs <- eppd_new[[eppName]]$ancrtcens$year - 1985 + 1 | |
ancrtcens.n_new[idx_obs] <- eppd_new[[eppName]]$ancrtcens$n | |
eppd_overwrite_array(ancrtcens_n_node, ancrtcens.n_new) | |
} | |
} | |
## Save new PJNZ file and copy in new EPP XML | |
file.copy(pjnz, pjnz_new) | |
wd <- getwd() | |
on.exit(setwd(wd)) | |
tmpd <- tempfile() | |
dir.create(tmpd) | |
setwd(tmpd) | |
if (!dir.exists(dirname(xmlfile))) { | |
dir.create(dirname(xmlfile), recursive = TRUE) | |
} | |
write_xml(epp.xml, xmlfile) | |
on.exit(file.remove(xmlfile)) | |
utils::zip(pjnz_new, xmlfile) | |
pjnz_new | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment