Skip to content

Instantly share code, notes, and snippets.

@jeffeaton
Created May 3, 2024 05:31
Show Gist options
  • Save jeffeaton/5f966a743e032d573842bbc75e108aa3 to your computer and use it in GitHub Desktop.
Save jeffeaton/5f966a743e032d573842bbc75e108aa3 to your computer and use it in GitHub Desktop.
PHIA survey analysis of CD4 <200 by care cascade stage
library(dplyr)
library(readr)
library(tidyr)
library(forcats)
library(haven)
library(survey)
phia_path <- "~/Data/household surveys/PHIA/datasets"
cmr2017phia <- rdhs::read_zipdata(file.path(phia_path, "CMR/datasets/302 CAMPHIA 2017-2018 Adult Biomarker Dataset (DTA).zip"))
civ2018phia <- rdhs::read_zipdata(file.path(phia_path, "CIV/datasets/302 CIPHIA 2017-2018 Adult Biomarker Dataset (DTA).zip"))
eth2018phia <- rdhs::read_zipdata(file.path(phia_path, "ETH/datasets/302 EPHIA 2017-2018 Adult Biomarker Dataset (DTA).zip"))
ken2018phia <- rdhs::read_zipdata(file.path(phia_path, "KEN/datasets/KENPHIA 2018 Household Interview and Biomarker Datasets v1.1 (DTA).zip"), "kenphia2018adultbio.dta")
lso2017phia <- rdhs::read_zipdata(file.path(phia_path, "LSO/datasets/302_LePHIA 2016-2017 Adult Biomarker Dataset (DTA).zip"))
mwi2016phia <- rdhs::read_zipdata(file.path(phia_path, "MWI/datasets/MPHIA 2015-2016 Household Interview and Biomarker Datasets v2.0 (DTA).zip"), "mphia2015adultbio.dta")
nam2017phia <- rdhs::read_zipdata(file.path(phia_path, "NAM/datasets/302_NAMPHIA 2017 Adult Biomarker Dataset (DTA).zip"))
rwa2019phia <- rdhs::read_zipdata(file.path(phia_path, "RWA/datasets/302 RPHIA 2018-2019 Adult Biomarker Dataset (DTA).zip"))
swz2017phia <- rdhs::read_zipdata(file.path(phia_path, "SWZ/datasets/SHIMS2 2016-2017 Household Interview and Biomarker Datasets v2.0 (DTA).zip"), "shims22016adultbio.dta")
tza2017phia <- rdhs::read_zipdata(file.path(phia_path, "TZA/datasets/THIS 2016-2017 Household Interview and Biomarker Datasets v2.0 (DTA).zip"), "this2016adultbio.dta")
uga2017phia <- rdhs::read_zipdata(file.path(phia_path, "UGA/datasets/302 UPHIA 2016-2017 Adult Biomarker Dataset (DTA).zip"))
zmb2016phia <- rdhs::read_zipdata(file.path(phia_path, "ZMB/datasets/ZAMPHIA 2016 Household Interview and Biomarker Datasets v2.0 (DTA).zip"), "zamphia2016adultbio.dta")
zwe2016phia <- rdhs::read_zipdata(file.path(phia_path, "ZWE/datasets/302_ZIMPHIA 2015-2016 Adult Biomarker Dataset (DTA).zip"))
nga2021phia <- haven::read_dta("~/Data/household surveys/Nigeria/NAIIS/public/datasets/NAIIS2018adultbio.dta")
#' ## PHIA2
mwi2020phia <- haven::read_dta(unz(file.path(phia_path, "MWI2020PHIA/datasets/MPHIA 2020-2021 Household Interview and Biomarker Data (DTA).zip"), "mphia2020adultbio.dta"))
lso2020phia <- haven::read_dta(unz(file.path(phia_path, "LSO2020PHIA/datasets/LePHIA 2020 Household (DTA).zip"), "lephia2020adultbio.dta"))
moz2021phia <- haven::read_dta(unz(file.path(phia_path, "MOZ2021PHIA/datasets/INSIDA 2021 Household Interview and Biomarker Datasets (DTA).zip"), "insida2021adultbio.dta"))
swz2021phia <- haven::read_dta(unz(file.path(phia_path, "SWZ2021PHIA/datasets/SHIMS3 2021 Household Interview and Biomarker Datasets (DTA).zip"), "shims32021adultbio.dta"))
zwe2020phia <- haven::read_dta(unz(file.path(phia_path, "ZWE2020PHIA/datasets/ZIMPHIA 2020 Household (DTA).zip"), "zimphia2020adultbio.dta"))
zmb2021phia <- haven::read_dta(unz(file.path(phia_path, "ZMB2021PHIA/ZAMPHIA 2021 PU Package.zip"), "zamphia2021adultbio.dta"))
bwa2021phia <- haven::read_dta("~/Data/household surveys/Botswana/BAIS V/datasets/baisv2021adultbio.dta.zip")
nga2021phia$centroidid <- sprintf("NG%04d%05d", nga2021phia$varstrat, nga2021phia$varunit)
datl <- grep("[a-z]{3}20[0-9]{2}phia$", ls(), value = TRUE) %>%
{setNames(lapply(., get), toupper(.))}
dat <- datl %>%
lapply(select, any_of(c("country", "householdid", "personid", "centroidid", "gender", "age",
"hivstatusfinal", "cd4count", "cd4cat",
"tri90aware", "tri90art", "tri90vls", "btwt0"))) %>%
lapply(haven::zap_labels) %>%
Map(mutate, ., survey_id = names(.)) %>%
bind_rows()
dat %>%
summarise(has_cd4 = any(!is.na(cd4count)), .by = c(country, survey_id)) %>%
print(n = Inf)
#' Subset to:
#' * Surveys that included CD4 testing
#' * HIV positive respondents
#' * btwt0 > 0
dat <- dat %>%
filter(
any(!is.na(cd4count)),
hivstatusfinal == 1,
!is.na(btwt0),
.by = survey_id
) %>%
mutate(
cascade = case_when(tri90aware == 2 ~ "1_unaware",
tri90aware == 1 & tri90art %in% 2:3 ~ "2_aware_untreated",
tri90art == 1 & tri90vls %in% 2:3 ~ "3_art_unsuppressed",
tri90vls == 1 ~ "4_vls"),
cd4_bel200 = as.integer(cd4count < 200)
)
des <- svydesign(~centroidid, strata = ~survey_id, nest = TRUE, data = dat, weights = ~btwt0)
cd4_bel200 <- svyby(~cd4_bel200, ~survey_id+cascade, des,
svyciprop, na.rm = TRUE, vartype = c("se", "ci")) %>%
rename_with(\(x) sub("^se\\..*", "se", (x)))
cascade_prop <- svyby(~cascade, ~survey_id, des,
svymean, na.rm = TRUE, vartype = c("se", "ci"))
cascade_prop <- cascade_prop %>%
rename_with(\(x) sub("^cascade", "mean.cascade", x)) %>%
pivot_longer(cols = -survey_id, names_sep = ".cascade", names_to = c("key", "cascade")) %>%
mutate(key = paste0("prop_", key)) %>%
pivot_wider(names_from = key, values_from = value)
cascade_counts <- dat %>%
filter(!is.na(cascade)) %>%
count(survey_id, cascade)
phia_info <- tribble(
~survey_id, ~iso3, ~country, ~survey_year_label, ~survey_year, ~phia_phase,
"CMR2017PHIA", "CMR", "Cameroon", "2017", 2017, "PHIA1",
"CIV2018PHIA", "CIV", "Cote d'Ivoire", "2017-2018", 2018, "PHIA1",
"SWZ2017PHIA", "SWZ", "Eswatini", "2016-2017", 2017, "PHIA1",
"ETH2018PHIA", "ETH", "Ethiopia", "2017-2018", 2018, "PHIA1",
"KEN2018PHIA", "KEN", "Kenya", "2018", 2018, "PHIA1",
"LSO2017PHIA", "LSO", "Lesotho", "2016-2017", 2017, "PHIA1",
"MWI2016PHIA", "MWI", "Malawi", "2015-2016", 2016, "PHIA1",
"NAM2017PHIA", "NAM", "Namibia", "2017", 2017, "PHIA1",
"RWA2019PHIA", "RWA", "Rwanda", "2018-2019", 2019, "PHIA1",
"TZA2017PHIA", "TZA", "Tanzania", "2016-2017", 2017, "PHIA1",
"UGA2017PHIA", "UGA", "Uganda", "2016-2017", 2017, "PHIA1",
"ZMB2016PHIA", "ZMB", "Zambia", "2016", 2016, "PHIA1",
"ZWE2016PHIA", "ZWE", "Zimbabwe", "2015-2016", 2016, "PHIA1",
"NGA2021PHIA", "NGA", "Nigeria", "2018", 2018, "PHIA1",
##
"LSO2020PHIA", "LSO", "Lesotho", "2020", 2020, "PHIA2",
"MOZ2021PHIA", "MOZ", "Mozambique", "2021", 2021, "PHIA2",
"MWI2020PHIA", "MWI", "Malawi", "2020", 2020, "PHIA2",
"SWZ2021PHIA", "SWZ", "Eswatini", "2021", 2021, "PHIA2",
"ZWE2020PHIA", "ZWE", "Zimbabwe", "2020", 2020, "PHIA2",
"ZMB2021PHIA", "ZMB", "Zambia", "2021", 2021, "PHIA2",
"BWA2021PHIA", "BWA", "Botswana", "2021", 2021, "PHIA2"
)
res <- phia_info %>%
right_join(cd4_bel200) %>%
left_join(cascade_counts) %>%
left_join(cascade_prop) %>%
arrange(cascade)
library(ggplot2)
library(scales)
res %>%
mutate(
n_eff = (cd4_bel200 * (1.0 - cd4_bel200)) / se^2,
) %>%
ggplot(aes(survey_year, cd4_bel200)) +
geom_point(aes(size = n_eff)) +
geom_smooth(method = "lm", show.legend = FALSE, aes(weight = n_eff)) +
facet_wrap(~cascade) +
scale_y_continuous(labels = label_percent()) +
labs(title = "Percentage with CD4 <200 by cascade stage; PHIA surveys")
res %>%
mutate(
n_eff = (cd4_bel200 * (1.0 - cd4_bel200)) / se^2,
) %>%
ggplot(aes(country, cd4_bel200, ymin = ci_l, ymax = ci_u, fill = phia_phase)) +
geom_col(position = position_dodge2(width = 1.0, padding = 0.0, preserve = "single")) +
geom_linerange(position = position_dodge2(width = 1.0, padding = 0.0, preserve = "single")) +
facet_wrap(~cascade) +
scale_y_continuous(labels = label_percent()) +
scale_fill_brewer(palette = "Set1") +
labs(title = "Percentage with CD4 <200 by cascade stage; PHIA surveys",
x = NULL) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
write_csv(res, "phia-prop-cd4-below200-by-stage.csv", na = "")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment