Last active
September 27, 2021 14:37
-
-
Save alanocallaghan/ae83346bc66cd5d2cc8ccda1e9abfa07 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(utils)

# Attach every installed package whose priority is "base".
for (pkg in rownames(installed.packages(priority = "base"))) {
  library(pkg, character.only = TRUE)
}
# Packages that the rest of this script uses; they are attached by name at the
# end of the script.
pkgs <- c(
  "NHANES",
  "RNHANES",
  "ggplot2",
  "jtools",
  "interactions",
  "patchwork",
  "dplyr",
  "tidyr"
)

# Install whichever of `pkgs` are not installed yet. The installed-package
# listing is scanned once instead of once per package, and the missing ones
# are handed to install.packages() in a single call. local() keeps the
# temporary vector out of the global environment.
local({
  missing_pkgs <- setdiff(pkgs, rownames(installed.packages()))
  if (length(missing_pkgs) > 0) {
    install.packages(missing_pkgs)
  }
})
# Proportion of each Race1 category in NHANES::NHANES, used below as the
# target make-up of a simple random sample. The vector is named by category
# so that the lookup does not depend on the order of the factor levels.
prop <- table(NHANES::NHANES$Race1) / nrow(NHANES::NHANES)
prop <- stats::setNames(as.numeric(prop), names(prop))

set.seed(1000) # reproducible

`%>%` <- magrittr::`%>%`

# Take a sample from NHANESraw that represents a simple random sample.
dat <- NHANES::NHANESraw %>%
  # Attach the target proportion of each row's race, looked up by name
  # (NA when Race1 has no entry in `prop`).
  dplyr::mutate(weight = unname(prop[as.character(Race1)])) %>%
  dplyr::group_by(Race1) %>%
  # Sample from each race according to its proportion, aiming for about 10000
  # observations in total. sample_n() takes a single size per group; `weight`
  # is constant within a group, so its first value is used rather than the
  # whole column.
  dplyr::sample_n(10000 * weight[1]) %>%
  dplyr::rename(Sex = Gender) %>%
  dplyr::select(-c(
    # weighting columns
    weight,
    WTINT2YR, WTMEC2YR,
    SDMVPSU, SDMVSTRA,
    # variables which will not be used
    SurveyYr, HHIncomeMid,
    Length, HeadCirc,
    BMICatUnder20yrs,
    BPSys1, BPSys2,
    BPDia1, BPDia2,
    BPSys3, BPDia3,
    UrineVol2,
    UrineFlow2,
    PregnantNow,
    # data which was only recorded for one out of two survey rounds
    Race3,
    TVHrsDay,
    CompHrsDay,
    TVHrsDayChild,
    CompHrsDayChild
  )) %>%
  # Race1 is the only grouping variable, so this removes all grouping.
  dplyr::ungroup()
# Add FEV1 variable
# SPXNFEV1 is read from the SPX_F (2009-2010) and SPX_G (2011-2012) files and
# joined onto `dat` by participant ID under the name FEV1. local() keeps the
# intermediate tables out of the global environment.
dat <- local({
  cycle_2009 <- RNHANES::nhanes_load_data(c("SPX_F"), "2009-2010")
  cycle_2011 <- RNHANES::nhanes_load_data(c("SPX_G"), "2011-2012")

  # Keep only the ID and the measurement from each file before stacking them.
  fev1 <- dplyr::bind_rows(
    dplyr::select(cycle_2009, SEQN, SPXNFEV1),
    dplyr::select(cycle_2011, SEQN, SPXNFEV1)
  )
  fev1 <- dplyr::filter(fev1, SEQN %in% dat$ID)
  fev1 <- dplyr::rename(fev1, FEV1 = SPXNFEV1)

  # right_join keeps every row of `dat`; rows without a measurement get NA.
  joined <- dplyr::right_join(fev1, dat, by = c("SEQN" = "ID"))
  dplyr::rename(joined, ID = SEQN)
})
# Add LBXHGB variable (Blood hemoglobin, g/dL)
# LBXHGB is read from the CBC_F (2009-2010) and CBC_G (2011-2012) files and
# joined onto `dat` by participant ID under the name Hemoglobin. local() keeps
# the intermediate tables out of the global environment.
dat <- local({
  cycle_2009 <- RNHANES::nhanes_load_data(c("CBC_F"), "2009-2010")
  cycle_2011 <- RNHANES::nhanes_load_data(c("CBC_G"), "2011-2012")

  # Keep only the ID and the measurement from each file before stacking them.
  hgb <- dplyr::bind_rows(
    dplyr::select(cycle_2009, SEQN, LBXHGB),
    dplyr::select(cycle_2011, SEQN, LBXHGB)
  )
  hgb <- dplyr::filter(hgb, SEQN %in% dat$ID)
  hgb <- dplyr::rename(hgb, Hemoglobin = LBXHGB)

  # right_join keeps every row of `dat`; rows without a measurement get NA.
  joined <- dplyr::right_join(hgb, dat, by = c("SEQN" = "ID"))
  dplyr::rename(joined, ID = SEQN)
})
# The race proportions were only needed while sampling; remove them.
rm(list = "prop")

# Attach each package listed in `pkgs` by name.
for (pkg in pkgs) library(pkg, character.only = TRUE)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment