Last active
February 5, 2018 22:27
-
-
Save t-student/d92be5d6fb8ecb91597e4ecf72f5e0f6 to your computer and use it in GitHub Desktop.
Demographics via dplyr - group_by
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# See majutils | |
mean_sd <- function (x, dp = 2) | |
{ | |
my.stat <- paste0(round(mean(x, na.rm = T), dp), " (", round(sd(x, | |
na.rm = T), dp), ")") | |
return(my.stat) | |
} | |
prop <- function (x, level, dp = 1, percent = T) | |
{ | |
x2 <- as.character(x) | |
if (is.na(level)) { | |
myfreq <- length(x2[is.na(x2)]) | |
} | |
else { | |
lvl2 <- as.character(level) | |
myfreq <- length(x2[x2 == lvl2]) | |
} | |
myprop <- myfreq/len(x2) | |
if (percent) { | |
myprop <- round(myprop * 100, dp) | |
} | |
else { | |
myprop <- round(myprop, dp) | |
} | |
myprop | |
} | |
freq_prop <- function (x, level, dp = 1, percent = T) | |
{ | |
myprop <- prop(x, level, dp, percent) | |
x2 <- as.character(x) | |
if (is.na(level)) { | |
myfreq <- length(x2[is.na(x2)]) | |
} | |
else { | |
lvl2 <- as.character(level) | |
myfreq <- length(x2[x2 == lvl2]) | |
} | |
my.stat <- paste0(myfreq, " (", myprop, ")") | |
my.stat | |
} | |
head(df.tmp) | |
# A tibble: 6 x 45 | |
patie~ pract~ activ~ recei~ pract~ reg.c~ reg.pr~ reg.p~ reg.p~ reg.~ reg.~ pat.~ sex pati~ pat.~ first.enc~ last.enco~ | |
<dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr> <dbl> <date> <date> | |
1 21342 3.00 1.00 Y Medis~ 1.00 1.00 1.00 1.00 1.00 1.00 1.00 Fema~ A 66.0 2004-03-30 2017-08-28 | |
2 19273 3.00 1.00 Y Medis~ 1.00 1.00 1.00 1.00 1.00 1.00 1.00 Fema~ A 79.0 2004-08-02 2017-10-23 | |
3 19273 3.00 1.00 Y Medis~ 1.00 1.00 1.00 1.00 1.00 1.00 1.00 Fema~ A 79.0 2004-08-02 2017-10-23 | |
# | |
tbl1 <- df.tmp %>% | |
dplyr::filter(practiceid %in% 1:10) %>% | |
dplyr::group_by(pracid) %>% | |
dplyr::summarise(n_idx00 = n(), | |
age.mean_idx01 = mean_sd(pat.age, dp = 1), ########### >>> 1. The _idx0i controls final ordering | |
n.pct.yint_idx02 = freq_prop(received.intervention, "Y"), | |
n.pct.nint_idx03 = freq_prop(received.intervention, "N"), | |
n.pct.sex.male_idx04 = freq_prop(sex, "Male"), | |
n.pct.sex.fem_idx05 = freq_prop(sex, "Female"), | |
n.pct.sex.int_idx06 = freq_prop(sex, "Intersex or indeterminate"), | |
n.pct.sex.nil_idx07 = freq_prop(sex, "Not stated/inadequately described"), | |
n.pct.stat.inact_idx08 = freq_prop(patient.status, "I"), | |
n.pct.stat.act_idx09 = freq_prop(patient.status, "A"), | |
n.pct.stat.dead_idx10 = freq_prop(patient.status, "D"), | |
n.pct.stat.oth_idx11 = freq_prop(patient.status, "O"), | |
n.pct.smk.non_idx12 = freq_prop(smoking.status, "Non smoker"), | |
n.pct.smk.y_idx13 = freq_prop(smoking.status, "Smoker"), | |
n.pct.smk.ex_idx14 = freq_prop(smoking.status, "Ex smoker"), | |
n.pct.smk.notrec_idx15 = freq_prop(smoking.status," Not recorded (not known)"), | |
n.pct.smk.na_idx16 = freq_prop(smoking.status, NA), | |
n.pct.seif1_idx17 = freq_prop(patient.seifa.quintile, "1"), | |
n.pct.seif2_idx18 = freq_prop(patient.seifa.quintile, "2"), | |
n.pct.seif3_idx19 = freq_prop(patient.seifa.quintile, "3"), | |
n.pct.seif4_idx20 = freq_prop(patient.seifa.quintile, "4"), | |
n.pct.seif5_idx21 = freq_prop(patient.seifa.quintile, "5"), | |
n.pct.seifNA_idx22 = freq_prop(patient.seifa.quintile, NA), | |
n.pct.cvd0_idx22 = freq_prop(current.cvd, "0"), | |
n.pct.cvd1_idx23 = freq_prop(current.cvd, "1"), | |
n.pct.cvdNA_idx24 = freq_prop(current.cvd, NA) | |
) %>% | |
sapply(., as.character) %>% ########### >>> 2. Converts everything to character class | |
as_data_frame(.) %>% | |
tidyr::gather("var", "val", -pracid) %>% | |
# dplyr::mutate(stagegrp = paste(grp, period.num, sep = ".")) %>% ########### >>> 3. Only necessary for grouping on multi vars | |
# dplyr::select(-period.num, -grp) %>% | |
tidyr::spread(pracid, val) %>% | |
tidyr::separate(var, c("var", "idx"), sep = "_") %>% | |
dplyr::arrange(idx) %>% | |
dplyr::select(-idx) | |
> tbl1 | |
# A tibble: 26 x 7 | |
var PracID03 PracID04 PracID05 PracID07 PracID09 PracID10 | |
<chr> <chr> <chr> <chr> <chr> <chr> <chr> | |
1 n 452 763 764 516 983 1750 | |
2 age.mean 68.8 (11.4) 67.3 (8.1) 70.1 (7.4) 65.5 (6.8) 54.3 (15.4) 67.3 (12.9) | |
3 n.pct.yint 452 (100) 763 (100) 764 (100) 516 (100) 983 (100) 1750 (100) | |
4 n.pct.nint 0 (0) 0 (0) 0 (0) 0 (0) 0 (0) 0 (0) | |
5 n.pct.sex.male 218 (48.2) 579 (75.9) 522 (68.3) 293 (56.8) 390 (39.7) 951 (54.3) | |
6 n.pct.sex.fem 234 (51.8) 184 (24.1) 242 (31.7) 223 (43.2) 593 (60.3) 799 (45.7) | |
7 n.pct.sex.int 0 (0) 0 (0) 0 (0) 0 (0) 0 (0) 0 (0) | |
8 n.pct.sex.nil 0 (0) 0 (0) 0 (0) 0 (0) 0 (0) 0 (0) | |
9 n.pct.stat.inact 0 (0) 0 (0) 0 (0) 0 (0) 0 (0) 0 (0) | |
10 n.pct.stat.act 452 (100) 763 (100) 764 (100) 516 (100) 983 (100) 1750 (100) | |
tbl1 <- tbl1 %>% xtable::xtable(.) | |
print(tbl1, only.contents=TRUE, include.rownames=F, | |
include.colnames=F, floating=F, | |
hline.after=NULL, sanitize.text.function=identity) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment