t-student · February 5, 2018 22:27
diff --git a/demographics.R b/demographics.R
 # See majutils
 mean_sd <- function (x, dp = 2) 
 {
    my.stat <- paste0(round(mean(x, na.rm = T), dp), " (", round(sd(x, 
        na.rm = T), dp), ")")
    return(my.stat)
 }

 prop <- function (x, level, dp = 1, percent = T) 
 {
    x2 <- as.character(x)
    if (is.na(level)) {
        myfreq <- length(x2[is.na(x2)])
    }
    else {
        lvl2 <- as.character(level)
        myfreq <- length(x2[x2 == lvl2])
    }
    myprop <- myfreq/len(x2)
    if (percent) {
        myprop <- round(myprop * 100, dp)
    }
    else {
        myprop <- round(myprop, dp)
    }
    myprop
 }

 freq_prop <- function (x, level, dp = 1, percent = T) 
 {
    myprop <- prop(x, level, dp, percent)
    x2 <- as.character(x)
    if (is.na(level)) {
        myfreq <- length(x2[is.na(x2)])
    }
    else {
        lvl2 <- as.character(level)
        myfreq <- length(x2[x2 == lvl2])
    }
    my.stat <- paste0(myfreq, " (", myprop, ")")
    my.stat
 }


 head(df.tmp)
 # A tibble: 6 x 45
  patie~ pract~ activ~ recei~ pract~ reg.c~ reg.pr~ reg.p~ reg.p~ reg.~ reg.~ pat.~ sex   pati~ pat.~ first.enc~ last.enco~
   <dbl>  <dbl>  <dbl> <chr>  <chr>   <dbl>   <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <fct> <chr> <dbl> <date>     <date>    
 1  21342   3.00   1.00 Y      Medis~   1.00    1.00   1.00   1.00  1.00  1.00  1.00 Fema~ A      66.0 2004-03-30 2017-08-28
 2  19273   3.00   1.00 Y      Medis~   1.00    1.00   1.00   1.00  1.00  1.00  1.00 Fema~ A      79.0 2004-08-02 2017-10-23
 3  19273   3.00   1.00 Y      Medis~   1.00    1.00   1.00   1.00  1.00  1.00  1.00 Fema~ A      79.0 2004-08-02 2017-10-23

 # 
 tbl1 <- df.tmp %>%
    dplyr::filter(practiceid %in% 1:10) %>%
    dplyr::group_by(pracid) %>%
    dplyr::summarise(n_idx00                  = n(),
                     age.mean_idx01           = mean_sd(pat.age, dp = 1),   ########### >>> 1. The _idx0i controls final ordering
                     
                     n.pct.yint_idx02          = freq_prop(received.intervention, "Y"),
                     n.pct.nint_idx03          = freq_prop(received.intervention, "N"),
                     
                     n.pct.sex.male_idx04      = freq_prop(sex, "Male"),
                     n.pct.sex.fem_idx05       = freq_prop(sex, "Female"),
                     n.pct.sex.int_idx06       = freq_prop(sex, "Intersex or indeterminate"),
                     n.pct.sex.nil_idx07       = freq_prop(sex, "Not stated/inadequately described"),
                     
                     n.pct.stat.inact_idx08    = freq_prop(patient.status, "I"),
                     n.pct.stat.act_idx09      = freq_prop(patient.status, "A"),
                     n.pct.stat.dead_idx10     = freq_prop(patient.status, "D"),
                     n.pct.stat.oth_idx11      = freq_prop(patient.status, "O"),
                     
                     n.pct.smk.non_idx12       = freq_prop(smoking.status, "Non smoker"),
                     n.pct.smk.y_idx13         = freq_prop(smoking.status, "Smoker"),
                     n.pct.smk.ex_idx14        = freq_prop(smoking.status, "Ex smoker"),
                     n.pct.smk.notrec_idx15    = freq_prop(smoking.status," Not recorded (not known)"),
                     n.pct.smk.na_idx16        = freq_prop(smoking.status, NA),
                     
                     n.pct.seif1_idx17         = freq_prop(patient.seifa.quintile, "1"),
                     n.pct.seif2_idx18         = freq_prop(patient.seifa.quintile, "2"),
                     n.pct.seif3_idx19         = freq_prop(patient.seifa.quintile, "3"),
                     n.pct.seif4_idx20         = freq_prop(patient.seifa.quintile, "4"),
                     n.pct.seif5_idx21         = freq_prop(patient.seifa.quintile, "5"),
                     n.pct.seifNA_idx22        = freq_prop(patient.seifa.quintile, NA),
                     
                     n.pct.cvd0_idx22          = freq_prop(current.cvd, "0"),
                     n.pct.cvd1_idx23          = freq_prop(current.cvd, "1"),
                     n.pct.cvdNA_idx24         = freq_prop(current.cvd, NA)
                     
    )  %>%
    sapply(., as.character) %>%   ########### >>> 2. Converts everything to character class
    as_data_frame(.) %>%
    tidyr::gather("var", "val", -pracid) %>%
    # dplyr::mutate(stagegrp = paste(grp, period.num, sep = ".")) %>%   ########### >>> 3. Only necessary for grouping on multi vars
    # dplyr::select(-period.num, -grp) %>%
    tidyr::spread(pracid, val)  %>%
    tidyr::separate(var, c("var", "idx"), sep = "_") %>%
    dplyr::arrange(idx) %>%
    dplyr::select(-idx) 
    
    
    > tbl1
 # A tibble: 26 x 7
   var              PracID03    PracID04   PracID05   PracID07   PracID09    PracID10   
   <chr>            <chr>       <chr>      <chr>      <chr>      <chr>       <chr>      
 1 n                452         763        764        516        983         1750       
 2 age.mean         68.8 (11.4) 67.3 (8.1) 70.1 (7.4) 65.5 (6.8) 54.3 (15.4) 67.3 (12.9)
 3 n.pct.yint       452 (100)   763 (100)  764 (100)  516 (100)  983 (100)   1750 (100) 
 4 n.pct.nint       0 (0)       0 (0)      0 (0)      0 (0)      0 (0)       0 (0)      
 5 n.pct.sex.male   218 (48.2)  579 (75.9) 522 (68.3) 293 (56.8) 390 (39.7)  951 (54.3) 
 6 n.pct.sex.fem    234 (51.8)  184 (24.1) 242 (31.7) 223 (43.2) 593 (60.3)  799 (45.7) 
 7 n.pct.sex.int    0 (0)       0 (0)      0 (0)      0 (0)      0 (0)       0 (0)      
 8 n.pct.sex.nil    0 (0)       0 (0)      0 (0)      0 (0)      0 (0)       0 (0)      
 9 n.pct.stat.inact 0 (0)       0 (0)      0 (0)      0 (0)      0 (0)       0 (0)      
 10 n.pct.stat.act   452 (100)   763 (100)  764 (100)  516 (100)  983 (100)   1750 (100) 
    
    
 tbl1 <- tbl1 %>% xtable::xtable(.)
    
 print(tbl1, only.contents=TRUE, include.rownames=F,
  include.colnames=F, floating=F,
  hline.after=NULL, sanitize.text.function=identity)
	# See majutils
	mean_sd <- function (x, dp = 2)
	{
	my.stat <- paste0(round(mean(x, na.rm = T), dp), " (", round(sd(x,
	na.rm = T), dp), ")")
	return(my.stat)
	}

	prop <- function (x, level, dp = 1, percent = T)
	{
	x2 <- as.character(x)
	if (is.na(level)) {
	myfreq <- length(x2[is.na(x2)])
	}
	else {
	lvl2 <- as.character(level)
	myfreq <- length(x2[x2 == lvl2])
	}
	myprop <- myfreq/len(x2)
	if (percent) {
	myprop <- round(myprop * 100, dp)
	}
	else {
	myprop <- round(myprop, dp)
	}
	myprop
	}

	freq_prop <- function (x, level, dp = 1, percent = T)
	{
	myprop <- prop(x, level, dp, percent)
	x2 <- as.character(x)
	if (is.na(level)) {
	myfreq <- length(x2[is.na(x2)])
	}
	else {
	lvl2 <- as.character(level)
	myfreq <- length(x2[x2 == lvl2])
	}
	my.stat <- paste0(myfreq, " (", myprop, ")")
	my.stat
	}


	head(df.tmp)
	# A tibble: 6 x 45
	patie~ pract~ activ~ recei~ pract~ reg.c~ reg.pr~ reg.p~ reg.p~ reg.~ reg.~ pat.~ sex pati~ pat.~ first.enc~ last.enco~
	<dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr> <dbl> <date> <date>
	1 21342 3.00 1.00 Y Medis~ 1.00 1.00 1.00 1.00 1.00 1.00 1.00 Fema~ A 66.0 2004-03-30 2017-08-28
	2 19273 3.00 1.00 Y Medis~ 1.00 1.00 1.00 1.00 1.00 1.00 1.00 Fema~ A 79.0 2004-08-02 2017-10-23
	3 19273 3.00 1.00 Y Medis~ 1.00 1.00 1.00 1.00 1.00 1.00 1.00 Fema~ A 79.0 2004-08-02 2017-10-23

	#
	tbl1 <- df.tmp %>%
	dplyr::filter(practiceid %in% 1:10) %>%
	dplyr::group_by(pracid) %>%
	dplyr::summarise(n_idx00 = n(),
	age.mean_idx01 = mean_sd(pat.age, dp = 1), ########### >>> 1. The _idx0i controls final ordering

	n.pct.yint_idx02 = freq_prop(received.intervention, "Y"),
	n.pct.nint_idx03 = freq_prop(received.intervention, "N"),

	n.pct.sex.male_idx04 = freq_prop(sex, "Male"),
	n.pct.sex.fem_idx05 = freq_prop(sex, "Female"),
	n.pct.sex.int_idx06 = freq_prop(sex, "Intersex or indeterminate"),
	n.pct.sex.nil_idx07 = freq_prop(sex, "Not stated/inadequately described"),

	n.pct.stat.inact_idx08 = freq_prop(patient.status, "I"),
	n.pct.stat.act_idx09 = freq_prop(patient.status, "A"),
	n.pct.stat.dead_idx10 = freq_prop(patient.status, "D"),
	n.pct.stat.oth_idx11 = freq_prop(patient.status, "O"),

	n.pct.smk.non_idx12 = freq_prop(smoking.status, "Non smoker"),
	n.pct.smk.y_idx13 = freq_prop(smoking.status, "Smoker"),
	n.pct.smk.ex_idx14 = freq_prop(smoking.status, "Ex smoker"),
	n.pct.smk.notrec_idx15 = freq_prop(smoking.status," Not recorded (not known)"),
	n.pct.smk.na_idx16 = freq_prop(smoking.status, NA),

	n.pct.seif1_idx17 = freq_prop(patient.seifa.quintile, "1"),
	n.pct.seif2_idx18 = freq_prop(patient.seifa.quintile, "2"),
	n.pct.seif3_idx19 = freq_prop(patient.seifa.quintile, "3"),
	n.pct.seif4_idx20 = freq_prop(patient.seifa.quintile, "4"),
	n.pct.seif5_idx21 = freq_prop(patient.seifa.quintile, "5"),
	n.pct.seifNA_idx22 = freq_prop(patient.seifa.quintile, NA),

	n.pct.cvd0_idx22 = freq_prop(current.cvd, "0"),
	n.pct.cvd1_idx23 = freq_prop(current.cvd, "1"),
	n.pct.cvdNA_idx24 = freq_prop(current.cvd, NA)

	) %>%
	sapply(., as.character) %>% ########### >>> 2. Converts everything to character class
	as_data_frame(.) %>%
	tidyr::gather("var", "val", -pracid) %>%
	# dplyr::mutate(stagegrp = paste(grp, period.num, sep = ".")) %>% ########### >>> 3. Only necessary for grouping on multi vars
	# dplyr::select(-period.num, -grp) %>%
	tidyr::spread(pracid, val) %>%
	tidyr::separate(var, c("var", "idx"), sep = "_") %>%
	dplyr::arrange(idx) %>%
	dplyr::select(-idx)


	> tbl1
	# A tibble: 26 x 7
	var PracID03 PracID04 PracID05 PracID07 PracID09 PracID10
	<chr> <chr> <chr> <chr> <chr> <chr> <chr>
	1 n 452 763 764 516 983 1750
	2 age.mean 68.8 (11.4) 67.3 (8.1) 70.1 (7.4) 65.5 (6.8) 54.3 (15.4) 67.3 (12.9)
	3 n.pct.yint 452 (100) 763 (100) 764 (100) 516 (100) 983 (100) 1750 (100)
	4 n.pct.nint 0 (0) 0 (0) 0 (0) 0 (0) 0 (0) 0 (0)
	5 n.pct.sex.male 218 (48.2) 579 (75.9) 522 (68.3) 293 (56.8) 390 (39.7) 951 (54.3)
	6 n.pct.sex.fem 234 (51.8) 184 (24.1) 242 (31.7) 223 (43.2) 593 (60.3) 799 (45.7)
	7 n.pct.sex.int 0 (0) 0 (0) 0 (0) 0 (0) 0 (0) 0 (0)
	8 n.pct.sex.nil 0 (0) 0 (0) 0 (0) 0 (0) 0 (0) 0 (0)
	9 n.pct.stat.inact 0 (0) 0 (0) 0 (0) 0 (0) 0 (0) 0 (0)
	10 n.pct.stat.act 452 (100) 763 (100) 764 (100) 516 (100) 983 (100) 1750 (100)


	tbl1 <- tbl1 %>% xtable::xtable(.)

	print(tbl1, only.contents=TRUE, include.rownames=F,
	include.colnames=F, floating=F,
	hline.after=NULL, sanitize.text.function=identity)