Last active
February 5, 2018 22:26
-
-
Save t-student/f19ea39d748041b7ed3ed7217816da84 to your computer and use it in GitHub Desktop.
tidyr spread when you want to spread multiple summary values
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
> head(df.tmp) | |
# A tibble: 6 x 9 | |
patientid practiceid mmt.date mmt.val int pre preend post stage | |
<dbl> <dbl> <date> <dbl> <date> <date> <date> <date> <chr> | |
1 21342 3.00 2015-12-09 83.0 2017-03-07 2015-12-07 2016-12-07 2017-09-07 PRE | |
2 19273 3.00 2015-12-11 120 2017-03-07 2015-12-07 2016-12-07 2017-09-07 PRE | |
3 19273 3.00 2015-12-11 50.0 2017-03-07 2015-12-07 2016-12-07 2017-09-07 PRE | |
4 19273 3.00 2015-12-11 0 2017-03-07 2015-12-07 2016-12-07 2017-09-07 PRE | |
5 19273 3.00 2015-12-17 72.0 2017-03-07 2015-12-07 2016-12-07 2017-09-07 PRE | |
6 19273 3.00 2015-12-17 135 2017-03-07 2015-12-07 2016-12-07 2017-09-07 PRE | |
> str(df.tmp) | |
Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 29984 obs. of 9 variables: | |
$ patientid : num 21342 19273 19273 19273 19273 ... | |
$ practiceid: num 3 3 3 3 3 3 3 3 3 3 ... | |
$ mmt.date : Date, format: "2015-12-09" "2015-12-11" "2015-12-11" ... | |
$ mmt.val : num 83 120 50 0 72 135 4.5 1.7 1.7 0.4 ... | |
$ int : Date, format: "2017-03-07" "2017-03-07" "2017-03-07" ... | |
$ pre : Date, format: "2015-12-07" "2015-12-07" "2015-12-07" ... | |
$ preend : Date, format: "2016-12-07" "2016-12-07" "2016-12-07" ... | |
$ post : Date, format: "2017-09-07" "2017-09-07" "2017-09-07" ... | |
$ stage : chr "PRE" "PRE" "PRE" "PRE" ... | |
# Per-practice summary with one column per (stage, statistic) pair.
#
# spread() can only widen a single value column, so to widen several summary
# statistics at once:
#   1. gather() the statistics into long form, giving
#      practiceid, stage, variable, value;
#   2. unite() stage and variable into the single key we want to spread
#      (e.g. "PRE_n.mmt");
#   3. spread() that key back out, pivot-table style.
#
# Result: one row per practiceid, every column converted to character.
# Note that select() below errors if a stage (PRE / INTV / POST) never occurs
# in df.tmp, because the corresponding columns are then never created.
df.tmp %>%
  dplyr::group_by(practiceid, stage) %>%
  dplyr::summarise(
    n.mmt = n(),
    # n_distinct() stands in for the len.unique() helper, which is not defined
    # in this snippet -- assumed to be length(unique(x)); TODO confirm.
    n.pat = dplyr::n_distinct(patientid)
  ) %>%
  # summarise() only drops the last grouping level, so the data is still
  # grouped by practiceid here. Ungroup so that mutate_all() at the end also
  # converts practiceid (it skips grouping columns).
  dplyr::ungroup() %>%
  tidyr::gather(variable, value, -(practiceid:stage)) %>%  # <--- 1. the trick
  tidyr::unite(temp, stage, variable) %>%                  # <--- 2. the trick
  tidyr::spread(temp, value) %>%                           # <--- 3. the trick
  dplyr::select(
    practiceid,
    PRE_n.pat, PRE_n.mmt,
    INTV_n.pat, INTV_n.mmt,
    POST_n.pat, POST_n.mmt
  ) %>%
  # Column-wise conversion keeps the result a tibble. The previous
  # sapply(., as.character) went through a matrix and collapsed to a plain
  # named vector whenever the summary had a single row.
  dplyr::mutate_all(as.character)
# A tibble: 31 x 7 | |
practiceid PRE_n.pat PRE_n.mmt INTV_n.pat INTV_n.mmt POST_n.pat POST_n.mmt | |
<chr> <chr> <chr> <chr> <chr> <chr> <chr> | |
1 3 11 345 6 67 4 16 | |
2 4 13 414 13 318 <NA> <NA> | |
3 5 16 577 14 151 <NA> <NA> | |
4 7 10 308 10 189 <NA> <NA> | |
5 9 13 333 17 366 16 195 | |
6 10 29 1367 22 285 <NA> <NA> | |
7 11 4 89 4 118 <NA> <NA> | |
8 12 4 147 4 96 <NA> <NA> | |
9 13 30 808 31 465 <NA> <NA> | |
10 14 39 1915 34 341 <NA> <NA> | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment