Last active
June 9, 2017 10:54
-
-
Save dantalus/3ffe96112c77be50c7d1c53f2c9b100a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Address of the CSV file that the rest of this script works with.
urlfile <- "https://raw.githubusercontent.com/dantalus/intro_workshop/master/plot.csv"
# Download and parse it with base R's reader.
plotDf <- read.csv(file = urlfile)
| # install.packages(c("ggthemes", "ggbeeswarm")) | |
| library(ggthemes) | |
| library(ggbeeswarm) | |
| library(ggplot2) | |
| library(dplyr) | |
# What is a tibble? ####
# A tibble is a data.frame carrying extra classes ("tbl_df", "tbl"), which
# mainly changes how it prints and how `[` subsetting behaves.
class(plotDf)
as_tibble(plotDf)                # prints a converted copy; plotDf is unchanged
plotDf <- as_tibble(plotDf)      # keep the tibble version
plotDf <- as.data.frame(plotDf)  # ...and convert back to a plain data.frame
# Three ways to get a tibble straight from the CSV.
# as_tibble() is used throughout: as.tbl() is deprecated and as_tbl() is not
# a dplyr function at all.
library(readr)
plotDf <- read_csv(urlfile)
plotDf <- as_tibble(read.csv(urlfile))
plotDf <- read.csv(urlfile) %>% as_tibble()
# Rename variables ####
# Base R
# Option 1: copy the column under its new name, then drop the original.
# This runs on a scratch copy so plotDf never ends up with two "age" columns.
renamed <- plotDf
renamed$age <- renamed$demo_age
renamed[, setdiff(names(renamed), "demo_age")]  # drop by exact name
renamed[, !grepl("demo_age", names(renamed))]   # drop by pattern match
# Option 2: rename in place, locating the column by name instead of relying
# on it sitting at a fixed position.
names(plotDf)[names(plotDf) == "demo_age"] <- "age"
# dplyr
plotDf <- read_csv(urlfile)  # reload so demo_age exists again
rename(plotDf, age = demo_age)
# Select rows ####
# Base R: whatever sits in the row slot of `[` decides which rows come back
plotDf[seq_len(5), ]
with(plotDf, plotDf[id > 199, ])
with(plotDf, plotDf[id > 199 & demo_gender == "Male", ])
plotDf[grepl(" A", plotDf$arm), ] # character matching
# The logical mask can be stored first and then reused
r <- plotDf$id > 199
plotDf[r, ]
r <- is.element(plotDf$id, c(201, 202, 101))
plotDf[r, ]
subset(plotDf, id > 199)
# dplyr: conditions separated by commas are combined with AND
plotDf %>% filter(id > 199, demo_gender == "Male")
plotDf %>% filter(id > 199 | demo_gender == "Male")
plotDf %>%
  filter((id > 199 & demo_gender == "Male") |
           (id < 199 & demo_gender == "Female"))
# Putting rows back together
# Split on id, then stack the two pieces again
a <- plotDf %>% filter(id > 199)
b <- plotDf %>% filter(id <= 199)
rbind(a, b)
# Same split, but one piece now carries a renamed column
a <- plotDf %>%
  filter(id > 199) %>%
  rename(age = demo_age)
b <- plotDf %>% filter(id <= 199)
rbind(a, b) #error
# Copy a's column names onto b, position by position, so the names agree
names(b) <- names(a)
# select columns ####
# Base R
# What class comes back depends on the extraction operator used (and, for
# `[, j]`, on whether plotDf is a tibble or a plain data.frame).
class(plotDf$glvef)
class(plotDf[, 1])
plotDf[[1]]     # first column, pulled out of the data frame
plotDf[1]       # first column, still wrapped in a data frame
plotDf[1, 1]    # row 1, column 1
plotDf[[1]][1]  # first element of the first column
plotDf[, "glvef"]
plotDf[, grepl("demo", names(plotDf))]
# dplyr
plotDf %>% select(id, glvef)
plotDf %>% select(glvef:id)
plotDf %>% select(starts_with("demo"))
plotDf %>% select(id, GLVEF = glvef)  # select and rename in one step
# Move arm, id and time to the front, keeping every other column after them
plotDf <- plotDf %>% select(arm, id, time, everything())
# Putting columns back together
a <- plotDf %>% select(id, glvef)
b <- plotDf %>% select(starts_with("demo"))
# cbind() glues the pieces side by side purely by row position; nothing
# checks that row i of a and row i of b describe the same observation.
cbind(a, b) # danger
a <- plotDf %>% select(id, time, glvef)
b <- plotDf %>% select(id, time, starts_with("demo"))
# Joining on the shared keys matches rows explicitly
g <- full_join(a, b, by = c("id", "time"))
# Here cbind() would also repeat the id and time columns
cbind(a, b) # danger
# New/modify variables ####
# Base R
plotDf$bmi <- plotDf$demo_wt_kg / (plotDf$demo_ht_cm / 100)^2
# Overwrite selected values by indexing the column vector directly.
# Assigning through `plotDf[mask, ]$col <-` goes via a row-subset copy and
# fails when the mask contains NA (and, on a plain data.frame, when no row
# matches). `%in%` never returns NA, so these lines are safe either way.
plotDf$demo_ht_cm[plotDf$id %in% 107] <- NA
View(plotDf)
plotDf$demo_age[plotDf$demo_gender %in% "Male"] <- NA
plotDf <- read_csv(urlfile)  # reload to discard the edits above
# dplyr
plotDf <- mutate(plotDf, bmi = demo_wt_kg / (demo_ht_cm / 100)^2)
mutate(plotDf, bmi_rank = percent_rank(bmi)) %>% View()
# Later arguments of mutate() can use columns created by earlier ones, so
# both variables can be built in a single call.
plotDf <- plotDf %>%
  mutate(bmi = demo_wt_kg / (demo_ht_cm / 100)^2,
         bmi_rank = percent_rank(bmi))
# Arrange by rows
# Base R: order() returns the row permutation that sorts by the given keys
plotDf[with(plotDf, order(demo_age)), ]
plotDf[with(plotDf, order(arm, id, time)), ]
# dplyr
plotDf %>% arrange(arm, id, time)
# Unique rows
plotDf[!duplicated(plotDf$id), ]  # first row seen for each id
unique(plotDf["id"])              # just the distinct ids
# dplyr
plotDf %>% distinct(id, .keep_all = TRUE)
as.numeric(unlist(distinct(plotDf, id)))  # distinct ids as a numeric vector
plotDf %>% distinct(id, time, .keep_all = TRUE)
# Summarizing ####
# Whole-table summary: one row out
plotDf %>%
  summarise(n = n(),
            mean_glvef = mean(glvef, na.rm = TRUE),
            min = min(glvef, na.rm = TRUE))
# One row per arm x gender combination
plotDf %>%
  group_by(arm, demo_gender) %>%
  summarise(n = n(),
            mean_glvef = mean(glvef, na.rm = TRUE))
# Group, summarise, join
# summarise() collapses to one row per arm, so joining back on "arm" attaches
# the arm-level statistics to every original row.
plotDf %>%
  group_by(arm) %>%
  summarise(n = n(),
            mean_glvef = mean(glvef, na.rm = TRUE)) %>%
  full_join(plotDf, by = "arm") %>%
  View()
# Group, mutate
# A grouped mutate() keeps every row and column, so no join is needed here.
# Joining the result back onto plotDf by demo_gender would pair each row with
# every other row of the same gender and multiply the row count.
plotDf %>%
  group_by(demo_gender) %>%
  mutate(bmi_rank = percent_rank(bmi)) %>%  # rank BMI within each gender
  ungroup() %>%
  View()
# Back to our plot ####
# Start again from the raw CSV and turn arm and time into factors: arm gets
# display labels, time gets an explicit level order (Baseline, then 8 weeks).
plotDf <- read.csv(urlfile) %>%
  mutate(
    arm = factor(arm, labels = c("Placebo", "Low Dose", "High Dose")),
    time = factor(time, levels = c("Baseline", "8 weeks"))
  )
# Mean and SD of GLVEF for each arm x time cell. Computing this once lets a
# single geom_pointrange() layer replace six copy-pasted ones, and draws one
# marker per cell instead of one identical marker per data row.
glvefSummary <- plotDf %>%
  filter(!is.na(arm), !is.na(time)) %>%
  group_by(arm, time) %>%
  summarise(mean_glvef = mean(glvef, na.rm = TRUE),
            sd_glvef = sd(glvef, na.rm = TRUE)) %>%
  ungroup() %>%
  # time is plotted at x = 1 (Baseline) and x = 2 (8 weeks); the summary
  # markers sit just outside those positions.
  mutate(x_pos = ifelse(time == "Baseline", 0.5, 2.5))

ggplot(plotDf, aes(y = glvef, shape = arm)) +
  # Pale violins: distribution of GLVEF at each time point
  geom_violin(aes(x = as.numeric(time), group = time),
              color = "grey90", fill = "grey90", width = .5) +
  # One dashed line per id, joining that id's observations
  geom_line(aes(group = id, x = as.numeric(time)),
            alpha = 0.7, linetype = "dashed") +
  facet_wrap(~arm) +
  theme_base() +
  # The x axis is numeric (factor codes), so label the two codes by hand
  scale_x_continuous(breaks = c(1, 2),
                     labels = c("Baseline", "8 weeks")) +
  geom_point(aes(group = id, x = as.numeric(time))) +
  # Straight-line fit across the two time points within each panel.
  # `size` is kept for older ggplot2; newer releases call it `linewidth`.
  geom_smooth(aes(x = as.numeric(time)),
              method = "lm", se = FALSE, size = 2, color = "black") +
  # Mean +/- 1 SD beside each time point, one marker per arm x time cell
  geom_pointrange(data = glvefSummary,
                  aes(x = x_pos,
                      y = mean_glvef,
                      ymin = mean_glvef - sd_glvef,
                      ymax = mean_glvef + sd_glvef)) +
  # scale_color_brewer(guide = FALSE, palette = "Set1") +
  # scale_fill_manual(guide = FALSE, palette = "Set1") +
  # guide = "none" hides the shape legend (guide = FALSE is deprecated)
  scale_shape(guide = "none") +
  xlab("") +
  ylab("GLVEF (%)") +
  ylim(0, 75) +
  theme(panel.spacing = unit(2, "lines"),
        panel.border = element_rect(color = "white"),
        axis.text.y = element_text(size = 16),
        strip.text = element_text(size = 16))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment