Last active
June 9, 2017 10:54
-
-
Save dantalus/3ffe96112c77be50c7d1c53f2c9b100a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# CSV used throughout the script, read over HTTPS.
urlfile <- "https://raw.githubusercontent.com/dantalus/intro_workshop/master/plot.csv"
plotDf <- read.csv(urlfile)

# install.packages(c("ggthemes", "ggbeeswarm"))
library(ggthemes)
library(ggbeeswarm)
library(ggplot2)
library(dplyr)

# What is a tibble? ####
class(plotDf)                   # read.csv() returns a plain data.frame
as_tibble(plotDf)               # prints the tibble version; plotDf is unchanged
plotDf <- as_tibble(plotDf)     # keep the tibble version
class(plotDf) <- "data.frame"   # strip the extra tibble classes again

# Three ways of ending up with a tibble.
# as_tibble() replaces the deprecated as.tbl(); there is no as_tbl() in dplyr.
library(readr)
plotDf <- read_csv(urlfile)
plotDf <- as_tibble(read.csv(urlfile))
plotDf <- read.csv(urlfile) %>% as_tibble()
# Rename variables ####
# Base R
# Option 1: copy the column under the new name, then look at the data without
# the old column. A scratch copy is used so plotDf keeps its original names
# and the two options below cannot produce two columns both called "age".
renamedDf <- plotDf
renamedDf$age <- renamedDf$demo_age
renamedDf[, -match("demo_age", names(renamedDf))]  # drop by computed position
renamedDf[, !grepl("demo_age", names(renamedDf))]  # drop by name pattern
# Option 2: rename in place, locating the column by name rather than relying
# on a hard-coded column position.
renamedDf <- plotDf
names(renamedDf)[names(renamedDf) == "demo_age"] <- "age"
# dplyr
plotDf <- read_csv(urlfile)
rename(plotDf, age = demo_age)  # result is printed, not assigned back to plotDf
# Select rows ####
# Base R: the row selector goes before the comma
plotDf[seq_len(5), ]
plotDf[with(plotDf, id > 199), ]
plotDf[with(plotDf, id > 199 & demo_gender == "Male"), ]
plotDf[grepl(" A", plotDf$arm), ]  # character matching
# A logical mask can be stored first and reused as the row selector
r <- plotDf$id > 199
plotDf[r, ]
r <- is.element(plotDf$id, c(201, 202, 101))
plotDf[r, ]
subset(plotDf, id > 199)
# dplyr: conditions separated by commas are combined with "and"
plotDf %>% filter(id > 199, demo_gender == "Male")
plotDf %>% filter(id > 199 | demo_gender == "Male")
plotDf %>%
  filter((id > 199 & demo_gender == "Male") |
           (id < 199 & demo_gender == "Female"))
# Putting rows back together ####
a <- filter(plotDf, id > 199)
b <- filter(plotDf, id <= 199)
rbind(a, b)
# Same split, but one piece has a renamed column, so the column names of the
# two pieces no longer agree.
a <- filter(plotDf, id > 199) %>%
  rename(age = demo_age)
b <- filter(plotDf, id <= 199)
# This rbind() is expected to fail; report the error instead of aborting the
# rest of the script when it is run with source().
tryCatch(
  rbind(a, b),
  error = function(e) message("rbind(a, b) failed: ", conditionMessage(e))
)
# Both pieces still have their columns in the same order, so copying the
# names across makes them compatible again.
names(b) <- names(a)
rbind(a, b)
# Select columns ####
# Base R
class(plotDf$glvef)
class(plotDf[, 1])
plotDf[[1]]        # `[[` extracts the first column itself
plotDf[1]          # `[` with one index keeps a one-column data frame
plotDf[1, 1]       # first row, first column
plotDf[[1]][1]     # first element of the first column
plotDf[, "glvef"]
plotDf[, grepl("demo", names(plotDf))]  # every column whose name contains "demo"
# dplyr
plotDf %>% select(id, glvef)
plotDf %>% select(glvef:id)
plotDf %>% select(starts_with("demo"))
plotDf %>% select(id, GLVEF = glvef)  # select and rename in one step
# Move arm, id and time to the front; everything() keeps the remaining columns
plotDf <- plotDf %>% select(arm, id, time, everything())
# Putting columns back together ####
a <- plotDf %>% select(id, glvef)
b <- plotDf %>% select(starts_with("demo"))
cbind(a, b)  # danger: rows are paired purely by position, no key is checked
a <- plotDf %>% select(id, time, glvef)
b <- plotDf %>% select(id, time, starts_with("demo"))
# Joining on the shared keys pairs the rows by id and time instead
g <- a %>% full_join(b, by = c("id", "time"))
cbind(a, b)  # danger
# New/modify variables ####
# Base R
plotDf$bmi <- plotDf$demo_wt_kg / (plotDf$demo_ht_cm / 100)^2
# Assign into the column vector rather than into a row subset of the data
# frame. which() drops NA comparisons and yields an empty index when nothing
# matches, so the assignment cannot fail on missing ids or on zero matches.
plotDf$demo_ht_cm[which(plotDf$id == 107)] <- NA
View(plotDf)
plotDf$demo_age[which(plotDf$demo_gender == "Male")] <- NA
# Start again from the file to undo the edits above
plotDf <- read_csv(urlfile)
# dplyr
plotDf <- mutate(plotDf, bmi = demo_wt_kg / (demo_ht_cm / 100)^2)
mutate(plotDf, bmi_rank = percent_rank(bmi)) %>% View()
# bmi already exists at this point, so only the rank has to be added
plotDf <- mutate(plotDf, bmi_rank = percent_rank(bmi))
# Arrange by rows ####
# Base R: order() returns the row permutation, which is then used as the index
plotDf[order(plotDf[["demo_age"]]), ]
plotDf[with(plotDf, order(arm, id, time)), ]
# dplyr
plotDf %>% arrange(arm, id, time)
# Unique rows ####
# Base R
plotDf[!duplicated(plotDf[["id"]]), ]  # first row seen for each id
unique(plotDf["id"])
# dplyr
plotDf %>% distinct(id, .keep_all = TRUE)
as.numeric(unlist(distinct(plotDf, id)))  # the distinct ids as a bare vector
plotDf %>% distinct(id, time, .keep_all = TRUE)
# Summarizing ####
# Whole-table summary: row count plus mean and minimum of glvef
plotDf %>%
  summarise(n = n(),
            mean_glvef = mean(glvef, na.rm = TRUE),
            min = min(glvef, na.rm = TRUE))
# The same kind of summary, one row per arm / gender combination
plotDf %>%
  group_by(arm, demo_gender) %>%
  summarise(n = n(),
            mean_glvef = mean(glvef, na.rm = TRUE))
# Group, summarise, join ####
# summarise() collapses the data to one row per arm, so joining back on `arm`
# attaches the per-arm statistics to every original row.
group_by(plotDf, arm) %>%
  summarise(n = n(),
            mean_glvef = mean(glvef, na.rm = TRUE)) %>%
  full_join(plotDf, by = "arm") %>% View()
# Group, mutate (no join needed)
# mutate() keeps every row, so the within-gender rank is already attached to
# the data. Joining the result back onto plotDf by demo_gender would match
# each row with every other row of the same gender and duplicate all columns.
# NOTE(review): the rank is named bmi_rank but is computed from demo_ht_cm -
# confirm which of the two was intended.
group_by(plotDf, demo_gender) %>%
  mutate(bmi_rank = percent_rank(demo_ht_cm)) %>%
  ungroup() %>% View()
# Back to our plot ####
# Re-read the raw data and turn arm and time into factors in a single step.
# factor(labels = ) applies the labels to the sorted unique values of arm, in
# that order; levels = fixes the order of the two time points.
plotDf <- read.csv(urlfile) %>%
  mutate(arm = factor(arm, labels = c("Placebo", "Low Dose", "High Dose")),
         time = factor(time, levels = c("Baseline", "8 weeks")))
# Mean and SD of glvef for every arm-by-time cell, computed once so a single
# layer can draw all six summary markers. Rows with a missing arm or time are
# left out, as they were by the per-cell filter() calls this replaces.
glvefSummary <- plotDf %>%
  filter(!is.na(arm), !is.na(time)) %>%
  group_by(arm, time) %>%
  summarise(glvef_mean = mean(glvef, na.rm = TRUE),
            glvef_sd = sd(glvef, na.rm = TRUE)) %>%
  ungroup() %>%
  # Baseline markers sit left of the data (x = 0.5), 8-week markers right (2.5)
  mutate(marker_x = c(0.5, 2.5)[as.integer(time)])

# One panel per arm: violins for the distribution at each time point, dashed
# lines linking each id across time, a straight fitted line, and mean +/- SD
# markers on the outside of each time point.
ggplot(plotDf, aes(y = glvef, shape = arm)) +
  geom_violin(aes(x = as.numeric(time), group = time),
              color = "grey90", fill = "grey90", width = .5) +
  geom_line(aes(group = id, x = as.numeric(time)),
            alpha = 0.7, linetype = "dashed") +
  geom_point(aes(group = id, x = as.numeric(time))) +
  # `size` sets the line thickness; ggplot2 >= 3.4.0 prefers `linewidth` for
  # lines, but `size` is kept so the script also runs on older versions.
  geom_smooth(aes(x = as.numeric(time)),
              method = "lm", formula = y ~ x, se = FALSE,
              size = 2, color = "black") +
  # shape = arm is inherited from the plot-level aes(); faceting by arm puts
  # each summary row in its own panel.
  geom_pointrange(data = glvefSummary,
                  aes(x = marker_x,
                      y = glvef_mean,
                      ymin = glvef_mean - glvef_sd,
                      ymax = glvef_mean + glvef_sd)) +
  facet_wrap(~arm) +
  scale_x_continuous(breaks = c(1, 2),
                     labels = c("Baseline", "8 weeks")) +
  # guide = "none" replaces the deprecated guide = FALSE
  scale_shape(guide = "none") +
  xlab("") +
  ylab("GLVEF (%)") +
  ylim(0, 75) +
  theme_base() +
  theme(panel.spacing = unit(2, "lines"),
        panel.border = element_rect(color = "white"),
        axis.text.y = element_text(size = 16),
        strip.text = element_text(size = 16))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment