Last active
March 11, 2018 00:17
-
-
Save jkeirstead/df85c839bd8f0026ee05 to your computer and use it in GitHub Desktop.
Draw waterfall charts in R. See post at http://www.jameskeirstead.ca/blog/waterfall-plots-in-r
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
category | value | sector | |
---|---|---|---|
UK production emissions | 632 | UK | |
Carbon flows from EU | 88 | EU | |
Carbon flows to EU | -61 | EU | |
Carbon flows from other Annex 1 | 82 | Annex 1 | |
Carbon flows to other Annex 1 | -39 | Annex 1 | |
Carbon flows from non-Annex 1 | 104 | Other non-Annex 1 | |
Carbon flows from non-Annex 1 | 64 | China | |
Carbon flows to non-Annex 1 | -25 | Non-Annex 1 | |
UK consumption emissions | 845 | UK |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
source("waterfall.r") | |
## Load the data and set correct column names | |
df <- read.csv("data.csv", stringsAsFactors=FALSE) | |
## ----further-prep-------------------------------------------------------- | |
## Tidy the levels | |
df$category <- factor(df$category, levels=unique(df$category)) | |
levels(df$category) <- gsub("flows ", "flows \n", levels(df$category)) | |
levels(df$category) <- gsub(" emissions", "\nemissions", levels(df$category)) | |
df$sector <- factor(df$sector, levels=c("UK", "EU", "Annex 1", "Non-Annex 1", | |
"China", "Other non-Annex 1")) | |
## ----prepare-plot, echo=TRUE--------------------------------------------- | |
## Determines the spacing between columns in the waterfall chart | |
offset <- 0.3 | |
gg <- waterfall(df, offset=offset) + | |
coord_cartesian(ylim=c(600, 900)) + | |
scale_fill_manual(guide="none", values=c(rgb(81, 34, 112, max=255), | |
rgb(125, 96, 153, max=255), | |
rgb(116, 173, 226, max=255), | |
rgb(17, 135, 146, max=255), | |
rgb(69, 171, 183, max=255), | |
rgb(17, 135, 146, max=255))) + | |
labs(x="", y="Mt CO2", | |
title="UK embodied emissions balance (import and export) with major regions, 2004") + | |
theme_classic() + | |
annotate("text", x=6, y=838, label="China", colour="white") + | |
annotate("text", x=8 + offset, y=900, | |
hjust=1, vjust=1, | |
size=3, | |
label="Data source: Carbon Trust (2011)", | |
fontface="italic") + | |
theme(plot.title=element_text(face="bold", hjust=0, vjust=2)) | |
## ----plot, dev='png', fig.width=12, fig.height=7.5----------------------- | |
print(gg) | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Makes a waterfall plot | |
#' | |
#' Makes a waterfall plot using ggplot2. The bars will be plotted in | |
#' the order specified by the factoring of the 'category' column. | |
#' Values should represent the positive or negative changes relative | |
#' to the previous bar. | |
#' | |
#' @param df a dataframe with columns 'category' (an ordered factor), | |
#' 'value' (numeric), and 'sector' (character) | |
#' @param offset the spacing between the columns, default = 0.3 | |
#' | |
#' @examples | |
#' raw <- data.frame(category=c("A", "B", "C", "D"), | |
#' value=c(100, -20, 10, 90), | |
#' sector=1) | |
#' | |
#' df1 <- transform(raw, category=factor(category)) | |
#' waterfall(df1) + theme_bw() + labs(x="", y="Value") | |
#' | |
#' df2 <- transform(raw, category=factor(category, levels=c("A", "C", "B", "D"))) | |
#' waterfall(df2) + theme_bw() + labs(x="", y="Value") | |
#' | |
#' @return a ggplot2 object | |
waterfall <- function(df, offset=0.3) { | |
library(ggplot2) | |
library(scales) | |
library(dplyr) | |
## Add the order column to the raw data frame and order appropriately | |
df <- df %>% mutate(order=as.numeric(category)) %>% arrange(order) | |
## The last value needs to be negated so that it goes down to | |
## zero. Throws a warning if the cumulative sum doesn't match. | |
last.id <- nrow(df) | |
df$value[last.id] <- -df$value[last.id] | |
## Calculate the cumulative sums | |
df <- df %>% mutate(cs1=cumsum(value)) | |
## Throw a warning if the values don't match zero as expected | |
final_value <- tail(df$cs1, 1) | |
if (final_value!=0) { | |
warning(sprintf("Final value doesn't return to 0. %.2d instead.", final_value)) | |
} | |
## Calculate the max and mins for each category and sector | |
df <- transform(df, min.val=c(0, head(cs1, -1)), | |
max.val=c(head(cs1, -1), 0)) | |
df <- df %>% group_by(order, category, sector, value, cs1) %>% | |
summarize(min=min(min.val, max.val), max=max(min.val, max.val)) | |
## Create the lines data frame to link the bars | |
lines <- df %>% group_by(order) %>% summarize(cs=max(cs1)) | |
lines <- with(lines, data.frame(x=head(order, -1), | |
xend=tail(order, -1), | |
y=head(cs, -1), | |
yend=head(cs, -1))) | |
## Add the offset parameter | |
df <- transform(df, offset=offset) | |
## Make the plot | |
gg <- ggplot() + | |
geom_segment(data=lines, aes(x=x, y=y, xend=xend, yend=yend), linetype="dashed") + | |
geom_rect(data=df, aes(xmin=order - offset, | |
xmax=order + offset, | |
ymin=min, | |
ymax=max, fill=sector)) + | |
scale_x_continuous(breaks=unique(df$order), labels=unique(df$category)) | |
return(gg) | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment