Skip to content

Instantly share code, notes, and snippets.

@pr130
Created February 19, 2021 10:50
Show Gist options
  • Save pr130/5959d2b141f3682f249c0289ee045a8c to your computer and use it in GitHub Desktop.
Save pr130/5959d2b141f3682f249c0289ee045a8c to your computer and use it in GitHub Desktop.
ggplot geom_line NA behaviour
library(tidyverse)
# Load the analytics export; the code below relies on its `date`, `platform`
# and `n` columns.
data <- readr::read_csv("https://raw.githubusercontent.com/friep/correlaid-utils/main/correlaid-analytics/data/all_daily.csv")

# Clean out facebook outliers / errors: facebook rows after 2020-01-01 with a
# count below 10.
# filter() drops every row whose condition evaluates to NA, so a bare
# `n < 10` would also discard facebook rows where `n` is missing and thereby
# turn explicitly missing values into implicitly missing ones. The
# `!is.na(n)` guard makes the outlier test FALSE for those rows so they stay.
data <- data %>%
  filter(
    !(platform == "facebook" &
      date > as.Date("2020-01-01") &
      !is.na(n) &
      n < 10)
  )
# Line chart of n over time, one colour per platform.
# Observation from the author: geom_line() joins the points on either side of
# a stretch with no data, which is not the desired look.
ggplot(data, aes(x = date, y = n, color = platform)) +
  geom_line()
# Workaround: draw small points instead of a line, so nothing is drawn across
# the stretches without data.
data %>%
  ggplot(aes(x = date, y = n, color = platform)) +
  geom_point(size = 0.5) +
  labs(
    title = "CorrelAid platforms over time",
    caption = "No data was collected between end of 2017 and autumn 2020",
    x = ""
  )
# Diagnostic: per platform, count the rows in each year and split the bars by
# whether n is NA (grey) or present (blue).
ggplot(data, aes(x = lubridate::year(date), fill = is.na(n))) +
  geom_bar(position = "stack") +
  facet_wrap(vars(platform), ncol = 2) +
  scale_fill_manual(
    values = c("TRUE" = "grey", "FALSE" = "blue")
  ) +
  labs(title = "NA vs non-NA values per year and platform")
# Turns out it comes down to whether values are implicitly missing (the row
# is not there) or explicitly missing (the row is there with an NA for n).
#
# Complete the data: build every (date, platform) combination over the full
# daily date range, so that absent rows become explicit rows with n = NA.
# tidyr::complete() is the tidyverse way to do this. Compared with
# expand.grid() + left_join() it keeps `platform` in its original type
# (expand.grid() converts strings to factors by default) and does not depend
# on left_join() guessing the join keys.
data_complete <- data %>%
  tidyr::complete(
    date = seq(min(date), max(date), by = "day"),
    platform
  )
# Same line chart as for the raw data, now drawn from the completed data in
# which missing days are explicit rows with n = NA.
ggplot(data_complete, aes(x = date, y = n, color = platform)) +
  geom_line()
# Same NA diagnostic as for the raw data, on the completed data: rows per year
# and platform, split by whether n is NA (grey) or present (blue).
ggplot(data_complete, aes(x = lubridate::year(date), fill = is.na(n))) +
  geom_bar(position = "stack") +
  facet_wrap(vars(platform), ncol = 2) +
  scale_fill_manual(
    values = c("TRUE" = "grey", "FALSE" = "blue")
  ) +
  labs(title = "NA vs non-NA values per year and platform")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment