Created
February 19, 2021 10:50
-
-
Save pr130/5959d2b141f3682f249c0289ee045a8c to your computer and use it in GitHub Desktop.
ggplot geom_line NA behaviour
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse)

# Load the daily per-platform data. The columns used further down are
# `date`, `platform` and `n`.
# NOTE(review): assumes read_csv parses `date` as a Date column -- confirm.
data <- readr::read_csv(
  "https://raw.githubusercontent.com/friep/correlaid-utils/main/correlaid-analytics/data/all_daily.csv"
)

# Clean out facebook outliers / errors: facebook rows after 2020-01-01 with
# n below 10.
# filter() drops rows whose condition evaluates to NA, so without the
# !is.na(n) guard every facebook row after 2020-01-01 with a missing n would
# be removed as well. The guard keeps those explicitly-missing rows.
data <- data %>%
  filter(
    !(platform == "facebook" &
      date > as.Date("2020-01-01") &
      !is.na(n) &
      n < 10)
  )
# geom_line() joins the observations on either side of the stretch without
# rows, drawing a segment straight across the gap -- not what is wanted here.
ggplot(data, aes(x = date, y = n, color = platform)) +
  geom_line()
# Workaround: plot points instead of a line, so nothing is drawn across the
# period without data.
data %>%
  ggplot(aes(x = date, y = n, color = platform)) +
  geom_point(size = 0.5) +
  labs(
    title = "CorrelAid platforms over time",
    caption = "No data was collected between end of 2017 and autumn 2020",
    x = ""
  )
# Count rows per year, split by whether n is NA, with one panel per platform
# (raw data, before completing the date grid).
ggplot(data, aes(x = lubridate::year(date), fill = is.na(n))) +
  geom_bar(position = "stack") +
  facet_wrap(vars(platform), ncol = 2) +
  scale_fill_manual(values = c("TRUE" = "grey", "FALSE" = "blue")) +
  labs(title = "NA vs non-NA values per year and platform")
# It comes down to whether values are implicitly missing (the row is not
# there) or explicitly missing (the row is there with NA for n).
#
# Make the missing rows explicit: tidyr::complete() builds every combination
# of a daily date sequence (first to last observed date) and the platforms
# present in the data, and fills the columns of newly added rows with NA.
# This is the tidyverse equivalent of expand.grid() + left_join(); it also
# keeps `platform` as-is instead of turning it into a factor, and needs no
# guessed join keys.
data_complete <- data %>%
  tidyr::complete(
    date = seq(min(date), max(date), by = "day"),
    platform
  )

# With explicit NA rows in place, geom_line() no longer bridges the gap.
data_complete %>%
  ggplot(aes(x = date, y = n, color = platform)) +
  geom_line()
# Same per-year NA / non-NA row count as for the raw data, now on the
# completed data, with one panel per platform.
ggplot(data_complete, aes(x = lubridate::year(date), fill = is.na(n))) +
  geom_bar(position = "stack") +
  facet_wrap(vars(platform), ncol = 2) +
  scale_fill_manual(values = c("TRUE" = "grey", "FALSE" = "blue")) +
  labs(title = "NA vs non-NA values per year and platform")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment