Skip to content

Instantly share code, notes, and snippets.

@cpcloud
Created January 13, 2022 02:07
Show Gist options
  • Save cpcloud/b83e549e325423c74e6946d0eca0da4d to your computer and use it in GitHub Desktop.
Save cpcloud/b83e549e325423c74e6946d0eca0da4d to your computer and use it in GitHub Desktop.
ibis ci analysis
library(ggplot2)
library(stringr)
library(tidyr)
library(ggh4x)
library(bigrquery)
library(DBI)
library(dplyr, warn.conflicts = FALSE)
library(lubridate, warn.conflicts = FALSE)
suppressPackageStartupMessages(library(hrbrthemes))
# Display a plot in a fresh X11 window and keep the R process alive.
#
# p: any object that plot() can draw (the script passes a ggplot).
#
# The call does not return on its own: it parks in Sys.sleep(Inf) after
# drawing, so the window stays open until the process is interrupted.
disp <- function(p) {
  grDevices::x11()
  plot(p)
  Sys.sleep(Inf)
}
# DBI connection to BigQuery, scoped to the "workflows" dataset of the
# "ibis-gbq" project. The argument list deliberately ends without a trailing
# comma: a trailing comma passes an extra, empty argument to dbConnect().
con <- dbConnect(
  bigrquery::bigquery(),
  project = "ibis-gbq",
  dataset = "workflows"
)
# ---- Remote tables ----------------------------------------------------------
# Table handles on the BigQuery connection; rows are pulled into R only by the
# collect() call further down.
workflows <- tbl(con, "workflows")
jobs <- tbl(con, "jobs")

# ---- Analysis parameters ----------------------------------------------------
# Date used to split runs into before/after ("has_poetry") groups and to place
# the vertical marker on the plot.
poetry_merged <- as.Date("2021-10-15")
# Percentiles (in percent) computed for the job duration.
PERCENTILE <- c(50, 85, 95, 99)
BREAKS <- "day"

# ---- Plot cosmetics ---------------------------------------------------------
POETRY_MERGED_LABEL <- "Move to poetry"
NUDGE_X <- -30
TEXT_SIZE <- 7
# Panel layout handed to facet_manual(): one area per row, B above A above C.
DESIGN <- "
B
A
C
"
# Per-job durations joined to their workflow, pulled into a local data frame.
#
# Everything before collect() is translated to SQL by dbplyr and runs in
# BigQuery. Output columns: workflow_name, job_name, duration (seconds),
# started_date, has_poetry.
data <- jobs %>%
  transmute(
    job_name = name,
    # timestamp_diff() is not an R function; dbplyr forwards it to the
    # database as-is. The date part is wrapped in sql() so it is emitted as the
    # bare keyword SECOND instead of being looked up as a column/R object
    # (which dbplyr either quotes as an identifier or rejects, depending on
    # the version).
    duration = timestamp_diff(completed_at, started_at, sql("SECOND")),
    started_date = as.Date(started_at),
    run_id,
    # `!!` inlines the local R value of poetry_merged into the query.
    has_poetry = started_at > !!poetry_merged
  ) %>%
  # jobs.name was renamed to job_name above, so after the join `name` is the
  # workflow's name.
  inner_join(workflows, by = c("run_id" = "id")) %>%
  transmute(
    workflow_name = name,
    job_name,
    duration,
    started_date,
    has_poetry
  ) %>%
  collect()
# Daily duration statistics in long form.
#
# For every (started_date, has_poetry) group this computes the mean and each
# percentile listed in PERCENTILE, then stacks them so that `name` holds the
# statistic ("duration_mean", "duration_50", "duration_85", ...) and `value`
# holds its value in seconds.

# One summary function per statistic. The list names become the column
# suffixes via across()'s default "{.col}_{.fn}" naming, so the percentile
# functions are named by their integer percent.
percentile_fns <- lapply(PERCENTILE, function(pct) {
  force(pct)
  function(x) quantile(x, pct / 100.0, na.rm = TRUE, names = FALSE)
})
names(percentile_fns) <- as.character(as.integer(PERCENTILE))

duration_stats <- c(
  # NA durations are dropped here too, consistently with the percentiles;
  # otherwise a single NA would turn the whole day's mean into NA.
  list(mean = function(x) mean(x, na.rm = TRUE)),
  percentile_fns
)

agg <- data %>%
  group_by(started_date, has_poetry) %>%
  # Exactly one row per group; the result is returned ungrouped.
  summarise(across(duration, duration_stats), .groups = "drop") %>%
  pivot_longer(cols = starts_with("duration"))
# Format durations as "HH:MM" labels (seconds are truncated, not rounded).
#
# sec: durations in seconds, either a plain numeric vector or a difftime
#      (hms values are difftime objects); difftimes are converted to seconds.
#
# Returns a character vector the same length as `sec`. Hours are zero-padded
# to at least two digits, negative durations get a leading "-", and
# NA / non-finite inputs yield NA_character_.
#
# The label is computed arithmetically rather than by trimming the printed
# "HH:MM:SS" text, so fractional seconds and plain numerics are handled too.
format_hm <- function(sec) {
  secs <- if (inherits(sec, "difftime")) {
    as.numeric(sec, units = "secs")
  } else {
    as.numeric(sec)
  }

  ok <- is.finite(secs)
  negative <- ok & secs < 0
  # Non-finite entries are computed as 0 and overwritten with NA below.
  whole_minutes <- floor(abs(replace(secs, !ok, 0)) / 60)

  out <- sprintf(
    "%s%02d:%02d",
    ifelse(negative, "-", ""),
    as.integer(whole_minutes %/% 60),
    as.integer(whole_minutes %% 60)
  )
  out[!ok] <- NA_character_
  out
}
# Series that get a panel: `name` value in `agg` -> facet strip label.
# `agg` contains one series per entry of PERCENTILE plus the mean, but DESIGN
# has three areas and only these three have labels, so the plot data is
# restricted to them.
facet_labels <- c(
  duration_50 = "Median",
  duration_85 = "85th Percentile",
  duration_mean = "Mean"
)

# Layer data for the "Move to poetry" annotation. A single row means the text
# is drawn once (in the 85th-percentile panel) instead of once per date;
# has_poetry is present so the inherited colour mapping can be evaluated.
poetry_annotation <- tibble(name = "duration_85", has_poetry = TRUE)

# Scatter plot of the daily duration statistics over time, one panel per
# statistic, with separate linear fits before and after `poetry_merged`
# (colour is mapped to has_poetry).
p <- agg %>%
  filter(value > 0, name %in% names(facet_labels)) %>%
  ggplot(
    aes(
      x = started_date,
      y = hms::as_hms(value),
      fill = name,
      color = has_poetry
    )
  ) +
  geom_point() +
  geom_smooth(method = lm) +
  theme_ft_rc() +
  theme(
    legend.position = "none",
    panel.spacing = unit(0.1, "lines"),
    axis.title.x = element_text(size = 10),
    axis.title.y = element_text(size = 15),
    strip.text.x = element_text(size = 20)
  ) +
  labs(x = "Date", title = "Ibis Workflow Duration Over Time") +
  scale_y_time(name = "Minutes", labels = format_hm) +
  # Vertical marker at the cut-over date.
  geom_vline(xintercept = poetry_merged, alpha = 0.3, colour = "white") +
  geom_text(
    data = poetry_annotation,
    # y = 2000 is a fixed height in seconds on the duration axis.
    aes(x = poetry_merged, label = POETRY_MERGED_LABEL, y = 2000),
    angle = 0,
    nudge_x = NUDGE_X,
    size = TEXT_SIZE
  ) +
  scale_x_date(date_labels = "%b %Y") +
  facet_manual(
    ~name,
    # scales = "free_y",
    design = DESIGN,
    labeller = as_labeller(facet_labels)
  )
# Show the figure in an X11 window. disp() ends in Sys.sleep(Inf), so the
# script stays on this line until the process is interrupted.
disp(p)
# Alternative output (disabled): write the figure to a PNG file instead.
# ggsave("workflow_time.png", width = 15, height = 15, units = "in")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment