Skip to content

Instantly share code, notes, and snippets.

@dragosmg
Created March 30, 2022 09:24
Show Gist options
  • Select an option

  • Save dragosmg/673a5b23a434adbe0d4e972d7b8a4620 to your computer and use it in GitHub Desktop.

Select an option

Save dragosmg/673a5b23a434adbe0d4e972d7b8a4620 to your computer and use it in GitHub Desktop.
Benchmarking the difference between the old implementation of `decimal_date()` (making more use of `difftime`) and the new implementation
# Old implementation, registered as `decimal_date_strptime`: the length of the
# year is obtained from a second `difftime` call between two year starts.
register_binding("decimal_date_strptime", function(date) {
  yr <- build_expr("year", date)

  # Start of the year containing `date` and start of the following year,
  # both built with tz = "UTC".
  year_begin <- call_binding("make_datetime", year = yr, tz = "UTC")
  year_finish <- call_binding("make_datetime", year = yr + 1L, tz = "UTC")

  # Seconds elapsed since the start of the year, then seconds in the whole
  # year. NOTE(review): the original author wondered whether `yday` could be
  # used for this step instead.
  elapsed <- call_binding("difftime", date, year_begin, units = "secs")
  year_length <- call_binding(
    "difftime",
    year_finish,
    year_begin,
    units = "secs"
  )

  # Both durations are cast to int64 before the division.
  fraction <- elapsed$cast(int64()) / year_length$cast(int64())
  yr + fraction
})
# New implementation, registered as `decimal_date`: the length of the year is
# chosen from two constants depending on whether the year is a leap year.
register_binding("decimal_date", function(date) {
  yr <- build_expr("year", date)

  # Seconds elapsed between the start of the year (tz = "UTC") and `date`.
  year_begin <- call_binding("make_datetime", year = yr, tz = "UTC")
  elapsed <- call_binding("difftime", date, year_begin, units = "secs")

  # Number of seconds in a leap year (366 days) and a regular year (365 days).
  leap_year_secs <- Expression$scalar(31622400L)
  regular_year_secs <- Expression$scalar(31536000L)

  year_length <- call_binding(
    "if_else",
    build_expr("is_leap_year", date),
    leap_year_secs,
    regular_year_secs
  )

  # Only the elapsed duration needs a cast; the year length is already integer.
  fraction <- elapsed$cast(int64()) / year_length
  yr + fraction
})
# Benchmark input: decimal years (`a`), date-times (`b`) and dates (`c`).
# Every column ends with a missing value. Built inside `local()` so the
# intermediate vectors do not leak into the calling environment.
test_df <- local({
  decimal_years <- c(
    2007.38998954347, 1970.77732069883, 2020.96061799722,
    2009.43465948477, 1975.71251467871, NA
  )
  date_times <- as.POSIXct(
    c(
      "2007-05-23 08:18:30", "1970-10-11 17:19:45", "2020-12-17 14:04:06",
      "2009-06-08 15:37:01", "1975-09-18 01:37:42", NA
    )
  )
  dates <- as.Date(
    c(
      "2007-05-23", "1970-10-11", "2020-12-17",
      "2009-06-08", "1975-09-18", NA
    )
  )

  tibble(a = decimal_years, b = date_times, c = dates)
})
# Test 1: both bindings applied to the POSIXct column `b`.
# At least 100 iterations are requested for each expression.
test1 <- bench::mark(
  new_implementation = test_df %>%
    arrow_table() %>%
    mutate(decimal_date_from_POSIXct = decimal_date(b)) %>%
    collect(),
  old_implementation = test_df %>%
    arrow_table() %>%
    mutate(decimal_date_from_POSIXct = decimal_date_strptime(b)) %>%
    collect(),
  min_iterations = 100
)

ggplot2::autoplot(test1)
# Test 2: both bindings applied to a single R POSIXct object rather than to a
# column of the table. At least 100 iterations are requested per expression.
test2 <- bench::mark(
  new_implementation = test_df %>%
    arrow_table() %>%
    mutate(
      decimal_date_from_r_POSIXct_obj = decimal_date(
        as.POSIXct("2022-03-25 15:37:01")
      )
    ) %>%
    collect(),
  old_implementation = test_df %>%
    arrow_table() %>%
    mutate(
      decimal_date_from_r_POSIXct_obj = decimal_date_strptime(
        as.POSIXct("2022-03-25 15:37:01")
      )
    ) %>%
    collect(),
  min_iterations = 100
)

ggplot2::autoplot(test2)
# Test 3: both bindings applied to a single R date object created with
# `ymd()`. At least 100 iterations are requested per expression.
test3 <- bench::mark(
  new_implementation = test_df %>%
    arrow_table() %>%
    mutate(
      decimal_date_from_r_date_obj = decimal_date(ymd("2022-03-25"))
    ) %>%
    collect(),
  old_implementation = test_df %>%
    arrow_table() %>%
    mutate(
      decimal_date_from_r_date_obj = decimal_date_strptime(ymd("2022-03-25"))
    ) %>%
    collect(),
  min_iterations = 100
)

ggplot2::autoplot(test3)
# Test 4: both bindings applied to the Date column `c`.
# At least 100 iterations are requested for each expression.
test4 <- bench::mark(
  new_implementation = test_df %>%
    arrow_table() %>%
    mutate(decimal_date_from_date = decimal_date(c)) %>%
    collect(),
  old_implementation = test_df %>%
    arrow_table() %>%
    mutate(decimal_date_from_date = decimal_date_strptime(c)) %>%
    collect(),
  min_iterations = 100
)

ggplot2::autoplot(test4)
# Test 5: all four `decimal_date` inputs from tests 1-4 in a single `mutate()`.
# The two `date_decimal()` columns are identical in both expressions, so only
# the `decimal_date` binding differs between them.
test5 <- bench::mark(
  new_implementation = test_df %>%
    arrow_table() %>%
    mutate(
      decimal_date_from_POSIXct = decimal_date(b),
      decimal_date_from_r_POSIXct_obj = decimal_date(
        as.POSIXct("2022-03-25 15:37:01")
      ),
      decimal_date_from_r_date_obj = decimal_date(ymd("2022-03-25")),
      decimal_date_from_date = decimal_date(c),
      date_from_decimal = date_decimal(a),
      date_from_decimal_r_obj = date_decimal(2022.178)
    ) %>%
    collect(),
  old_implementation = test_df %>%
    arrow_table() %>%
    mutate(
      decimal_date_from_POSIXct = decimal_date_strptime(b),
      decimal_date_from_r_POSIXct_obj = decimal_date_strptime(
        as.POSIXct("2022-03-25 15:37:01")
      ),
      decimal_date_from_r_date_obj = decimal_date_strptime(ymd("2022-03-25")),
      decimal_date_from_date = decimal_date_strptime(c),
      date_from_decimal = date_decimal(a),
      date_from_decimal_r_obj = date_decimal(2022.178)
    ) %>%
    collect(),
  min_iterations = 100
)
# Themed plots ----
# Combined benchmark (test5) with the hrbrthemes theme and colour scale.
ggplot2::autoplot(test5) +
  hrbrthemes::theme_ipsum_rc() +
  hrbrthemes::scale_color_ipsum()

# POSIXct-column benchmark (test1) with the same styling plus a title.
ggplot2::autoplot(test1) +
  hrbrthemes::theme_ipsum_rc() +
  hrbrthemes::scale_color_ipsum() +
  ggplot2::ggtitle("Test1 = decimal_date from POSIXct column")

# Default plots ----
# Remaining benchmarks, and test5 once more, with the default autoplot styling.
ggplot2::autoplot(test2)
ggplot2::autoplot(test3)
ggplot2::autoplot(test4)
ggplot2::autoplot(test5)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment