Created
March 30, 2022 09:24
-
-
Save dragosmg/673a5b23a434adbe0d4e972d7b8a4620 to your computer and use it in GitHub Desktop.
Benchmarking the difference between the old implementation of `decimal_date()` (making more use of `difftime`) and the new implementation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Old implementation, registered under the name "decimal_date_strptime". | |
| # The fractional part of the year is elapsed seconds divided by the number | |
| # of seconds in the year, with both values obtained via the "difftime" binding. | |
| register_binding("decimal_date_strptime", function(date) { | |
|   yr <- build_expr("year", date) | |
|   # Datetimes built from the year alone (this year and the next), in UTC. | |
|   year_start <- call_binding("make_datetime", year = yr, tz = "UTC") | |
|   next_year_start <- call_binding("make_datetime", year = yr + 1L, tz = "UTC") | |
|   # Author's note: yday might be usable here instead. | |
|   elapsed_secs <- call_binding("difftime", date, year_start, units = "secs") | |
|   year_secs <- call_binding( | |
|     "difftime", next_year_start, year_start, | |
|     units = "secs" | |
|   ) | |
|   # Cast both difftime results to int64 before dividing. | |
|   yr + elapsed_secs$cast(int64()) / year_secs$cast(int64()) | |
| }) | |
| # New implementation, registered under the name "decimal_date". | |
| # Elapsed seconds are still computed with "difftime", but the length of the | |
| # year is chosen between two constants based on "is_leap_year" instead of | |
| # being computed with a second "difftime" call. | |
| register_binding("decimal_date", function(date) { | |
|   yr <- build_expr("year", date) | |
|   year_start <- call_binding("make_datetime", year = yr, tz = "UTC") | |
|   elapsed_secs <- call_binding("difftime", date, year_start, units = "secs") | |
|   # Number of seconds in a leap year (366 days) and a regular year (365 days). | |
|   leap_year_secs <- Expression$scalar(31622400L) | |
|   regular_year_secs <- Expression$scalar(31536000L) | |
|   year_secs <- call_binding( | |
|     "if_else", | |
|     build_expr("is_leap_year", date), | |
|     leap_year_secs, | |
|     regular_year_secs | |
|   ) | |
|   yr + elapsed_secs$cast(int64()) / year_secs | |
| }) | |
| # Benchmark fixture: decimal years (a), date-times (b) and dates (c). | |
| # Every column ends with a missing value. | |
| test_df <- tibble( | |
|   a = c( | |
|     2007.38998954347, | |
|     1970.77732069883, | |
|     2020.96061799722, | |
|     2009.43465948477, | |
|     1975.71251467871, | |
|     NA | |
|   ), | |
|   b = as.POSIXct(c( | |
|     "2007-05-23 08:18:30", | |
|     "1970-10-11 17:19:45", | |
|     "2020-12-17 14:04:06", | |
|     "2009-06-08 15:37:01", | |
|     "1975-09-18 01:37:42", | |
|     NA | |
|   )), | |
|   c = as.Date(c( | |
|     "2007-05-23", | |
|     "1970-10-11", | |
|     "2020-12-17", | |
|     "2009-06-08", | |
|     "1975-09-18", | |
|     NA | |
|   )) | |
| ) | |
| # Benchmark 1: decimal date computed from the date-time column `b`. | |
| # Each arm converts test_df to an Arrow table, adds the column and collects. | |
| test1 <- bench::mark( | |
|   new_implementation = test_df %>% | |
|     arrow_table() %>% | |
|     mutate(decimal_date_from_POSIXct = decimal_date(b)) %>% | |
|     collect(), | |
|   old_implementation = test_df %>% | |
|     arrow_table() %>% | |
|     mutate(decimal_date_from_POSIXct = decimal_date_strptime(b)) %>% | |
|     collect(), | |
|   min_iterations = 100 | |
| ) | |
| ggplot2::autoplot(test1) | |
| # Benchmark 2: decimal date computed from a literal as.POSIXct() value | |
| # rather than from a column of the table. | |
| test2 <- bench::mark( | |
|   new_implementation = test_df %>% | |
|     arrow_table() %>% | |
|     mutate( | |
|       decimal_date_from_r_POSIXct_obj = | |
|         decimal_date(as.POSIXct("2022-03-25 15:37:01")) | |
|     ) %>% | |
|     collect(), | |
|   old_implementation = test_df %>% | |
|     arrow_table() %>% | |
|     mutate( | |
|       decimal_date_from_r_POSIXct_obj = | |
|         decimal_date_strptime(as.POSIXct("2022-03-25 15:37:01")) | |
|     ) %>% | |
|     collect(), | |
|   min_iterations = 100 | |
| ) | |
| ggplot2::autoplot(test2) | |
| # Benchmark 3: decimal date computed from a literal ymd("2022-03-25") value | |
| # rather than from a column of the table. | |
| test3 <- bench::mark( | |
|   new_implementation = test_df %>% | |
|     arrow_table() %>% | |
|     mutate( | |
|       decimal_date_from_r_date_obj = decimal_date(ymd("2022-03-25")) | |
|     ) %>% | |
|     collect(), | |
|   old_implementation = test_df %>% | |
|     arrow_table() %>% | |
|     mutate( | |
|       decimal_date_from_r_date_obj = decimal_date_strptime(ymd("2022-03-25")) | |
|     ) %>% | |
|     collect(), | |
|   min_iterations = 100 | |
| ) | |
| ggplot2::autoplot(test3) | |
| # Benchmark 4: decimal date computed from the date column `c`. | |
| test4 <- bench::mark( | |
|   new_implementation = test_df %>% | |
|     arrow_table() %>% | |
|     mutate(decimal_date_from_date = decimal_date(c)) %>% | |
|     collect(), | |
|   old_implementation = test_df %>% | |
|     arrow_table() %>% | |
|     mutate(decimal_date_from_date = decimal_date_strptime(c)) %>% | |
|     collect(), | |
|   min_iterations = 100 | |
| ) | |
| ggplot2::autoplot(test4) | |
| # Benchmark 5: all four decimal-date inputs from benchmarks 1-4 in a single | |
| # mutate(). Both arms also add the same two date_decimal() columns, so the | |
| # arms differ only in which decimal-date binding they call. | |
| test5 <- bench::mark( | |
|   new_implementation = test_df %>% | |
|     arrow_table() %>% | |
|     mutate( | |
|       decimal_date_from_POSIXct = decimal_date(b), | |
|       decimal_date_from_r_POSIXct_obj = | |
|         decimal_date(as.POSIXct("2022-03-25 15:37:01")), | |
|       decimal_date_from_r_date_obj = decimal_date(ymd("2022-03-25")), | |
|       decimal_date_from_date = decimal_date(c), | |
|       date_from_decimal = date_decimal(a), | |
|       date_from_decimal_r_obj = date_decimal(2022.178) | |
|     ) %>% | |
|     collect(), | |
|   old_implementation = test_df %>% | |
|     arrow_table() %>% | |
|     mutate( | |
|       decimal_date_from_POSIXct = decimal_date_strptime(b), | |
|       decimal_date_from_r_POSIXct_obj = | |
|         decimal_date_strptime(as.POSIXct("2022-03-25 15:37:01")), | |
|       decimal_date_from_r_date_obj = decimal_date_strptime(ymd("2022-03-25")), | |
|       decimal_date_from_date = decimal_date_strptime(c), | |
|       date_from_decimal = date_decimal(a), | |
|       date_from_decimal_r_obj = date_decimal(2022.178) | |
|     ) %>% | |
|     collect(), | |
|   min_iterations = 100 | |
| ) | |
| # Themed plot of the combined benchmark (test5). | |
| ggplot2::autoplot(test5) + | |
|   hrbrthemes::theme_ipsum_rc() + | |
|   hrbrthemes::scale_color_ipsum() | |
| # Themed and titled plot of the POSIXct-column benchmark (test1). | |
| ggplot2::autoplot(test1) + | |
|   hrbrthemes::theme_ipsum_rc() + | |
|   hrbrthemes::scale_color_ipsum() + | |
|   ggplot2::ggtitle("Test1 = decimal_date from POSIXct column") | |
| # Default-styled plots of the remaining benchmarks. | |
| ggplot2::autoplot(test2) | |
| ggplot2::autoplot(test3) | |
| ggplot2::autoplot(test4) | |
| ggplot2::autoplot(test5) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment