Last active
June 5, 2020 11:25
-
-
Save CorradoLanera/da8f7bca18f5089a4f67cb0d73db98ac to your computer and use it in GitHub Desktop.
What Harrell's {Hmisc} thinks about "types". AKA: how to know (in advance) what will be `plot()`ted from an `Hmisc::describe()` object?
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## You should have {Hmisc} to check what it produce :-) | |
requireNamespace("Hmisc", quietly = TRUE) | |
## If you would run the tests (you do not have to), you need {testthat}. | |
## TL;DR: passing tests won't produce any output! | |
can_test <- requireNamespace("testthat", quietly = TRUE) | |
## Note: for easy of notation I will call an object of class "describe" | |
## in the Harrell's world as "Hdesc" object (w/ capital H, in his | |
## honour). | |
## Instructions: You can infer how to use each function (including the | |
## results it should produce in output) from the test | |
## definitions and their expectations. | |
# Class checks ---------------------------------------------------- | |
is_single_Hdesc <- function(x) { | |
(class(x) == "describe") && (class(x[[1L]]) != "describe") | |
} | |
if (can_test) { | |
testthat::test_that("original check works", { | |
test_desc <- Hmisc::describe(mtcars) | |
testthat::expect_false(is_single_Hdesc(test_desc)) | |
testthat::expect_true(is_single_Hdesc(test_desc[[1]])) | |
testthat::expect_false(is_single_Hdesc(mtcars)) | |
}) | |
} | |
is_Hdesc <- function(x) { | |
# if `Hmisc::describe()` results in a single variable, it is directly | |
# the single Hdesc object and not a list with only a single Hdesc | |
# object! | |
(class(x) == "describe") | |
} | |
if (can_test) { | |
testthat::test_that("original check works", { | |
test_desc <- Hmisc::describe(mtcars) | |
testthat::expect_true(is_Hdesc(test_desc)) | |
testthat::expect_true(is_Hdesc(test_desc[[1]])) | |
testthat::expect_false(is_Hdesc(mtcars)) | |
}) | |
} | |
# Single-type checks ---------------------------------------------- | |
## Note: The following two functions have been produced/deduced from | |
## reading the source code of `Hmisc::plot.describe`, in | |
## particular from the definition of the (two distinct) functions | |
## `f` defined within it (one for categorical variables and the | |
## other for continuous variables). Both lead to a possible | |
## execution of `warning("no categorical variable found")` or | |
## `warning("no continuous variable found")`, suggesting that the | |
## rules I was looking for should be found in those lines. | |
## I tried to keep the same names/code/logic that I found there. | |
is_Hcat <- function(x) { | |
# Note: the following syntax for `stopifnot()` require R4.0.0+ | |
stopifnot( | |
`x must be a single (sub)element of an Hmisc::describe() output` = | |
is_single_Hdesc(x) | |
) | |
s <- x$counts | |
v <- x$values | |
(("Sum" %in% names(s)) && (as.numeric(s["Sum"]) > 0)) || | |
( | |
length(v) && | |
is.list(v) && | |
all(names(v) == c("value", "frequency")) && | |
length(v$frequency) && | |
is.character(v$value) && (length(v$value) <= 20) | |
) | |
} | |
if (can_test) { | |
testthat::test_that("original check works", { | |
test_desc <- Hmisc::describe(mtcars) | |
testthat::expect_true(is_Hcat(test_desc[["vs"]])) | |
testthat::expect_false(is_Hcat(test_desc[["mpg"]])) | |
testthat::expect_error(is_Hcat(test_desc), "must be a single") | |
}) | |
} | |
is_Hcon <- function(x, n.unique = 10) { | |
stopifnot( | |
`x must be a single (sub)element of an Hmisc::describe() output` = | |
is_single_Hdesc(x) | |
) | |
s <- x$counts | |
v <- x$values | |
length(v) && | |
is.list(v) && | |
all(names(v) == c("value", "frequency")) && | |
("distinct" %in% names(s)) && | |
(as.numeric(s["distinct"]) >= n.unique) && | |
(is.numeric(v$value) || Hmisc::testDateTime(v$value, "either")) | |
} | |
if (can_test) { | |
testthat::test_that("original check works", { | |
test_desc <- Hmisc::describe(mtcars) | |
testthat::expect_false(is_Hcon(test_desc[["vs"]])) | |
testthat::expect_true(is_Hcon(test_desc[["mpg"]])) | |
testthat::expect_error(is_Hcat(test_desc), "must be a single") | |
test_nunique <- Hmisc::describe(airquality) | |
testthat::expect_false(is_Hcon(test_nunique[["Month"]])) | |
testthat::expect_true( | |
is_Hcon(test_nunique[["Month"]], n.unique = 4) | |
) | |
}) | |
} | |
# Get a single type ----------------------------------------------- | |
Htype <- function(x, n.unique = 10) { | |
is_con <- is_Hcon(x, n.unique = n.unique) | |
is_cat <- is_Hcat(x) | |
htype <- c("cat", "con")[c(is_cat, is_con)] | |
if (length(htype) == 0) return("none") | |
if (length(htype) == 1) return(htype) | |
if (length(htype) == 2) return({ | |
warning( | |
"Strange behaviour: both cat and con! (this would never happen)" | |
) | |
"both" | |
}) | |
} | |
if (can_test) { | |
testthat::test_that("Htypes works", { | |
test_desc <- Hmisc::describe(mtcars) | |
testthat::expect_equal(Htype(test_desc[["vs"]]), "cat") | |
testthat::expect_equal(Htype(test_desc[["mpg"]]), "con") | |
testthat::expect_equal(Htype(test_desc[["carb"]]), "none") | |
testthat::expect_equal( | |
Htype(test_desc[["carb"]], n.unique = 4), | |
"con" | |
) | |
testthat::expect_error(Htype(test_desc), "must be a single") | |
}) | |
} | |
# Generic and methods for general usage --------------------------- | |
Htypes <- function(x, n.unique) { | |
UseMethod("Htypes", x) | |
} | |
Htypes.describe <- function(x, n.unique = 10) { | |
stopifnot( | |
`x must be an Hmisc::describe() object (or one of its elements)` = | |
is_Hdesc(x) | |
) | |
if (is_single_Hdesc(x)) return(Htype(x, n.unique = n.unique)) | |
vapply(x, Htype, FUN.VALUE = character(1)) | |
} | |
Htypes.default <- function(x, n.unique = 10) { | |
Htypes(Hmisc::describe(x)) | |
} | |
if (can_test) { | |
testthat::test_that("Htypes works", { | |
test_desc <- Hmisc::describe(mtcars) | |
# Note: the following expectation was deduced from the output of | |
# `plot(test_desc)`. | |
expected <- c( | |
mpg = "con", disp = "con", hp = "con", | |
drat = "con", wt = "con", qsec = "con", | |
vs = "cat", am = "cat", | |
cyl = "none", gear = "none", carb = "none" | |
) | |
## all together | |
# Note: `expect_setequal()` because the order would be probabily | |
# different. | |
testthat::expect_setequal(Htypes(test_desc), expected) | |
## one at time | |
testthat::expect_equal(Htypes(test_desc[["mpg"]]), "con") | |
testthat::expect_equal(Htypes(test_desc[["vs"]]), "cat") | |
testthat::expect_equal(Htypes(test_desc[["cyl"]]), "none") | |
## input directly a data.frame | |
testthat::expect_setequal(Htypes(mtcars), expected) | |
## input a single vector | |
# Note: `plot(describe(letters))` throws an error | |
testthat::expect_equal(Htypes(letters), "none") | |
}) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment