Skip to content

Instantly share code, notes, and snippets.

@CorradoLanera
Last active June 5, 2020 11:25
Show Gist options
  • Save CorradoLanera/da8f7bca18f5089a4f67cb0d73db98ac to your computer and use it in GitHub Desktop.
Save CorradoLanera/da8f7bca18f5089a4f67cb0d73db98ac to your computer and use it in GitHub Desktop.
What Harrell's {Hmisc} thinks about "types". AKA: how to know (in advance) what will be `plot()`ted from an `Hmisc::describe()` object?
## You should have {Hmisc} to check what it produce :-)
requireNamespace("Hmisc", quietly = TRUE)
## If you would run the tests (you do not have to), you need {testthat}.
## TL;DR: passing tests won't produce any output!
can_test <- requireNamespace("testthat", quietly = TRUE)
## Note: for easy of notation I will call an object of class "describe"
## in the Harrell's world as "Hdesc" object (w/ capital H, in his
## honour).
## Instructions: You can infer how to use each function (including the
## results it should produce in output) from the test
## definitions and their expectations.
# Class checks ----------------------------------------------------
is_single_Hdesc <- function(x) {
(class(x) == "describe") && (class(x[[1L]]) != "describe")
}
if (can_test) {
testthat::test_that("original check works", {
test_desc <- Hmisc::describe(mtcars)
testthat::expect_false(is_single_Hdesc(test_desc))
testthat::expect_true(is_single_Hdesc(test_desc[[1]]))
testthat::expect_false(is_single_Hdesc(mtcars))
})
}
is_Hdesc <- function(x) {
# if `Hmisc::describe()` results in a single variable, it is directly
# the single Hdesc object and not a list with only a single Hdesc
# object!
(class(x) == "describe")
}
if (can_test) {
testthat::test_that("original check works", {
test_desc <- Hmisc::describe(mtcars)
testthat::expect_true(is_Hdesc(test_desc))
testthat::expect_true(is_Hdesc(test_desc[[1]]))
testthat::expect_false(is_Hdesc(mtcars))
})
}
# Single-type checks ----------------------------------------------
## Note: The following two functions have been produced/deduced from
## reading the source code of `Hmisc::plot.describe`, in
## particular from the definition of the (two distinct) functions
## `f` defined within it (one for categorical variables and the
## other for continuous variables). Both lead to a possible
## execution of `warning("no categorical variable found")` or
## `warning("no continuous variable found")`, suggesting that the
## rules I was looking for should be found in those lines.
## I tried to keep the same names/code/logic that I found there.
is_Hcat <- function(x) {
# Note: the following syntax for `stopifnot()` require R4.0.0+
stopifnot(
`x must be a single (sub)element of an Hmisc::describe() output` =
is_single_Hdesc(x)
)
s <- x$counts
v <- x$values
(("Sum" %in% names(s)) && (as.numeric(s["Sum"]) > 0)) ||
(
length(v) &&
is.list(v) &&
all(names(v) == c("value", "frequency")) &&
length(v$frequency) &&
is.character(v$value) && (length(v$value) <= 20)
)
}
if (can_test) {
testthat::test_that("original check works", {
test_desc <- Hmisc::describe(mtcars)
testthat::expect_true(is_Hcat(test_desc[["vs"]]))
testthat::expect_false(is_Hcat(test_desc[["mpg"]]))
testthat::expect_error(is_Hcat(test_desc), "must be a single")
})
}
is_Hcon <- function(x, n.unique = 10) {
stopifnot(
`x must be a single (sub)element of an Hmisc::describe() output` =
is_single_Hdesc(x)
)
s <- x$counts
v <- x$values
length(v) &&
is.list(v) &&
all(names(v) == c("value", "frequency")) &&
("distinct" %in% names(s)) &&
(as.numeric(s["distinct"]) >= n.unique) &&
(is.numeric(v$value) || Hmisc::testDateTime(v$value, "either"))
}
if (can_test) {
testthat::test_that("original check works", {
test_desc <- Hmisc::describe(mtcars)
testthat::expect_false(is_Hcon(test_desc[["vs"]]))
testthat::expect_true(is_Hcon(test_desc[["mpg"]]))
testthat::expect_error(is_Hcat(test_desc), "must be a single")
test_nunique <- Hmisc::describe(airquality)
testthat::expect_false(is_Hcon(test_nunique[["Month"]]))
testthat::expect_true(
is_Hcon(test_nunique[["Month"]], n.unique = 4)
)
})
}
# Get a single type -----------------------------------------------
Htype <- function(x, n.unique = 10) {
is_con <- is_Hcon(x, n.unique = n.unique)
is_cat <- is_Hcat(x)
htype <- c("cat", "con")[c(is_cat, is_con)]
if (length(htype) == 0) return("none")
if (length(htype) == 1) return(htype)
if (length(htype) == 2) return({
warning(
"Strange behaviour: both cat and con! (this would never happen)"
)
"both"
})
}
if (can_test) {
testthat::test_that("Htypes works", {
test_desc <- Hmisc::describe(mtcars)
testthat::expect_equal(Htype(test_desc[["vs"]]), "cat")
testthat::expect_equal(Htype(test_desc[["mpg"]]), "con")
testthat::expect_equal(Htype(test_desc[["carb"]]), "none")
testthat::expect_equal(
Htype(test_desc[["carb"]], n.unique = 4),
"con"
)
testthat::expect_error(Htype(test_desc), "must be a single")
})
}
# Generic and methods for general usage ---------------------------
Htypes <- function(x, n.unique) {
UseMethod("Htypes", x)
}
Htypes.describe <- function(x, n.unique = 10) {
stopifnot(
`x must be an Hmisc::describe() object (or one of its elements)` =
is_Hdesc(x)
)
if (is_single_Hdesc(x)) return(Htype(x, n.unique = n.unique))
vapply(x, Htype, FUN.VALUE = character(1))
}
Htypes.default <- function(x, n.unique = 10) {
Htypes(Hmisc::describe(x))
}
if (can_test) {
testthat::test_that("Htypes works", {
test_desc <- Hmisc::describe(mtcars)
# Note: the following expectation was deduced from the output of
# `plot(test_desc)`.
expected <- c(
mpg = "con", disp = "con", hp = "con",
drat = "con", wt = "con", qsec = "con",
vs = "cat", am = "cat",
cyl = "none", gear = "none", carb = "none"
)
## all together
# Note: `expect_setequal()` because the order would be probabily
# different.
testthat::expect_setequal(Htypes(test_desc), expected)
## one at time
testthat::expect_equal(Htypes(test_desc[["mpg"]]), "con")
testthat::expect_equal(Htypes(test_desc[["vs"]]), "cat")
testthat::expect_equal(Htypes(test_desc[["cyl"]]), "none")
## input directly a data.frame
testthat::expect_setequal(Htypes(mtcars), expected)
## input a single vector
# Note: `plot(describe(letters))` throws an error
testthat::expect_equal(Htypes(letters), "none")
})
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment