Last active
June 3, 2025 23:21
-
-
Save MichaelChirico/8df9b42e0e42d32828b34a641d33dc8c to your computer and use it in GitHub Desktop.
Comparing plyr::rbind.fill and dplyr::bind_rows()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Quick-reference sheet comparing plyr::rbind.fill() with dplyr::bind_rows().
# NB: I am only interested in migrating rbind.fill() --> bind_rows(), so
# features of bind_rows() absent from rbind.fill(), e.g. .id=, are not examined.
# Short aliases so every comparison below reads the same for both packages;
# the `::` lookups mean neither package has to be attached.
rbind.fill = plyr::rbind.fill
bind_rows = dplyr::bind_rows

# Baseline: two frames with identical column names and types.
DF1 = data.frame(a = 1, b = 2)
DF2 = data.frame(a = 1, b = 2)
all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))
# API equivalence
## one list of frames instead of separate arguments
all.equal(rbind.fill(list(DF1, DF2)), bind_rows(list(DF1, DF2)))
## frames spliced in as separate arguments through do.call()
all.equal(do.call(rbind.fill, list(DF1, DF2)), do.call(bind_rows, list(DF1, DF2)))
## do.call() passing a single list-of-frames argument
all.equal(do.call(rbind.fill, list(list(DF1, DF2))), do.call(bind_rows, list(list(DF1, DF2))))
# column order mismatch
## rev() hands the second frame over with its columns in reverse order
all.equal(rbind.fill(DF1, rev(DF2)), bind_rows(DF1, rev(DF2)))

# column name mismatch
## DF1 has (a, b), DF2 has (a, c): only `a` is shared
DF2 = data.frame(a = 1, c = 3)
all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))
## name and order mismatch
all.equal(rbind.fill(DF1, rev(DF2)), bind_rows(DF1, rev(DF2)))
## both argument orders
all.equal(rbind.fill(DF2, DF1), bind_rows(DF2, DF1))

# no columns match
## DF1 has (a, b), DF2 has (c, d)
DF2 = data.frame(c = 3, d = 4)
all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))
## both argument orders
all.equal(rbind.fill(DF2, DF1), bind_rows(DF2, DF1))
# recursive columns
## data.frame
## each frame gets a column `DF` that is itself a one-row data.frame
DF1 = data.frame(a = 1)
DF1$DF = data.frame(b = 2, c = 3)
DF2 = data.frame(a = 4)
DF2$DF = data.frame(b = 5, c = 6)
### plyr DOES NOT SUPPORT data.frame COLUMNS
### all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))

## matrix, matching column names
## the nested data.frame columns are converted in place to matrix columns
DF1$DF = as.matrix(DF1$DF)
DF2$DF = as.matrix(DF2$DF)
### dplyr drops column names!
### all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))

## matrix, mismatch column names
colnames(DF2$DF) = c("d", "e")
### plyr ignores mismatch, dplyr drops column names
### all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))

## matrices with row names
## column names are cleared first so that only the row names differ
colnames(DF1$DF) = NULL
colnames(DF2$DF) = NULL
rownames(DF1$DF) = 'x'
rownames(DF2$DF) = 'y'
### plyr ignores row names, dplyr retains
### all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))
## list, unnamed
## the list column is attached with `$` after the frame is created
DF1 = data.frame(a = 1)
DF1$l = list(2)
DF2 = data.frame(a = 3)
DF2$l = list(4)
all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))

## list, named
DF1 = data.frame(a = 1)
DF1$l = list(b = 2)
DF2 = data.frame(a = 3)
DF2$l = list(b = 4)
### plyr drops list names, dplyr retains
### all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))

## list, mixed naming
## DF1$l keeps its element name, DF2$l loses it
names(DF2$l) = NULL
### plyr drops list names, dplyr retains
### all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))

## when fill is needed
## DF2 has no `l` column at all, so it has to be filled in
DF2 = data.frame(a = 3)
all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))
## both argument orders
all.equal(rbind.fill(DF2, DF1), bind_rows(DF2, DF1))
# empty inputs
DF1 = data.frame(a = 1, b = 2)
## 0 rows, 0 columns
DF2 = data.frame()
all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))
## 0 rows, same columns and types as DF1
DF2 = data.frame(a = double(), b = double())
all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))
## 0 rows, new columns
DF2 = data.frame(c = integer(), d = integer())
all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))
## mismatch types
## same column names as DF1, but character instead of double
DF2 = data.frame(a = character(), b = character())
### dplyr DOES NOT COMBINE EVEN 0-ROW MISMATCHES [SEE BELOW]
### all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))
# column type mismatch
# Every fixture in this section is a one-row frame with a single column `v`,
# so binding any two of them puts two different types into the same column.
## -> logical
DF_r = data.frame(v = as.raw(0))
DF_l = data.frame(v = TRUE)
### raw NOT SUPPORTED BY plyr
### all.equal(rbind.fill(DF_r, DF_l), bind_rows(DF_r, DF_l))
### all.equal(rbind.fill(DF_l, DF_r), bind_rows(DF_l, DF_r))
## -> integer
DF_i = data.frame(v = 0L)
### raw NOT SUPPORTED BY plyr
### all.equal(rbind.fill(DF_r, DF_i), bind_rows(DF_r, DF_i))
all.equal(rbind.fill(DF_l, DF_i), bind_rows(DF_l, DF_i))
### all.equal(rbind.fill(DF_i, DF_r), bind_rows(DF_i, DF_r))
all.equal(rbind.fill(DF_i, DF_l), bind_rows(DF_i, DF_l))

## -> double
DF_d = data.frame(v = 0.0)
### raw NOT SUPPORTED BY plyr
### all.equal(rbind.fill(DF_r, DF_d), bind_rows(DF_r, DF_d))
all.equal(rbind.fill(DF_l, DF_d), bind_rows(DF_l, DF_d))
all.equal(rbind.fill(DF_i, DF_d), bind_rows(DF_i, DF_d))
### all.equal(rbind.fill(DF_d, DF_r), bind_rows(DF_d, DF_r))
all.equal(rbind.fill(DF_d, DF_l), bind_rows(DF_d, DF_l))
all.equal(rbind.fill(DF_d, DF_i), bind_rows(DF_d, DF_i))

## -> complex
DF_c = data.frame(v = 0.0 + 1.0i)
### raw NOT SUPPORTED BY plyr
### all.equal(rbind.fill(DF_r, DF_c), bind_rows(DF_r, DF_c))
### SEE https://github.com/tidyverse/dplyr/issues/7685
### all.equal(rbind.fill(DF_l, DF_c), bind_rows(DF_l, DF_c))
all.equal(rbind.fill(DF_i, DF_c), bind_rows(DF_i, DF_c))
all.equal(rbind.fill(DF_d, DF_c), bind_rows(DF_d, DF_c))
### all.equal(rbind.fill(DF_c, DF_r), bind_rows(DF_c, DF_r))
### all.equal(rbind.fill(DF_c, DF_l), bind_rows(DF_c, DF_l))
all.equal(rbind.fill(DF_c, DF_i), bind_rows(DF_c, DF_i))
all.equal(rbind.fill(DF_c, DF_d), bind_rows(DF_c, DF_d))
## -> character
DF_s = data.frame(v = 'a')
### dplyr::bind_rows() blocks -> character conversion!
### all.equal(rbind.fill(DF_r, DF_s), bind_rows(DF_r, DF_s))
### all.equal(rbind.fill(DF_l, DF_s), bind_rows(DF_l, DF_s))
### all.equal(rbind.fill(DF_i, DF_s), bind_rows(DF_i, DF_s))
### all.equal(rbind.fill(DF_d, DF_s), bind_rows(DF_d, DF_s))
### all.equal(rbind.fill(DF_c, DF_s), bind_rows(DF_c, DF_s))
### all.equal(rbind.fill(DF_s, DF_r), bind_rows(DF_s, DF_r))
### all.equal(rbind.fill(DF_s, DF_l), bind_rows(DF_s, DF_l))
### all.equal(rbind.fill(DF_s, DF_i), bind_rows(DF_s, DF_i))
### all.equal(rbind.fill(DF_s, DF_d), bind_rows(DF_s, DF_d))
### all.equal(rbind.fill(DF_s, DF_c), bind_rows(DF_s, DF_c))

## -> list
## built in two steps: create the frame, then overwrite `v` with a list column
DF_t = data.frame(v = 1L)
DF_t$v = list(1:2)
### raw NOT SUPPORTED BY plyr
### all.equal(rbind.fill(DF_r, DF_t), bind_rows(DF_r, DF_t))
### dplyr::bind_rows() blocks -> list conversion!
### all.equal(rbind.fill(DF_l, DF_t), bind_rows(DF_l, DF_t))
### all.equal(rbind.fill(DF_i, DF_t), bind_rows(DF_i, DF_t))
### all.equal(rbind.fill(DF_d, DF_t), bind_rows(DF_d, DF_t))
### all.equal(rbind.fill(DF_c, DF_t), bind_rows(DF_c, DF_t))
### all.equal(rbind.fill(DF_s, DF_t), bind_rows(DF_s, DF_t))
### all.equal(rbind.fill(DF_t, DF_r), bind_rows(DF_t, DF_r))
### all.equal(rbind.fill(DF_t, DF_l), bind_rows(DF_t, DF_l))
### all.equal(rbind.fill(DF_t, DF_i), bind_rows(DF_t, DF_i))
### all.equal(rbind.fill(DF_t, DF_d), bind_rows(DF_t, DF_d))
### all.equal(rbind.fill(DF_t, DF_c), bind_rows(DF_t, DF_c))
### all.equal(rbind.fill(DF_t, DF_s), bind_rows(DF_t, DF_s))
# column type mismatch, one frame is missing
## one-row frames whose single `v` value is the NA of each atomic type
DF_l_NA = data.frame(v = NA)
DF_i_NA = data.frame(v = NA_integer_)
DF_d_NA = data.frame(v = NA_real_)
DF_c_NA = data.frame(v = NA_complex_)
DF_s_NA = data.frame(v = NA_character_)

## -> logical
### raw NOT SUPPORTED BY plyr
### all.equal(rbind.fill(DF_l_NA, DF_r), bind_rows(DF_l_NA, DF_r))

## -> integer
all.equal(rbind.fill(DF_l_NA, DF_i), bind_rows(DF_l_NA, DF_i))
### raw NOT SUPPORTED BY plyr
### all.equal(rbind.fill(DF_i_NA, DF_r), bind_rows(DF_i_NA, DF_r))
all.equal(rbind.fill(DF_i_NA, DF_l), bind_rows(DF_i_NA, DF_l))

## -> double
all.equal(rbind.fill(DF_l_NA, DF_d), bind_rows(DF_l_NA, DF_d))
all.equal(rbind.fill(DF_i_NA, DF_d), bind_rows(DF_i_NA, DF_d))
### raw NOT SUPPORTED BY plyr
### all.equal(rbind.fill(DF_d_NA, DF_r), bind_rows(DF_d_NA, DF_r))
all.equal(rbind.fill(DF_d_NA, DF_l), bind_rows(DF_d_NA, DF_l))
all.equal(rbind.fill(DF_d_NA, DF_i), bind_rows(DF_d_NA, DF_i))

## -> complex
### SEE https://github.com/tidyverse/dplyr/issues/7685
### NOTE(review): the logical-NA + complex comparison below is left enabled,
### while the complex-NA + logical one further down is disabled -- confirm
### that this asymmetry is intended.
all.equal(rbind.fill(DF_l_NA, DF_c), bind_rows(DF_l_NA, DF_c))
all.equal(rbind.fill(DF_i_NA, DF_c), bind_rows(DF_i_NA, DF_c))
all.equal(rbind.fill(DF_d_NA, DF_c), bind_rows(DF_d_NA, DF_c))
### raw NOT SUPPORTED BY plyr
### all.equal(rbind.fill(DF_c_NA, DF_r), bind_rows(DF_c_NA, DF_r))
### SEE https://github.com/tidyverse/dplyr/issues/7685
### all.equal(rbind.fill(DF_c_NA, DF_l), bind_rows(DF_c_NA, DF_l))
all.equal(rbind.fill(DF_c_NA, DF_i), bind_rows(DF_c_NA, DF_i))
all.equal(rbind.fill(DF_c_NA, DF_d), bind_rows(DF_c_NA, DF_d))

## -> character
### dplyr::bind_rows() blocks -> character conversion, mostly!
all.equal(rbind.fill(DF_l_NA, DF_s), bind_rows(DF_l_NA, DF_s))
### all.equal(rbind.fill(DF_i_NA, DF_s), bind_rows(DF_i_NA, DF_s))
### all.equal(rbind.fill(DF_d_NA, DF_s), bind_rows(DF_d_NA, DF_s))
### all.equal(rbind.fill(DF_c_NA, DF_s), bind_rows(DF_c_NA, DF_s))
### all.equal(rbind.fill(DF_s_NA, DF_r), bind_rows(DF_s_NA, DF_r))
### all.equal(rbind.fill(DF_s_NA, DF_l), bind_rows(DF_s_NA, DF_l))
### all.equal(rbind.fill(DF_s_NA, DF_i), bind_rows(DF_s_NA, DF_i))
### all.equal(rbind.fill(DF_s_NA, DF_d), bind_rows(DF_s_NA, DF_d))
### all.equal(rbind.fill(DF_s_NA, DF_c), bind_rows(DF_s_NA, DF_c))
# row names handling
## string row names
DF1 = data.frame(a = 1, row.names = 'a')
DF2 = data.frame(a = 2, row.names = 'b')
### plyr DROPS STRING ROW NAMES
### all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))
## both dplyr & plyr drop integer row names
rownames(DF1) = 2L
rownames(DF2) = 3L
all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))
# factors
## factor fixtures: every frame has a single column `v`
DF_f1 = data.frame(v = factor('a'))
DF_f2 = data.frame(v = factor('a', levels = c('a', 'b')))
DF_f3 = data.frame(v = factor(NA, levels = 'a'))
DF_f4 = data.frame(v = factor(NA, levels = 'd'))
DF_f5 = data.frame(v = factor('a', levels = c('b', 'a')))
DF_f6 = data.frame(v = factor('1'))
DF_f7 = data.frame(v = factor('b'))
DF_f8 = data.frame(v = factor('a', levels = c('a', 'c')))
## character fixtures; DF_c3 holds the string 'NA', DF_c4 a real missing value
DF_c1 = data.frame(v = 'a')
DF_c2 = data.frame(v = 'b')
DF_c3 = data.frame(v = 'NA')
DF_c4 = data.frame(v = NA_character_)
## a frame that shares no column with the factor fixtures
DF_x = data.frame(x = 1.0)
## integer fixtures
DF_i1 = data.frame(v = 1L)
DF_i2 = data.frame(v = 100L)
DF_i3 = data.frame(v = NA_integer_)

all.equal(rbind.fill(DF_f1, DF_f2), bind_rows(DF_f1, DF_f2))
all.equal(rbind.fill(DF_f1, DF_c1), bind_rows(DF_f1, DF_c1))
all.equal(rbind.fill(DF_f1, DF_c2), bind_rows(DF_f1, DF_c2))
all.equal(rbind.fill(DF_f1, DF_f3), bind_rows(DF_f1, DF_f3))
all.equal(rbind.fill(DF_f1, DF_f4), bind_rows(DF_f1, DF_f4))
all.equal(rbind.fill(DF_f1, DF_x), bind_rows(DF_f1, DF_x))
all.equal(rbind.fill(DF_f1, DF_c3), bind_rows(DF_f1, DF_c3))
all.equal(rbind.fill(DF_f3, DF_c3), bind_rows(DF_f3, DF_c3))
## surprisingly...
## (DF_f2 and DF_f5 have the same levels, declared in opposite order)
all.equal(rbind.fill(DF_f2, DF_f5), bind_rows(DF_f2, DF_f5))
### dplyr REFUSES TO COERCE integer --> factor/character
### all.equal(rbind.fill(DF_f1, DF_i1), bind_rows(DF_f1, DF_i1))
### all.equal(rbind.fill(DF_f1, DF_i2), bind_rows(DF_f1, DF_i2))
### EVEN IF THE INTEGER IS A FACTOR LEVEL
### all.equal(rbind.fill(DF_f6, DF_i1), bind_rows(DF_f6, DF_i1))
### EVEN IF THE INTEGER IS MISSING
### all.equal(rbind.fill(DF_f1, DF_i3), bind_rows(DF_f1, DF_i3))
all.equal(rbind.fill(DF_f1, DF_f7), bind_rows(DF_f1, DF_f7))
all.equal(rbind.fill(DF_f2, DF_f8), bind_rows(DF_f2, DF_f8))
all.equal(rbind.fill(DF_f1, DF_c4), bind_rows(DF_f1, DF_c4))
all.equal(rbind.fill(DF_f3, DF_c4), bind_rows(DF_f3, DF_c4))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment