Skip to content

Instantly share code, notes, and snippets.

@MichaelChirico
Last active June 3, 2025 23:21
Show Gist options
  • Save MichaelChirico/8df9b42e0e42d32828b34a641d33dc8c to your computer and use it in GitHub Desktop.
Save MichaelChirico/8df9b42e0e42d32828b34a641d33dc8c to your computer and use it in GitHub Desktop.
Comparing plyr::rbind.fill and dplyr::bind_rows()
# Cheat sheet comparing plyr::rbind.fill() with dplyr::bind_rows().
# NB: the only goal is migrating rbind.fill() --> bind_rows(), so features of
# bind_rows() that rbind.fill() lacks (e.g. .id=) are not examined.
# Short aliases so each comparison fits on a few lines.
rbind.fill <- plyr::rbind.fill
bind_rows <- dplyr::bind_rows

# baseline: two frames with identical columns
DF1 <- data.frame(a = 1, b = 2)
DF2 <- data.frame(a = 1, b = 2)
all.equal(
  rbind.fill(DF1, DF2),
  bind_rows(DF1, DF2)
)

# API equivalence: frames passed as a list, via do.call(), and via do.call()
# on a nested list
all.equal(
  rbind.fill(list(DF1, DF2)),
  bind_rows(list(DF1, DF2))
)
all.equal(
  do.call(rbind.fill, list(DF1, DF2)),
  do.call(bind_rows, list(DF1, DF2))
)
all.equal(
  do.call(rbind.fill, list(list(DF1, DF2))),
  do.call(bind_rows, list(list(DF1, DF2)))
)
# same columns, different order
all.equal(
  rbind.fill(DF1, rev(DF2)),
  bind_rows(DF1, rev(DF2))
)

# partially overlapping column names (a is shared; b and c are not)
DF2 <- data.frame(a = 1, c = 3)
all.equal(
  rbind.fill(DF1, DF2),
  bind_rows(DF1, DF2)
)
## names and order both differ
all.equal(
  rbind.fill(DF1, rev(DF2)),
  bind_rows(DF1, rev(DF2))
)
## swap the argument order
all.equal(
  rbind.fill(DF2, DF1),
  bind_rows(DF2, DF1)
)

# completely disjoint column names
DF2 <- data.frame(c = 3, d = 4)
all.equal(
  rbind.fill(DF1, DF2),
  bind_rows(DF1, DF2)
)
## swap the argument order
all.equal(
  rbind.fill(DF2, DF1),
  bind_rows(DF2, DF1)
)
# recursive (non-atomic-vector) columns
## data.frame stored as a column
DF1 <- data.frame(a = 1)
DF1$DF <- data.frame(b = 2, c = 3)
DF2 <- data.frame(a = 4)
DF2$DF <- data.frame(b = 5, c = 6)
### plyr DOES NOT SUPPORT data.frame COLUMNS
### all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))

## matrix column, column names agree
DF1$DF <- as.matrix(DF1$DF)
DF2$DF <- as.matrix(DF2$DF)
### dplyr drops column names!
### all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))

## matrix column, column names disagree
colnames(DF2$DF) <- c("d", "e")
### plyr ignores mismatch, dplyr drops column names
### all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))

## matrix column carrying row names only
colnames(DF1$DF) <- NULL
colnames(DF2$DF) <- NULL
rownames(DF1$DF) <- "x"
rownames(DF2$DF) <- "y"
### plyr ignores row names, dplyr retains
### all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))
## list column, no element names
DF1 <- data.frame(a = 1)
DF1$l <- list(2)
DF2 <- data.frame(a = 3)
DF2$l <- list(4)
all.equal(
  rbind.fill(DF1, DF2),
  bind_rows(DF1, DF2)
)

## list column, elements named in both frames
DF1 <- data.frame(a = 1)
DF1$l <- list(b = 2)
DF2 <- data.frame(a = 3)
DF2$l <- list(b = 4)
### plyr drops list names, dplyr retains
### all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))

## list column, elements named in only one frame
names(DF2$l) <- NULL
### plyr drops list names, dplyr retains
### all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))

## list column present in only one frame, so it must be filled
DF2 <- data.frame(a = 3)
all.equal(
  rbind.fill(DF1, DF2),
  bind_rows(DF1, DF2)
)
## swap the argument order
all.equal(
  rbind.fill(DF2, DF1),
  bind_rows(DF2, DF1)
)
# empty inputs
DF1 <- data.frame(a = 1, b = 2)
## no rows and no columns
DF2 <- data.frame()
all.equal(
  rbind.fill(DF1, DF2),
  bind_rows(DF1, DF2)
)
## 0 rows, same columns and types
DF2 <- data.frame(a = double(), b = double())
all.equal(
  rbind.fill(DF1, DF2),
  bind_rows(DF1, DF2)
)
## 0 rows, new columns
DF2 <- data.frame(c = integer(), d = integer())
all.equal(
  rbind.fill(DF1, DF2),
  bind_rows(DF1, DF2)
)
## 0 rows, same columns but mismatched types
DF2 <- data.frame(a = character(), b = character())
### dplyr DOES NOT COMBINE EVEN 0-ROW MISMATCHES [SEE BELOW]
### all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))
# column type mismatch
# Each fixture is a one-row frame whose single column `v` holds a different
# type, so binding any two of them exercises type coercion rather than fill.
## -> logical
# The raw fixture uses column `v` like every other fixture in this section;
# with a different name the raw comparisons would test column fill instead.
DF_r <- data.frame(v = as.raw(0))
DF_l <- data.frame(v = TRUE)
### raw NOT SUPPORTED BY plyr
### all.equal(rbind.fill(DF_r, DF_l), bind_rows(DF_r, DF_l))
### all.equal(rbind.fill(DF_l, DF_r), bind_rows(DF_l, DF_r))
## -> integer
DF_i <- data.frame(v = 0L)
### raw NOT SUPPORTED BY plyr
### all.equal(rbind.fill(DF_r, DF_i), bind_rows(DF_r, DF_i))
all.equal(rbind.fill(DF_l, DF_i), bind_rows(DF_l, DF_i))
### all.equal(rbind.fill(DF_i, DF_r), bind_rows(DF_i, DF_r))
all.equal(rbind.fill(DF_i, DF_l), bind_rows(DF_i, DF_l))
## -> double
DF_d <- data.frame(v = 0.0)
### raw NOT SUPPORTED BY plyr
### all.equal(rbind.fill(DF_r, DF_d), bind_rows(DF_r, DF_d))
all.equal(
  rbind.fill(DF_l, DF_d),
  bind_rows(DF_l, DF_d)
)
all.equal(
  rbind.fill(DF_i, DF_d),
  bind_rows(DF_i, DF_d)
)
### all.equal(rbind.fill(DF_d, DF_r), bind_rows(DF_d, DF_r))
all.equal(
  rbind.fill(DF_d, DF_l),
  bind_rows(DF_d, DF_l)
)
all.equal(
  rbind.fill(DF_d, DF_i),
  bind_rows(DF_d, DF_i)
)

## -> complex
DF_c <- data.frame(v = 0.0 + 1.0i)
### raw NOT SUPPORTED BY plyr
### all.equal(rbind.fill(DF_r, DF_c), bind_rows(DF_r, DF_c))
### SEE https://github.com/tidyverse/dplyr/issues/7685
### all.equal(rbind.fill(DF_l, DF_c), bind_rows(DF_l, DF_c))
all.equal(
  rbind.fill(DF_i, DF_c),
  bind_rows(DF_i, DF_c)
)
all.equal(
  rbind.fill(DF_d, DF_c),
  bind_rows(DF_d, DF_c)
)
### all.equal(rbind.fill(DF_c, DF_r), bind_rows(DF_c, DF_r))
### all.equal(rbind.fill(DF_c, DF_l), bind_rows(DF_c, DF_l))
all.equal(
  rbind.fill(DF_c, DF_i),
  bind_rows(DF_c, DF_i)
)
all.equal(
  rbind.fill(DF_c, DF_d),
  bind_rows(DF_c, DF_d)
)

## -> character
DF_s <- data.frame(v = "a")
### dplyr::bind_rows() blocks -> character conversion!
### all.equal(rbind.fill(DF_r, DF_s), bind_rows(DF_r, DF_s))
### all.equal(rbind.fill(DF_l, DF_s), bind_rows(DF_l, DF_s))
### all.equal(rbind.fill(DF_i, DF_s), bind_rows(DF_i, DF_s))
### all.equal(rbind.fill(DF_d, DF_s), bind_rows(DF_d, DF_s))
### all.equal(rbind.fill(DF_c, DF_s), bind_rows(DF_c, DF_s))
### all.equal(rbind.fill(DF_s, DF_r), bind_rows(DF_s, DF_r))
### all.equal(rbind.fill(DF_s, DF_l), bind_rows(DF_s, DF_l))
### all.equal(rbind.fill(DF_s, DF_i), bind_rows(DF_s, DF_i))
### all.equal(rbind.fill(DF_s, DF_d), bind_rows(DF_s, DF_d))
### all.equal(rbind.fill(DF_s, DF_c), bind_rows(DF_s, DF_c))

## -> list
# built in two steps: create the frame first, then overwrite `v` with a
# list column holding one length-2 integer vector
DF_t <- data.frame(v = 1L)
DF_t$v <- list(1:2)
### raw NOT SUPPORTED BY plyr
### all.equal(rbind.fill(DF_r, DF_t), bind_rows(DF_r, DF_t))
### dplyr::bind_rows() blocks -> list conversion!
### all.equal(rbind.fill(DF_l, DF_t), bind_rows(DF_l, DF_t))
### all.equal(rbind.fill(DF_i, DF_t), bind_rows(DF_i, DF_t))
### all.equal(rbind.fill(DF_d, DF_t), bind_rows(DF_d, DF_t))
### all.equal(rbind.fill(DF_c, DF_t), bind_rows(DF_c, DF_t))
### all.equal(rbind.fill(DF_s, DF_t), bind_rows(DF_s, DF_t))
### all.equal(rbind.fill(DF_t, DF_r), bind_rows(DF_t, DF_r))
### all.equal(rbind.fill(DF_t, DF_l), bind_rows(DF_t, DF_l))
### all.equal(rbind.fill(DF_t, DF_i), bind_rows(DF_t, DF_i))
### all.equal(rbind.fill(DF_t, DF_d), bind_rows(DF_t, DF_d))
### all.equal(rbind.fill(DF_t, DF_c), bind_rows(DF_t, DF_c))
### all.equal(rbind.fill(DF_t, DF_s), bind_rows(DF_t, DF_s))
# column type mismatch, one frame is missing
## all-NA fixtures: one single-row frame per NA type
DF_l_NA <- data.frame(v = NA)
DF_i_NA <- data.frame(v = NA_integer_)
DF_d_NA <- data.frame(v = NA_real_)
DF_c_NA <- data.frame(v = NA_complex_)
DF_s_NA <- data.frame(v = NA_character_)

## -> logical
### raw NOT SUPPORTED BY plyr
### all.equal(rbind.fill(DF_l_NA, DF_r), bind_rows(DF_l_NA, DF_r))

## -> integer
all.equal(
  rbind.fill(DF_l_NA, DF_i),
  bind_rows(DF_l_NA, DF_i)
)
### raw NOT SUPPORTED BY plyr
### all.equal(rbind.fill(DF_i_NA, DF_r), bind_rows(DF_i_NA, DF_r))
all.equal(
  rbind.fill(DF_i_NA, DF_l),
  bind_rows(DF_i_NA, DF_l)
)

## -> double
all.equal(
  rbind.fill(DF_l_NA, DF_d),
  bind_rows(DF_l_NA, DF_d)
)
all.equal(
  rbind.fill(DF_i_NA, DF_d),
  bind_rows(DF_i_NA, DF_d)
)
### raw NOT SUPPORTED BY plyr
### all.equal(rbind.fill(DF_d_NA, DF_r), bind_rows(DF_d_NA, DF_r))
all.equal(
  rbind.fill(DF_d_NA, DF_l),
  bind_rows(DF_d_NA, DF_l)
)
all.equal(
  rbind.fill(DF_d_NA, DF_i),
  bind_rows(DF_d_NA, DF_i)
)

## -> complex
### SEE https://github.com/tidyverse/dplyr/issues/7685
all.equal(
  rbind.fill(DF_l_NA, DF_c),
  bind_rows(DF_l_NA, DF_c)
)
all.equal(
  rbind.fill(DF_i_NA, DF_c),
  bind_rows(DF_i_NA, DF_c)
)
all.equal(
  rbind.fill(DF_d_NA, DF_c),
  bind_rows(DF_d_NA, DF_c)
)
### raw NOT SUPPORTED BY plyr
### all.equal(rbind.fill(DF_c_NA, DF_r), bind_rows(DF_c_NA, DF_r))
### SEE https://github.com/tidyverse/dplyr/issues/7685
### all.equal(rbind.fill(DF_c_NA, DF_l), bind_rows(DF_c_NA, DF_l))
all.equal(
  rbind.fill(DF_c_NA, DF_i),
  bind_rows(DF_c_NA, DF_i)
)
all.equal(
  rbind.fill(DF_c_NA, DF_d),
  bind_rows(DF_c_NA, DF_d)
)

## -> character
### dplyr::bind_rows() blocks -> character conversion, mostly!
all.equal(
  rbind.fill(DF_l_NA, DF_s),
  bind_rows(DF_l_NA, DF_s)
)
### all.equal(rbind.fill(DF_i_NA, DF_s), bind_rows(DF_i_NA, DF_s))
### all.equal(rbind.fill(DF_d_NA, DF_s), bind_rows(DF_d_NA, DF_s))
### all.equal(rbind.fill(DF_c_NA, DF_s), bind_rows(DF_c_NA, DF_s))
### all.equal(rbind.fill(DF_s_NA, DF_r), bind_rows(DF_s_NA, DF_r))
### all.equal(rbind.fill(DF_s_NA, DF_l), bind_rows(DF_s_NA, DF_l))
### all.equal(rbind.fill(DF_s_NA, DF_i), bind_rows(DF_s_NA, DF_i))
### all.equal(rbind.fill(DF_s_NA, DF_d), bind_rows(DF_s_NA, DF_d))
### all.equal(rbind.fill(DF_s_NA, DF_c), bind_rows(DF_s_NA, DF_c))
# row names handling
## character row names
DF1 <- data.frame(a = 1, row.names = "a")
DF2 <- data.frame(a = 2, row.names = "b")
### plyr DROPS STRING ROW NAMES
### all.equal(rbind.fill(DF1, DF2), bind_rows(DF1, DF2))

## both dplyr & plyr drop integer row names
rownames(DF1) <- 2L
rownames(DF2) <- 3L
all.equal(
  rbind.fill(DF1, DF2),
  bind_rows(DF1, DF2)
)
# factors
## factor fixtures: varying level sets, level order and missingness
DF_f1 <- data.frame(v = factor("a"))
DF_f2 <- data.frame(v = factor("a", levels = c("a", "b")))
DF_f3 <- data.frame(v = factor(NA, levels = "a"))
DF_f4 <- data.frame(v = factor(NA, levels = "d"))
DF_f5 <- data.frame(v = factor("a", levels = c("b", "a")))
DF_f6 <- data.frame(v = factor("1"))
DF_f7 <- data.frame(v = factor("b"))
DF_f8 <- data.frame(v = factor("a", levels = c("a", "c")))
## character fixtures; note DF_c3 holds the string "NA", DF_c4 a real NA
DF_c1 <- data.frame(v = "a")
DF_c2 <- data.frame(v = "b")
DF_c3 <- data.frame(v = "NA")
DF_c4 <- data.frame(v = NA_character_)
## a frame with an unrelated column, and integer fixtures
DF_x <- data.frame(x = 1.0)
DF_i1 <- data.frame(v = 1L)
DF_i2 <- data.frame(v = 100L)
DF_i3 <- data.frame(v = NA_integer_)

all.equal(
  rbind.fill(DF_f1, DF_f2),
  bind_rows(DF_f1, DF_f2)
)
all.equal(
  rbind.fill(DF_f1, DF_c1),
  bind_rows(DF_f1, DF_c1)
)
all.equal(
  rbind.fill(DF_f1, DF_c2),
  bind_rows(DF_f1, DF_c2)
)
all.equal(
  rbind.fill(DF_f1, DF_f3),
  bind_rows(DF_f1, DF_f3)
)
all.equal(
  rbind.fill(DF_f1, DF_f4),
  bind_rows(DF_f1, DF_f4)
)
all.equal(
  rbind.fill(DF_f1, DF_x),
  bind_rows(DF_f1, DF_x)
)
all.equal(
  rbind.fill(DF_f1, DF_c3),
  bind_rows(DF_f1, DF_c3)
)
all.equal(
  rbind.fill(DF_f3, DF_c3),
  bind_rows(DF_f3, DF_c3)
)
## surprisingly...
all.equal(
  rbind.fill(DF_f2, DF_f5),
  bind_rows(DF_f2, DF_f5)
)
### dplyr REFUSES TO COERCE integer --> factor/character
### all.equal(rbind.fill(DF_f1, DF_i1), bind_rows(DF_f1, DF_i1))
### all.equal(rbind.fill(DF_f1, DF_i2), bind_rows(DF_f1, DF_i2))
### EVEN IF THE INTEGER IS A FACTOR LEVEL
### all.equal(rbind.fill(DF_f6, DF_i1), bind_rows(DF_f6, DF_i1))
### EVEN IF THE INTEGER IS MISSING
### all.equal(rbind.fill(DF_f1, DF_i3), bind_rows(DF_f1, DF_i3))
all.equal(
  rbind.fill(DF_f1, DF_f7),
  bind_rows(DF_f1, DF_f7)
)
all.equal(
  rbind.fill(DF_f2, DF_f8),
  bind_rows(DF_f2, DF_f8)
)
all.equal(
  rbind.fill(DF_f1, DF_c4),
  bind_rows(DF_f1, DF_c4)
)
all.equal(
  rbind.fill(DF_f3, DF_c4),
  bind_rows(DF_f3, DF_c4)
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment