Created
October 8, 2021 19:45
-
-
Save djnavarro/01ac88f94252414aa379ff9d02f836a7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# fix the RNG seed so the simulated data below is reproducible
set.seed(1)
#' Simulate a character column of "safe" / "UNSAFE\n" values
#'
#' Starts from `n` copies of "safe" and overwrites every position whose
#' uniform(0, 1) draw is greater than `p_safe` with "UNSAFE\n", a value
#' that contains an embedded newline.
#'
#' @param n Number of values to generate.
#' @param p_safe Threshold the uniform draws are compared against; larger
#'   values leave more entries as "safe".
#' @return A character vector of length `n`.
sample_values <- function(n, p_safe) {
  val <- rep("safe", n)
  # draws above the threshold become the newline-carrying value
  val[runif(n) > p_safe] <- "UNSAFE\n"
  val
}
# the wild-caught data had this structure: one column that is almost
# always "safe" and two columns that are almost always "UNSAFE\n"
old_df <- tibble::tibble(
  a = sample_values(1200, p_safe = 0.99),
  b = sample_values(1200, p_safe = 0.01),
  c = sample_values(1200, p_safe = 0.01)
)
# write the simulated data to a temporary CSV file
path <- tempfile(pattern = "quoted_newlines_", fileext = ".csv")
write.csv(old_df, path, row.names = FALSE)
# read data without setting num_threads (throws warning)
new_df <- readr::read_csv(path, lazy = FALSE)

# the weirdness!
waldo::compare(old_df, new_df) # yes, it parses correctly...
readr::problems(new_df)        # ...but problems have been logged
# read data setting num_threads = 1 (no warning)
new_df <- readr::read_csv(path, lazy = FALSE, num_threads = 1)

# weirdness vanishes
waldo::compare(old_df, new_df)
readr::problems(new_df)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment