djnavarro · October 8, 2021 19:45
diff --git a/readr_threads_issue.R b/readr_threads_issue.R
 # Fix the random number generator seed so the runif() draws used to build the
 # simulated columns below are the same on every run of this script.
 set.seed(1)

 # Build a character vector of length `n` for the simulated data.
 #
 # Every element starts out as "safe". One uniform random number is drawn per
 # element, and each element whose draw is greater than `p_safe` is replaced
 # by "UNSAFE\n" (note the embedded newline).
 #
 # n:      number of values to generate.
 # p_safe: threshold compared against the uniform draws.
 # Returns a character vector containing only "safe" and "UNSAFE\n".
 sample_values <- function(n, p_safe) {
   labels <- rep("safe", times = n)
   replace(labels, runif(n) > p_safe, "UNSAFE\n")
 }

 # the wild-caught data had this structure: three character columns, where
 # column `a` is almost entirely "safe" and columns `b` and `c` are almost
 # entirely "UNSAFE\n" (values carrying an embedded newline)
 old_df <- tibble::tibble(
   a = sample_values(1200, p_safe = 0.99),
   b = sample_values(1200, p_safe = 0.01),
   c = sample_values(1200, p_safe = 0.01)
 )

 # write to temp file; write.csv() quotes character fields by default, so the
 # embedded newlines end up inside quoted fields
 path <- tempfile(pattern = "quoted_newlines_", fileext = ".csv")
 write.csv(old_df, path, row.names = FALSE)

 # read data without setting num_threads (throws warning)
 new_df <- readr::read_csv(path, lazy = FALSE)

 # the weirdness!
 waldo::compare(old_df, new_df) # yes, it parses correctly...
 readr::problems(new_df)        # ...but problems have been logged

 # read data setting num_threads = 1 (no warning)
 new_df <- readr::read_csv(path, lazy = FALSE, num_threads = 1)

 # weirdness vanishes
 waldo::compare(old_df, new_df)
 readr::problems(new_df)
	# Fix the random number generator seed so the runif() draws used to build the
	# simulated columns below are the same on every run of this script.
	set.seed(1)

	# Generate `n` labels for the simulated data.
	#
	# All labels default to "safe"; a label is switched to "UNSAFE\n" (which
	# contains an embedded newline) whenever its uniform random draw is greater
	# than `p_safe`.
	#
	# n:      number of labels to generate.
	# p_safe: threshold compared against the uniform draws.
	# Returns a character vector of "safe" / "UNSAFE\n" values.
	sample_values <- function(n, p_safe) {
		out <- rep("safe", n)
		exceeds_threshold <- runif(n) > p_safe
		out[exceeds_threshold] <- "UNSAFE\n"
		out
	}

	# the wild-caught data had this structure: three character columns, where
	# column `a` is almost entirely "safe" and columns `b` and `c` are almost
	# entirely "UNSAFE\n" (values carrying an embedded newline)
	old_df <- tibble::tibble(
		a = sample_values(1200, p_safe = 0.99),
		b = sample_values(1200, p_safe = 0.01),
		c = sample_values(1200, p_safe = 0.01)
	)

	# write to temp file; write.csv() quotes character fields by default, so the
	# embedded newlines end up inside quoted fields
	path <- tempfile(pattern = "quoted_newlines_", fileext = ".csv")
	write.csv(old_df, path, row.names = FALSE)

	# read data without setting num_threads (throws warning)
	new_df <- readr::read_csv(path, lazy = FALSE)

	# the weirdness!
	waldo::compare(old_df, new_df) # yes, it parses correctly...
	readr::problems(new_df) # ...but problems have been logged

	# read data setting num_threads = 1 (no warning)
	new_df <- readr::read_csv(path, lazy = FALSE, num_threads = 1)

	# weirdness vanishes
	waldo::compare(old_df, new_df)
	readr::problems(new_df)