Mark Andrews mark-andrews

Reading in multiple csv files as data frames and concatenating (or row binding) them into one data frame is a task we routinely face.

In R, there are many ways of doing it. But which is the best, and why? I think the best way is the simplest and highest-level way: something that is easy to read, write, and edit. Here are three variants of what I think is the right way. The first is close to a base R way (except for the use of read_csv and the beloved pipe), the second uses purrr and dplyr, and the third just uses purrr.

library(readr)
library(tibble)
library(purrr)

	# Named lookup vectors built from the first 8 entries of `or`: the values
	# come from `correct` / `correct.1`, the names from `key_config_1` /
	# `key_config_2`.
	correct_response_map_1 <- setNames(or$correct[1:8], or$key_config_1[1:8])
	correct_response_map_2 <- setNames(or$correct.1[1:8], or$key_config_2[1:8])

	# Add scoring columns to `new_or`: look up the correct response for each
	# row's `stim_type` in both lookup vectors, keep the one selected by the
	# row's key configuration, and compare it with `response_num`.
	# NOTE(review): the column is spelled `Key_config` (capital K) here —
	# confirm it matches the data.
	new_or %>% mutate(
		correct_response_num_if_1 = correct_response_map_1[stim_type],
		correct_response_num_if_2 = correct_response_map_2[stim_type],
		# The logical tests act as 0/1 multipliers, so a term contributes only
		# when its test is TRUE. The explicit `*` is required: writing
		# `(cond)value` with no operator does not parse in R.
		# The misspelled name `correct_reponse_num` is kept so that any code
		# relying on this column keeps working.
		correct_reponse_num = (Key_config == 1) * correct_response_num_if_1 +
			(Key_config == 2) * correct_response_num_if_2,
		accuracy = correct_reponse_num == response_num
	)

	library(dplyr)
	library(readr)
	Df <- read_csv("data/AV_usereventsR_new.csv")

	# create a table with correct responses
	correct_responses <- with(Df,
	tibble(stimulus = rep(key_config_1[1:8],2),
	key_config = as.integer(c(rep(1,8), rep(2,8))),
	correct_response = c(correct[1:8], correct.1[1:8])
	)

	FROM archlinux:20200705

	RUN pacman -Sy \
	&& pacman -S --noconfirm \
	base-devel \
	git \
	make \
	r \
	sudo \
	vim \

	library(tidyverse)

	# Example tibble in which each of the three columns has missing values.
	df <- tibble(
		d13C = c(1, NA, 3, 4, NA, 6),
		d15N = c(1, 2, NA, 4, NA, 6),
		d42 = c(1, NA, NA, 4, 5, NA)
	)

	# Drop only the rows of `df` where `d13C` is missing; rows with missing
	# values in any other variable are kept.
	df %>% filter(!is.na(d13C))

	library(tidyverse)
	library(rlang)

	ansur <- read_csv("http://data.ntupsychology.net/ansur.csv")

	describe <- function(data, by = NULL, ...){
	if (is.null(enexpr(by))){
	summarise(data, ...)
	} else {
	summarise(group_by(data, across({{ by }})), ..., .groups = 'drop')

	library(readr)
	music_df <- read_tsv(file =
	'Subject TimeGuess Music
	subj1 43 control
	subj2 18 control
	subj3 68 control
	subj4 26 control
	subj5 40 control
	subj6 47 control
	subj7 29 control

	import string
	from random import choice

	# The first nine uppercase ASCII letters, one single-character string each.
	letters = [ch for ch in string.ascii_uppercase[:9]]

	n, K = 2, 36

	# Two separate lists, each pre-sized with K `None` placeholders.
	stimuli = [None for _ in range(K)]
	response = [None for _ in range(K)]

	# The stimulus set: the lowercase letters "a" through "i".
	stimuli <- letters[1:9]

	# Use `<-` for assignment, consistent with the surrounding lines.
	n <- 2
	K <- 36

	# Pre-allocated lists, each with K empty (NULL) slots.
	stimuli_list <- vector(mode = "list", length = K)
	response_list <- vector(mode = "list", length = K)

	for (i in seq(K)){
	if (i <= n){

	# A set of functions (and supporting code) that illustrate the concept of
	# a sampling distribution, using the example of sampling gold and silver
	# coins from a box.

	library(tidyverse)

	# set random number generator seed
	set.seed(10101)

	make_box <- function(N = 100, g = 0.5){