rBatt · March 31, 2016 18:07
diff --git a/read_jho.R b/read_jho.R
 #' Read JHo
 #' 
 #' Read in a file formatted like Jennifer's data sets: several equally sized matrices, each preceded by the same number of text comment lines
 #' 
 #' @param file a character indicating the file name (and possibly path)
 #' @param n_rows integer indicating number of rows for each matrix
 #' @param n_matrices integer indicating number of matrices in the file; must be at least 1
 #' @param n_comment integer indicating the number of comment lines preceding each matrix of data; should include blank lines
 #' @param n_top_comment integer indicating the number of comment lines that are unique to the start of the file; default is 0 lines
 #' @param ... arguments to be passed to \code{scan}, such as \code{what=numeric()} and \code{sep=","}
 #' 
 #' @details
 #' Jennifer said she had a file that had several matrices interspersed with text comments. This is an approach that I think should help read in that data set
 #' 
 #' The file is assumed to start with \code{n_top_comment} lines, followed by \code{n_matrices} blocks that each consist of \code{n_comment} comment lines and then \code{n_rows} data lines. For matrix \code{i}, the number of lines skipped before reading is \code{n_top_comment + (i-1)*(n_comment+n_rows) + n_comment}.
 #' 
 #' \code{scan} returns the values line by line, so each matrix is filled by row; element \code{[r, c]} of a returned matrix is the \code{c}-th value on the \code{r}-th data line of that block.
 #' 
 #' Arguments to \code{scan} need to be specified. To read in example data, I used \code{read_jho(file_name, 7, 5, 3, 1, what=numeric(), sep=",")}.
 #' 
 #' @return
 #' Returns an array whose first two dimensions are equal to the dimensions of each matrix, and the size of the third dimension is the number of matrices
 #' 
 #' @export
 read_jho <- function(file, n_rows, n_matrices, n_comment, n_top_comment=0, ...){
 	stopifnot(file.exists(file)) # check that file exists
 	stopifnot( # each count must be a single, sensible number
 		is.numeric(n_rows), length(n_rows) == 1L, n_rows >= 1,
 		is.numeric(n_matrices), length(n_matrices) == 1L, n_matrices >= 1,
 		is.numeric(n_comment), length(n_comment) == 1L, n_comment >= 0,
 		is.numeric(n_top_comment), length(n_top_comment) == 1L, n_top_comment >= 0
 	)

 	block_lines <- n_comment + n_rows # lines occupied by one comment+matrix block
 	data_read <- vector("list", n_matrices) # preallocate one slot per matrix
 	for(i in seq_len(n_matrices)){
 		# Lines before this matrix: the one-off top comments, all earlier blocks,
 		# and this block's own comment lines
 		lines_to_skip <- n_top_comment + (i-1)*block_lines + n_comment
 		values <- scan(file, skip=lines_to_skip, nlines=n_rows, ...)
 		# scan() yields values in file (row-major) order, so fill the matrix by row
 		data_read[[i]] <- matrix(values, nrow=n_rows, byrow=TRUE)
 	}

 	# Stack the matrices along a third dimension
 	simplify2array(data_read)
 }
diff --git a/read_jho_script.R b/read_jho_script.R


 # =========
 # = Setup =
 # =========

 # ---- Data Dimensions ----
 # These three sizes describe the fake file that gets generated below
 # Reading the file back only needs the row count and the matrix count
 n_matrices <- 5 # needed for reading
 mat_rows <- 7 # needed for reading
 mat_columns <- 3 # only used when generating; not needed for reading

 # ---- Prepare Content ----
 # Numeric Content
 # Shorthand generator for one random numeric matrix (nm)
 # It takes no arguments: mat_rows and mat_columns are looked up
 # in the enclosing (global) environment via R's scoping rules
 make_nm <- function(){
 	n_values <- mat_rows*mat_columns
 	matrix(rnorm(n_values), nrow=mat_rows, ncol=mat_columns)
 }

 # Alternative that takes the two dimensions as explicit arguments
 # With it, make_nm() alone fails; you would call make_nm(7, 3), e.g.
 # make_nm <- function(mat_rows, mat_columns){matrix(rnorm(mat_rows*mat_columns), nrow=mat_rows, ncol=mat_columns)}

 # Gibberish/ Header Content
 # Text lines that stand in for comments to be skipped when reading
 # Any text would do here; unusual characters might cause trouble,
 # but this example keeps it simple
 animal_matrix <- matrix(c("cat", "dog", "sheep"), nrow=3, ncol=1)


 # ===============
 # = Create File =
 # ===============

 # ---- Create File ----
 # Define File Name
 file_name <- "~/Desktop/jho_test.csv"

 # Start a fresh file (append=FALSE, so an existing file is overwritten)
 # "V1" is an arbitrary fake header line that appears once, at the top
 # Row and column names are suppressed so nothing extra gets written
 write.table("V1", file=file_name, append=FALSE, row.names=FALSE, col.names=FALSE) # creates file, V1 only

 # ---- Add to File ----
 # Each pass appends one block: the gibberish lines, then one numeric matrix
 # seq_len() makes the loop do nothing when n_matrices is 0 (1:0 would run twice)
 for(i in seq_len(n_matrices)){ # loop through, adding lines
 	# append=TRUE from here on, because lines are added to the existing file
 	write.table(animal_matrix, file=file_name, append=TRUE, row.names=FALSE, col.names=FALSE) # gibberish
 	# Numeric rows are comma-separated; the reader must use the same separator
 	write.table(make_nm(), file=file_name, append=TRUE, row.names=FALSE, col.names=FALSE, sep=",") # numeric
 }


 # =============
 # = Read File =
 # =============

 # ---- Data Dimensions Needed for Read ----
 # See "Setup" Section Above
 top_header_lines <- 1 # the single "V1" line at the very top of the file; use 0 if there is none
 bonus_top_lines <- nrow(animal_matrix) # these would be gibberish comments to skip
 good_lines <- mat_rows # these are the data lines

 # ---- Read Data 1 Matrix at a Time ----
 # Create list with one (empty) slot per matrix
 data_read <- vector("list", n_matrices)

 # Loop and Read
 for(i in seq_len(n_matrices)){
 	
 	# Determine How Many Lines to Skip Before Reading for this Iteration
 	# `skip` counts lines from the start of the file, so the one-off header
 	# has to be included on every iteration, not just the first
 	lines_read_previously <- (i-1)*(bonus_top_lines+good_lines) # all earlier comment+matrix blocks
 	line_start <- top_header_lines + lines_read_previously + bonus_top_lines
 	
 	# Read in Desired Portion of File
 	# The 'sep' argument should refer to what separates values in the numeric part 
 	# See the `write.table(make_nm(), ...` piece of code under 'Add to File' section above
 	data_read[[i]] <- scan(file_name, skip=line_start, nlines=good_lines, what=numeric(), sep=",") 
 	
 	# Format to Matrix
 	# The value returned from `scan()` is a vector in file order (line by line),
 	# so the matrix must be filled by row to reproduce the layout in the file
 	# The number of columns is inferred; can be set explicitly for clarity, if known
 	data_read[[i]] <- matrix(data_read[[i]], nrow=good_lines, byrow=TRUE)
 }


 # =============================================
 # = Take Element-Wise Average Across Matrices =
 # =============================================

 # Format to array (rows x columns x matrices)
 data_array <- simplify2array(data_read)

 # Take average over the third dimension, leaving one mean per (row, column) cell
 data_array_mean <- apply(data_array, c(1,2), mean)
	#' Read JHo
	#'
	#' Read in a file formatted like Jennifer's data sets: several equally sized matrices, each preceded by the same number of text comment lines
	#'
	#' @param file a character indicating the file name (and possibly path)
	#' @param n_rows integer indicating number of rows for each matrix
	#' @param n_matrices integer indicating number of matrices in the file; must be at least 1
	#' @param n_comment integer indicating the number of comment lines preceding each matrix of data; should include blank lines
	#' @param n_top_comment integer indicating the number of comment lines that are unique to the start of the file; default is 0 lines
	#' @param ... arguments to be passed to \code{scan}, such as \code{what=numeric()} and \code{sep=","}
	#'
	#' @details
	#' Jennifer said she had a file that had several matrices interspersed with text comments. This is an approach that I think should help read in that data set
	#'
	#' The file is assumed to start with \code{n_top_comment} lines, followed by \code{n_matrices} blocks that each consist of \code{n_comment} comment lines and then \code{n_rows} data lines. For matrix \code{i}, the number of lines skipped before reading is \code{n_top_comment + (i-1)*(n_comment+n_rows) + n_comment}.
	#'
	#' \code{scan} returns the values line by line, so each matrix is filled by row; element \code{[r, c]} of a returned matrix is the \code{c}-th value on the \code{r}-th data line of that block.
	#'
	#' Arguments to \code{scan} need to be specified. To read in example data, I used \code{read_jho(file_name, 7, 5, 3, 1, what=numeric(), sep=",")}.
	#'
	#' @return
	#' Returns an array whose first two dimensions are equal to the dimensions of each matrix, and the size of the third dimension is the number of matrices
	#'
	#' @export
	read_jho <- function(file, n_rows, n_matrices, n_comment, n_top_comment=0, ...){
		stopifnot(file.exists(file)) # check that file exists
		stopifnot( # each count must be a single, sensible number
			is.numeric(n_rows), length(n_rows) == 1L, n_rows >= 1,
			is.numeric(n_matrices), length(n_matrices) == 1L, n_matrices >= 1,
			is.numeric(n_comment), length(n_comment) == 1L, n_comment >= 0,
			is.numeric(n_top_comment), length(n_top_comment) == 1L, n_top_comment >= 0
		)

		block_lines <- n_comment + n_rows # lines occupied by one comment+matrix block
		data_read <- vector("list", n_matrices) # preallocate one slot per matrix
		for(i in seq_len(n_matrices)){
			# Lines before this matrix: the one-off top comments, all earlier blocks,
			# and this block's own comment lines
			lines_to_skip <- n_top_comment + (i-1)*block_lines + n_comment
			values <- scan(file, skip=lines_to_skip, nlines=n_rows, ...)
			# scan() yields values in file (row-major) order, so fill the matrix by row
			data_read[[i]] <- matrix(values, nrow=n_rows, byrow=TRUE)
		}

		# Stack the matrices along a third dimension
		simplify2array(data_read)
	}


	# =========
	# = Setup =
	# =========

	# ---- Data Dimensions ----
	# These three sizes describe the fake file that gets generated below
	# Reading the file back only needs the row count and the matrix count
	n_matrices <- 5 # needed for reading
	mat_rows <- 7 # needed for reading
	mat_columns <- 3 # only used when generating; not needed for reading

	# ---- Prepare Content ----
	# Numeric Content
	# Shorthand generator for one random numeric matrix (nm)
	# It takes no arguments: mat_rows and mat_columns are looked up
	# in the enclosing (global) environment via R's scoping rules
	make_nm <- function(){
		n_values <- mat_rows*mat_columns
		matrix(rnorm(n_values), nrow=mat_rows, ncol=mat_columns)
	}

	# Alternative that takes the two dimensions as explicit arguments
	# With it, make_nm() alone fails; you would call make_nm(7, 3), e.g.
	# make_nm <- function(mat_rows, mat_columns){matrix(rnorm(mat_rows*mat_columns), nrow=mat_rows, ncol=mat_columns)}

	# Gibberish/ Header Content
	# Text lines that stand in for comments to be skipped when reading
	# Any text would do here; unusual characters might cause trouble,
	# but this example keeps it simple
	animal_matrix <- matrix(c("cat", "dog", "sheep"), nrow=3, ncol=1)


	# ===============
	# = Create File =
	# ===============

	# ---- Create File ----
	# Define File Name
	file_name <- "~/Desktop/jho_test.csv"

	# Start a fresh file (append=FALSE, so an existing file is overwritten)
	# "V1" is an arbitrary fake header line that appears once, at the top
	# Row and column names are suppressed so nothing extra gets written
	write.table("V1", file=file_name, append=FALSE, row.names=FALSE, col.names=FALSE) # creates file, V1 only

	# ---- Add to File ----
	# Each pass appends one block: the gibberish lines, then one numeric matrix
	# seq_len() makes the loop do nothing when n_matrices is 0 (1:0 would run twice)
	for(i in seq_len(n_matrices)){ # loop through, adding lines
		# append=TRUE from here on, because lines are added to the existing file
		write.table(animal_matrix, file=file_name, append=TRUE, row.names=FALSE, col.names=FALSE) # gibberish
		# Numeric rows are comma-separated; the reader must use the same separator
		write.table(make_nm(), file=file_name, append=TRUE, row.names=FALSE, col.names=FALSE, sep=",") # numeric
	}


	# =============
	# = Read File =
	# =============

	# ---- Data Dimensions Needed for Read ----
	# See "Setup" Section Above
	top_header_lines <- 1 # the single "V1" line at the very top of the file; use 0 if there is none
	bonus_top_lines <- nrow(animal_matrix) # these would be gibberish comments to skip
	good_lines <- mat_rows # these are the data lines

	# ---- Read Data 1 Matrix at a Time ----
	# Create list with one (empty) slot per matrix
	data_read <- vector("list", n_matrices)

	# Loop and Read
	for(i in seq_len(n_matrices)){

		# Determine How Many Lines to Skip Before Reading for this Iteration
		# `skip` counts lines from the start of the file, so the one-off header
		# has to be included on every iteration, not just the first
		lines_read_previously <- (i-1)*(bonus_top_lines+good_lines) # all earlier comment+matrix blocks
		line_start <- top_header_lines + lines_read_previously + bonus_top_lines

		# Read in Desired Portion of File
		# The 'sep' argument should refer to what separates values in the numeric part
		# See the `write.table(make_nm(), ...` piece of code under 'Add to File' section above
		data_read[[i]] <- scan(file_name, skip=line_start, nlines=good_lines, what=numeric(), sep=",")

		# Format to Matrix
		# The value returned from `scan()` is a vector in file order (line by line),
		# so the matrix must be filled by row to reproduce the layout in the file
		# The number of columns is inferred; can be set explicitly for clarity, if known
		data_read[[i]] <- matrix(data_read[[i]], nrow=good_lines, byrow=TRUE)
	}


	# =============================================
	# = Take Element-Wise Average Across Matrices =
	# =============================================

	# Format to array (rows x columns x matrices)
	data_array <- simplify2array(data_read)

	# Take average over the third dimension, leaving one mean per (row, column) cell
	data_array_mean <- apply(data_array, c(1,2), mean)