aammd · August 29, 2015 14:16
diff --git a/fill_down.R b/fill_down.R
 #' Convert positional group labels into a grouping column
 #'
 #' Sometimes text is organized by position: a group label (e.g. a header)
 #' is followed by the members of that group. This function pairs every
 #' member with the label that precedes it.
 #'
 #' @param x vector containing group labels followed by group members;
 #'   it is coerced with `as.character()`.
 #' @param pattern regular expression that identifies the group labels.
 #' @return A tibble with one row per non-label element of `x` and two
 #'   columns: `grps` (the label preceding the element) and `xvals` (the
 #'   element itself). An error is raised when nothing in `x` matches
 #'   `pattern` or when the first element of `x` is not a label.
 fill_down <- function(x, pattern) {
   x <- as.character(x)

   ## flag the labels and record their positions
   is_label <- grepl(pattern = pattern, x = x)
   loc <- which(is_label)

   ## fail early with a readable message; min() on an empty match set would
   ## otherwise only warn and return Inf
   if (length(loc) == 0L) {
     stop("no element of `x` matches `pattern`", call. = FALSE)
   }
   ## every member needs a label above it, so the first element must be one
   if (loc[1] != 1L) {
     stop("the first element of `x` must be a group label", call. = FALSE)
   }

   ## number of members under each label: distance to the next label
   ## (or to one past the end of `x`) minus the label itself
   next_label <- c(loc[-1], length(x) + 1)
   n_members <- next_label - loc - 1

   ## repeat each label once per member; a label with no members is dropped
   grps <- rep(x[loc], times = n_members)

   ## the members are everything that is not a label
   xvals <- x[!is_label]

   ## sanity check: one label per member
   stopifnot(length(grps) == length(xvals))

   dplyr::tibble(grps = grps, xvals = xvals)
 }

 # Example: a vector of letters split into two sections headed by "A" and "B".
 # The upper-case entries are the group labels; the rest are group members.

 test <- c(
   "A", "b", "c", "e",
   "B", "g", "h", "i"
 )
 fill_down(x = test, pattern = "A|B")

 # Expected result, one row per member:
 #   grps xvals
 # 1    A     b
 # 2    A     c
 # 3    A     e
 # 4    B     g
 # 5    B     h
 # 6    B     i
	#' Convert positional group labels into a grouping column
	#'
	#' Sometimes text is organized by position: a group label (e.g. a header)
	#' is followed by the members of that group. This function pairs every
	#' member with the label that precedes it.
	#'
	#' @param x vector containing group labels followed by group members;
	#'   it is coerced with `as.character()`.
	#' @param pattern regular expression that identifies the group labels.
	#' @return A tibble with one row per non-label element of `x` and two
	#'   columns: `grps` (the label preceding the element) and `xvals` (the
	#'   element itself). An error is raised when nothing in `x` matches
	#'   `pattern` or when the first element of `x` is not a label.
	fill_down <- function(x, pattern) {
	  x <- as.character(x)

	  ## flag the labels and record their positions
	  is_label <- grepl(pattern = pattern, x = x)
	  loc <- which(is_label)

	  ## fail early with a readable message; min() on an empty match set would
	  ## otherwise only warn and return Inf
	  if (length(loc) == 0L) {
	    stop("no element of `x` matches `pattern`", call. = FALSE)
	  }
	  ## every member needs a label above it, so the first element must be one
	  if (loc[1] != 1L) {
	    stop("the first element of `x` must be a group label", call. = FALSE)
	  }

	  ## number of members under each label: distance to the next label
	  ## (or to one past the end of `x`) minus the label itself
	  next_label <- c(loc[-1], length(x) + 1)
	  n_members <- next_label - loc - 1

	  ## repeat each label once per member; a label with no members is dropped
	  grps <- rep(x[loc], times = n_members)

	  ## the members are everything that is not a label
	  xvals <- x[!is_label]

	  ## sanity check: one label per member
	  stopifnot(length(grps) == length(xvals))

	  dplyr::tibble(grps = grps, xvals = xvals)
	}

	# Example: a vector of letters split into two sections headed by "A" and "B".
	# The upper-case entries are the group labels; the rest are group members.

	test <- c("A", "b", "c", "e", "B", "g", "h", "i")
	# the alternation is a plain "|"; "\|" is not a valid escape in an R string
	fill_down(test, "A|B")

	# Expected result, one row per member:
	#   grps xvals
	# 1    A     b
	# 2    A     c
	# 3    A     e
	# 4    B     g
	# 5    B     h
	# 6    B     i