jmbarbone · May 22, 2022 17:23
diff --git a/data-frame-from-loop.R b/data-frame-from-loop.R
 # original ----------------------------------------------------------------

 # Build the result one column at a time. A data.frame is a list of
 # equal-length columns, so collect the pieces in a list and convert once.
 cols <- c(1, 5, 3) # column positions to pull from mtcars, in output order
 new <- vector("list", length(cols)) # preallocate one slot per column
 for (i in seq_along(cols)) {
   # drop = FALSE keeps each piece a one-column data.frame, so the column
   # name and the row names survive the conversion below
   new[[i]] <- mtcars[, cols[i], drop = FALSE]
 }

 new <- as.data.frame(new)      # combine the one-column pieces
 identical(new, mtcars[, cols]) # same as subsetting the columns directly
 #> [1] TRUE
 head(new)
 #>                    mpg drat disp
 #> Mazda RX4         21.0 3.90  160
 #> Mazda RX4 Wag     21.0 3.90  160
 #> Datsun 710        22.8 3.85  108
 #> Hornet 4 Drive    21.4 3.08  258
 #> Hornet Sportabout 18.7 3.15  360
 #> Valiant           18.1 2.76  225
 str(new)
 #> 'data.frame':    32 obs. of  3 variables:
 #>  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
 #>  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
 #>  $ disp: num  160 160 108 258 360 ...


 # updates -----------------------------------------------------------------

 # Simulate a wide data set: 20 groups of three columns (id, event,
 # other_col), each group suffixed with its number (id1, event1, ...).
 data <- Reduce(
   cbind,
   lapply(seq_len(20), function(k) {
     block <- data.frame(
       id = order(runif(5)),
       event = runif(5) < .5,
       other_col = runif(5)
     )
     # tag every column of this group with the group number
     setNames(block, paste0(names(block), k))
   })
 )

 # quick peek at a few columns from groups 1, 3, 4, 7 and 8
 str(data[, c(1:3, 9:12, 21:24)])
 #> 'data.frame':    5 obs. of  11 variables:
 #>  $ id1       : int  3 2 1 4 5
 #>  $ event1    : logi  FALSE FALSE TRUE TRUE FALSE
 #>  $ other_col1: num  0.617 0.951 0.511 0.185 0.667
 #>  $ other_col3: num  0.6856 0.0524 0.5786 0.9265 0.2291
 #>  $ id4       : int  4 2 1 5 3
 #>  $ event4    : logi  TRUE TRUE FALSE FALSE FALSE
 #>  $ other_col4: num  0.0849 0.8345 0.8465 0.1958 0.2534
 #>  $ other_col7: num  0.656 0.353 0.604 0.973 0.381
 #>  $ id8       : int  2 3 5 4 1
 #>  $ event8    : logi  TRUE FALSE FALSE TRUE TRUE
 #>  $ other_col8: num  0.646 0.693 0.534 0.624 0.625

 # Split the wide data back into one data.frame per group number.
 result <- lapply(1:20, function(i) {
   # Require a letter right before the number so that, e.g., i = 1 matches
   # "id1" but not "id11" or "id21".
   pattern <- paste0("[a-z]", i, "$")

   # positions of the columns that belong to group i
   ind <- grep(pattern, colnames(data))

   # drop = FALSE keeps the result a data.frame even when only one column
   # matches; otherwise it collapses to a vector and the renaming below fails
   res <- data[, ind, drop = FALSE]

   # optional: strip the trailing group number from the column names
   colnames(res) <- sub(paste0(i, "$"), "", colnames(res))
   res
 })

 head(result)
 #> [[1]]
 #>   id event other_col
 #> 1  3 FALSE 0.6174577
 #> 2  2 FALSE 0.9509916
 #> 3  1  TRUE 0.5107370
 #> 4  4  TRUE 0.1851543
 #> 5  5 FALSE 0.6670226
 #> 
 #> [[2]]
 #>   id event other_col
 #> 1  3  TRUE 0.8261719
 #> 2  4 FALSE 0.4171351
 #> 3  1  TRUE 0.5640345
 #> 4  5  TRUE 0.6825371
 #> 5  2 FALSE 0.4381013
 #> 
 #> [[3]]
 #>   id event  other_col
 #> 1  4 FALSE 0.68559712
 #> 2  3 FALSE 0.05241906
 #> 3  2 FALSE 0.57857342
 #> 4  1  TRUE 0.92649458
 #> 5  5  TRUE 0.22908630
 #> 
 #> [[4]]
 #>   id event  other_col
 #> 1  4  TRUE 0.08491369
 #> 2  2  TRUE 0.83452439
 #> 3  1 FALSE 0.84650621
 #> 4  5 FALSE 0.19578470
 #> 5  3 FALSE 0.25342999
 #> 
 #> [[5]]
 #>   id event other_col
 #> 1  4 FALSE 0.8912857
 #> 2  1 FALSE 0.1261470
 #> 3  3 FALSE 0.7962369
 #> 4  5  TRUE 0.3911494
 #> 5  2 FALSE 0.6041862
 #> 
 #> [[6]]
 #>   id event other_col
 #> 1  4  TRUE 0.8987728
 #> 2  2  TRUE 0.2830371
 #> 3  5 FALSE 0.6696249
 #> 4  3 FALSE 0.6249742
 #> 5  1 FALSE 0.4754757
	# original ----------------------------------------------------------------

	# Build the result one column at a time. A data.frame is a list of
	# equal-length columns, so collect the pieces in a list and convert once.
	cols <- c(1, 5, 3) # column positions to pull from mtcars, in output order
	new <- vector("list", length(cols)) # preallocate one slot per column
	for (i in seq_along(cols)) {
	  # drop = FALSE keeps each piece a one-column data.frame, so the column
	  # name and the row names survive the conversion below
	  new[[i]] <- mtcars[, cols[i], drop = FALSE]
	}

	new <- as.data.frame(new) # combine the one-column pieces
	identical(new, mtcars[, cols]) # same as subsetting the columns directly
	#> [1] TRUE
	head(new)
	#> mpg drat disp
	#> Mazda RX4 21.0 3.90 160
	#> Mazda RX4 Wag 21.0 3.90 160
	#> Datsun 710 22.8 3.85 108
	#> Hornet 4 Drive 21.4 3.08 258
	#> Hornet Sportabout 18.7 3.15 360
	#> Valiant 18.1 2.76 225
	str(new)
	#> 'data.frame': 32 obs. of 3 variables:
	#> $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
	#> $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
	#> $ disp: num 160 160 108 258 360 ...


	# updates -----------------------------------------------------------------

	# Simulate a wide data set: 20 groups of three columns (id, event,
	# other_col), each group suffixed with its number (id1, event1, ...).
	data <- Reduce(
	  cbind,
	  lapply(seq_len(20), function(k) {
	    block <- data.frame(
	      id = order(runif(5)),
	      event = runif(5) < .5,
	      other_col = runif(5)
	    )
	    # tag every column of this group with the group number
	    setNames(block, paste0(names(block), k))
	  })
	)

	# quick peek at a few columns from groups 1, 3, 4, 7 and 8
	str(data[, c(1:3, 9:12, 21:24)])
	#> 'data.frame': 5 obs. of 11 variables:
	#> $ id1 : int 3 2 1 4 5
	#> $ event1 : logi FALSE FALSE TRUE TRUE FALSE
	#> $ other_col1: num 0.617 0.951 0.511 0.185 0.667
	#> $ other_col3: num 0.6856 0.0524 0.5786 0.9265 0.2291
	#> $ id4 : int 4 2 1 5 3
	#> $ event4 : logi TRUE TRUE FALSE FALSE FALSE
	#> $ other_col4: num 0.0849 0.8345 0.8465 0.1958 0.2534
	#> $ other_col7: num 0.656 0.353 0.604 0.973 0.381
	#> $ id8 : int 2 3 5 4 1
	#> $ event8 : logi TRUE FALSE FALSE TRUE TRUE
	#> $ other_col8: num 0.646 0.693 0.534 0.624 0.625

	# Split the wide data back into one data.frame per group number.
	result <- lapply(1:20, function(i) {
	  # Require a letter right before the number so that, e.g., i = 1 matches
	  # "id1" but not "id11" or "id21".
	  pattern <- paste0("[a-z]", i, "$")

	  # positions of the columns that belong to group i
	  ind <- grep(pattern, colnames(data))

	  # drop = FALSE keeps the result a data.frame even when only one column
	  # matches; otherwise it collapses to a vector and the renaming below fails
	  res <- data[, ind, drop = FALSE]

	  # optional: strip the trailing group number from the column names
	  colnames(res) <- sub(paste0(i, "$"), "", colnames(res))
	  res
	})

	head(result)
	#> [[1]]
	#> id event other_col
	#> 1 3 FALSE 0.6174577
	#> 2 2 FALSE 0.9509916
	#> 3 1 TRUE 0.5107370
	#> 4 4 TRUE 0.1851543
	#> 5 5 FALSE 0.6670226
	#>
	#> [[2]]
	#> id event other_col
	#> 1 3 TRUE 0.8261719
	#> 2 4 FALSE 0.4171351
	#> 3 1 TRUE 0.5640345
	#> 4 5 TRUE 0.6825371
	#> 5 2 FALSE 0.4381013
	#>
	#> [[3]]
	#> id event other_col
	#> 1 4 FALSE 0.68559712
	#> 2 3 FALSE 0.05241906
	#> 3 2 FALSE 0.57857342
	#> 4 1 TRUE 0.92649458
	#> 5 5 TRUE 0.22908630
	#>
	#> [[4]]
	#> id event other_col
	#> 1 4 TRUE 0.08491369
	#> 2 2 TRUE 0.83452439
	#> 3 1 FALSE 0.84650621
	#> 4 5 FALSE 0.19578470
	#> 5 3 FALSE 0.25342999
	#>
	#> [[5]]
	#> id event other_col
	#> 1 4 FALSE 0.8912857
	#> 2 1 FALSE 0.1261470
	#> 3 3 FALSE 0.7962369
	#> 4 5 TRUE 0.3911494
	#> 5 2 FALSE 0.6041862
	#>
	#> [[6]]
	#> id event other_col
	#> 1 4 TRUE 0.8987728
	#> 2 2 TRUE 0.2830371
	#> 3 5 FALSE 0.6696249
	#> 4 3 FALSE 0.6249742
	#> 5 1 FALSE 0.4754757