technocrat · October 14, 2023 06:36
diff --git a/aristotle.R b/aristotle.R
 # aristotle.R
 # author: Richard Careaga
 # problem to illustrate analytic programming
 # Date: 2023-10-14

 # what we have: a text copy of an old fixed-length format tape
 # intended to record numeric blocks of data in 20 character increments
 # without delimiters; the objective is to divide the text into
 # 20 character blocks, find the index of the last "0" in each block
 # and record the block sequence number of the block together with the 
 # numeric characters preceding it back to the next "0" reading right to 
 # left. example for the first block
 # "62034706309085592380" --> 1 8559238

 # strategy
 # 1. divide the source data into 20-character
 # strings
 # 2. write functions to parse the strings 
 # according to the criteria for extracting
 # digits that appear between "0" characters
 # 3. classify strings into categories and
 # obtain location indexes for each category
 # 4. subsets strings by category and apply
 # the appropriate function
 # key step is to parse each string according to
 # the index location of the "0" chracters
 # contained--example "62034706309085592380"
 # has "0" at 
 # find_zero("62034706309085592380")
 # [1]  3  7 10 12 20

 # functions

 double_zero  <- function(x) x[length(x)] - x[length(x)-1] == 1
 find_zero    <- function(x) as.numeric(gregexpr("0", x)[[1]])
 first_zero   <- function(x) ifelse(x == 1 & length(x) == 1,TRUE,FALSE)
 has_zero     <- function(x) grep("0",x[,2])
 lacks_zero   <- function(x) grep("0",x[,2], invert = TRUE)
 multi_zero   <- function(x) ifelse(length(x) >= 2,TRUE,FALSE)
 parse_multi  <- function(x) {
  pull_multi = function(x) gsub(".*0(.*)0.*", "\\1", x)
  ifelse(pull_multi(x) == "",NA,pull_multi(x)) |>
    gsub("^0|0$","",x=_)
 }
 parse_single <- function(x) gsub("0.*$","",x)
 single_zero  <- function(x) ifelse(x != 1 & length(x) == 1,TRUE,FALSE)

 # data

 x <- paste(readLines("https://gist.githubusercontent.com/technocrat/50e92332adacc7f25b10c272f512e0f4/raw/67e1fe4db09cdafecfb7907c863987e1b0444dd1/legacy%2520tape"), collapse = '\n')

 # checks: divisible into 80-character blocks,
 # only digits 0-9 and show beginning
 nchar(x) %% 20
 grep("[^0-9]",x)
 paste(substr(x,1,160),"...")

 # example: first 20 character block

 substr(x, 1, 20)

 # what we want

 # y example: first row of target matrix y

 yex <- matrix(c(1,8559238),1,2)
 colnames(yex) <- c("id","extract")
 yex

 # y = f(x): f is to be composed

 # preprocessing: divide into blocks of 20 
 # characters, keeping track of block sequence

 char_list <- strsplit(x, "")[[1]]
 chunk_size <- 20
 num_chunks <- ceiling(length(char_list) / chunk_size)
 chunks <- split(char_list, ceiling(seq_along(char_list) / chunk_size))
 x <- matrix(sapply(chunks, paste, collapse = ""),ncol = 1)
 # add a record identifier
 id <- row(x)
 x <- cbind(id,x)
 colnames(x) <- c("id","extract")

 # list of positions of zero in each block of 20
 # applied to x, the data
 lp <- lapply(x[,2],find_zero)

 # index of rows of each type
 the_all_zeros      <- which(lapply(lp,length) == 20)
 the_double_zeros   <- which(lapply(lp,double_zero) == TRUE)
 the_first_zeros    <- which(lapply(lapply(lp,first_zero),isTRUE) == TRUE)
 the_lacks_zeros    <- which(lapply(lp,sum) == -1) 
 the_multi_zeros    <- which(lapply(lp,length) >= 2)
 the_single_zeros   <- which(lapply(lp,length) == 1)
 the_discards       <- c(the_all_zeros,the_double_zeros,the_first_zeros,the_lacks_zeros)

 # receiver object
 y <- matrix(rep("blank",640))

 # there should be no values "blank" remaining after assigning
 # values to x[index] where index is the_multi_zeros,
 # the_single_zeros, the_discards

 # main
 y[the_multi_zeros]  <- parse_multi(x[the_multi_zeros,2])
 y[the_single_zeros] <- parse_single(x[the_single_zeros,2])
 y[the_discards]     <- NA
 cbind(x[,2],y)
	# aristotle.R
	# author: Richard Careaga
	# problem to illustrate analytic programming
	# Date: 2023-10-14

	# what we have: a text copy of an old fixed-length format tape
	# intended to record numeric blocks of data in 20 character increments
	# without delimiters; the objective is to divide the text into
	# 20 character blocks, find the index of the last "0" in each block
	# and record the block sequence number of the block together with the
	# numeric characters preceding it back to the next "0" reading right to
	# left. example for the first block
	# "62034706309085592380" --> 1 8559238

	# strategy
	# 1. divide the source data into 20-character
	# strings
	# 2. write functions to parse the strings
	# according to the criteria for extracting
	# digits that appear between "0" characters
	# 3. classify strings into categories and
	# obtain location indexes for each category
	# 4. subsets strings by category and apply
	# the appropriate function
	# key step is to parse each string according to
	# the index location of the "0" chracters
	# contained--example "62034706309085592380"
	# has "0" at
	# find_zero("62034706309085592380")
	# [1] 3 7 10 12 20

	# functions

	double_zero <- function(x) x[length(x)] - x[length(x)-1] == 1
	find_zero <- function(x) as.numeric(gregexpr("0", x)[[1]])
	first_zero <- function(x) ifelse(x == 1 & length(x) == 1,TRUE,FALSE)
	has_zero <- function(x) grep("0",x[,2])
	lacks_zero <- function(x) grep("0",x[,2], invert = TRUE)
	multi_zero <- function(x) ifelse(length(x) >= 2,TRUE,FALSE)
	parse_multi <- function(x) {
	pull_multi = function(x) gsub(".0(.)0.*", "\\1", x)
	ifelse(pull_multi(x) == "",NA,pull_multi(x)) \|>
	gsub("^0\|0$","",x=_)
	}
	parse_single <- function(x) gsub("0.*$","",x)
	single_zero <- function(x) ifelse(x != 1 & length(x) == 1,TRUE,FALSE)

	# data

	x <- paste(readLines("https://gist.githubusercontent.com/technocrat/50e92332adacc7f25b10c272f512e0f4/raw/67e1fe4db09cdafecfb7907c863987e1b0444dd1/legacy%2520tape"), collapse = '\n')

	# checks: divisible into 80-character blocks,
	# only digits 0-9 and show beginning
	nchar(x) %% 20
	grep("[^0-9]",x)
	paste(substr(x,1,160),"...")

	# example: first 20 character block

	substr(x, 1, 20)

	# what we want

	# y example: first row of target matrix y

	yex <- matrix(c(1,8559238),1,2)
	colnames(yex) <- c("id","extract")
	yex

	# y = f(x): f is to be composed

	# preprocessing: divide into blocks of 20
	# characters, keeping track of block sequence

	char_list <- strsplit(x, "")[[1]]
	chunk_size <- 20
	num_chunks <- ceiling(length(char_list) / chunk_size)
	chunks <- split(char_list, ceiling(seq_along(char_list) / chunk_size))
	x <- matrix(sapply(chunks, paste, collapse = ""),ncol = 1)
	# add a record identifier
	id <- row(x)
	x <- cbind(id,x)
	colnames(x) <- c("id","extract")

	# list of positions of zero in each block of 20
	# applied to x, the data
	lp <- lapply(x[,2],find_zero)

	# index of rows of each type
	the_all_zeros <- which(lapply(lp,length) == 20)
	the_double_zeros <- which(lapply(lp,double_zero) == TRUE)
	the_first_zeros <- which(lapply(lapply(lp,first_zero),isTRUE) == TRUE)
	the_lacks_zeros <- which(lapply(lp,sum) == -1)
	the_multi_zeros <- which(lapply(lp,length) >= 2)
	the_single_zeros <- which(lapply(lp,length) == 1)
	the_discards <- c(the_all_zeros,the_double_zeros,the_first_zeros,the_lacks_zeros)

	# receiver object
	y <- matrix(rep("blank",640))

	# there should be no values "blank" remaining after assigning
	# values to x[index] where index is the_multi_zeros,
	# the_single_zeros, the_discards

	# main
	y[the_multi_zeros] <- parse_multi(x[the_multi_zeros,2])
	y[the_single_zeros] <- parse_single(x[the_single_zeros,2])
	y[the_discards] <- NA
	cbind(x[,2],y)