Blog writing analysis script, referenced in thesephist.com/posts/blog-analysis/ ✍️
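The gist holds two Ink files: the analysis script below, followed by the small quicksort module it pulls in with load('quicksort'). Assuming the script is saved at the root of the blog's Hugo-style source tree (the ./content/posts path below implies that layout) alongside a quicksort.ink copy of the module, it can be run with the Ink interpreter, e.g. ink analyze.ink, where analyze.ink is a stand-in for whatever name the script is saved under.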
` Count sentence length, word size distribution over past posts `

std := load('std')
str := load('str')
quicksort := load('quicksort')

log := std.log
f := std.format
append := std.append
cat := std.cat
slice := std.slice
flatten := std.flatten
reduce := std.reduce
map := std.map
each := std.each
filter := std.filter
readFile := std.readFile
writeFile := std.writeFile

hasPrefix? := str.hasPrefix?
split := str.split
trim := str.trim
trimS := s => trim(s, ' ')

sortBy := quicksort.sortBy
sort := quicksort.sort
` Constants `
PostsDir := './content/posts'
Newline := char(10)

blank? := s => trimS(s) = ''
` find all blog posts on the site and callback with all file names `
withAllPosts := cb => (
    postFiles := dir(PostsDir, evt => evt.type :: {
        'error' -> log('error: could not read posts directory!')
        'data' -> cb(filter(
            map(evt.data, entry => entry.name)
            ` filter out hidden files and the _index.md file `
            fName => ~(hasPrefix?(fName, '.') | hasPrefix?(fName, '_'))
        ))
    })
)
` given a potentially double-quoted string, strip the quotes `
stripQuotes := s => s.0 :: {
    '"' -> slice(s, 1, len(s) - 1)
    _ -> s
}
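` for reference, withPostRecord below expects each post file to open with
    Hugo-style front matter delimited by --- lines, along the lines of this
    hypothetical example:
    ---
    title: "An example post"
    date: 2020-06-08T10:00:00-04:00
    ---
    with the Markdown body following the closing --- `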
` given a file name to a blog post, parse it completely
    and return a PostRecord structure with parsed metadata and body `
withPostRecord := (fileName, cb) => (
    readFile(PostsDir + '/' + fileName, bytes => (
        lines := filter(
            split(bytes, Newline)
            ` remove raw HTML lines `
            line => ~(blank?(line) | hasPrefix?(line, '<'))
        )

        ` sanitize lines: blank out Markdown punctuation, scanning each line
            from its last character down to index 1 `
        lines := map(lines, line => (sub := i => i :: {
            0 -> line
            _ -> (
                line.(i) :: {
                    '_' -> line.(i) := ' '
                    '*' -> line.(i) := ' '
                    '[' -> line.(i) := ' '
                    ']' -> line.(i) := ' '
                    '(' -> line.(i) := ' '
                    ')' -> line.(i) := ' '
                }
                sub(i - 1)
            )
        })(len(line) - 1))

        record := {
            ` parse state:
                0 -> start
                1 -> inside front matter
                2 -> after front matter
                3 -> error, stop parsing `
            parseState: 0
            title: ()
            date: ()
            body: []
        }
        each(lines, line => record.parseState :: {
            0 -> line :: {
                '---' -> record.parseState := 1
                _ -> (
                    log(f('error: unexpected line in post file, {{0}}', [line]))
                    record.parseState := 3
                )
            }
            1 -> line :: {
                '---' -> record.parseState := 2
                _ -> split(line, ':').0 :: {
                    'title' -> record.title := stripQuotes(trimS(split(line, 'title:').1))
                    'date' -> record.date := trimS(split(line, 'date:').1)
                }
            }
            2 -> record.body.len(record.body) := line
            3 -> ()
        })

        cb(record)
    ))
)
` mean of an array `
mean := xs => len(xs) :: {
    0 -> ~1
    _ -> reduce(xs, (a, b) => a + b, 0) / len(xs)
}

` median of an array `
median := xs => xs :: {
    [] -> ~1
    _ -> (
        sorted := sort(xs)
        mid := floor(len(sorted) / 2)
        (len(sorted) % 2) :: {
            0 -> (sorted.(mid) + sorted.(mid - 1)) / 2
            1 -> sorted.(mid)
        }
    )
}
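` for example, mean([1, 2, 3, 4]) and median([1, 2, 3, 4]) both evaluate
    to 2.5, median([1, 2, 3]) evaluates to 2, and both functions return
    ~1 (negative one) as a sentinel for empty inputs `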
` split up a blog post body into a flat list of words
    includes doing some sanitization `
getWords := record => filter(
    flatten(map(record.body, line => split(line, ' ')))
    ` try to remove links and empty words `
    word => blank?(word) :: {
        true -> false
        _ -> ~(hasPrefix?(word, 'http') | hasPrefix?(word, '/'))
    }
)

` split up a blog post body into a flat list of sentences
    includes doing some sanitization `
getSentences := record => flatten(map(record.body, line => split(line, '. ')))
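` hypothetical example: for a body line 'see https://example.com for details',
    getWords keeps ['see', 'for', 'details'] and drops the link-like token;
    getSentences splits each body line on '. ', so 'One. Two. Three.'
    yields ['One', 'Two', 'Three.'] `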
` main analysis function that works per-PostRecord, computing
    statistics over the post body and publishing a CSV `
analyze := records => (
    sorted := sortBy(records, r => r.date)

    log('Serializing word list...')
    wordLengths := map(sorted, r => map(getWords(r), len))

    log('Serializing sentence list...')
    sentenceLengths := map(sorted, r => map(
        getSentences(r)
        sent => len(filter(split(sent, ' '), w => ~blank?(w)))
    ))

    log('Computing mean word lengths')
    meanWordLengths := map(wordLengths, mean)

    log('Computing median word lengths')
    medianWordLengths := map(wordLengths, median)

    log('Computing median sentence lengths')
    medianSentenceLengths := map(sentenceLengths, median)

    log('Computing median paragraph lengths')
    paragraphLengths := map(sorted, record => map(
        record.body
        para => len(filter(split(para, ' '), w => ~blank?(w)))
    ))
    medianParagraphLengths := map(paragraphLengths, median)

    results := {
        dates: map(sorted, r => r.date)
        meanWordLengths: meanWordLengths
        medianWordLengths: medianWordLengths
        medianSentenceLengths: medianSentenceLengths
        medianParagraphLengths: medianParagraphLengths
    }

    csv := renderCSV(results)
    log(csv)
    writeFile('./analysis.csv', csv, done => done :: {
        true -> log('File saved to ./analysis.csv successfully!')
        () -> log('error: failed to save analysis results csv!')
    })
)
` render results into a CSV for importing into Google Sheets `
renderCSV := results => (
    csvLines := []
    each(keys(results), key => (
        rowData := append([key], map(results.(key), string))
        csvLines.len(csvLines) := cat(rowData, ',')
    ))
    cat(csvLines, Newline)
)
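` the resulting CSV holds one row per metric, keyed by its first column,
    with one value per post in date order, e.g. (hypothetical values):
    dates,2019-01-01,2019-02-14
    meanWordLengths,4.8,5.1 `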
` main analysis routine: read every post, then run the analysis
    once the last record has arrived `
postRecords := []
withAllPosts(fileNames => each(
    fileNames
    fName => withPostRecord(fName, record => (
        log(f('read: [{{ date }}] {{ title }}', record))
        postRecords.len(postRecords) := record

        ` reads are asynchronous, so only analyze once all posts are in `
        len(postRecords) :: {
            len(fileNames) -> analyze(postRecords)
        }
    ))
))
The second file in the gist is the sorting module loaded above with load('quicksort'):
` minimal quicksort implementation
    using hoare partition `

std := load('std')

map := std.map
clone := std.clone

sortBy := (v, pred) => (
    vPred := map(v, pred)

    partition := (v, lo, hi) => (
        pivot := vPred.(lo)
        lsub := i => (vPred.(i) < pivot) :: {
            true -> lsub(i + 1)
            false -> i
        }
        rsub := j => (vPred.(j) > pivot) :: {
            true -> rsub(j - 1)
            false -> j
        }
        (sub := (i, j) => (
            i := lsub(i)
            j := rsub(j)
            (i < j) :: {
                false -> j
                true -> (
                    ` inlined swap! `
                    tmp := v.(i)
                    tmpPred := vPred.(i)
                    v.(i) := v.(j)
                    v.(j) := tmp
                    vPred.(i) := vPred.(j)
                    vPred.(j) := tmpPred
                    sub(i + 1, j - 1)
                )
            }
        ))(lo, hi)
    )

    (quicksort := (v, lo, hi) => len(v) :: {
        0 -> v
        _ -> (lo < hi) :: {
            false -> v
            true -> (
                p := partition(v, lo, hi)
                quicksort(v, lo, p)
                quicksort(v, p + 1, hi)
            )
        }
    })(v, 0, len(v) - 1)
)
` sort! sorts its argument in place; sort leaves the input untouched
    and returns a sorted copy `
sort! := v => sortBy(v, x => x)
sort := v => sort!(clone(v))
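` usage sketch: sort([3, 1, 2]) evaluates to [1, 2, 3] and leaves its argument
    untouched, sort!([3, 1, 2]) sorts the list in place, and sortBy orders
    composites by a derived key, as the analysis script does with
    sortBy(records, r => r.date) `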