Last active
August 27, 2024 10:10
-
-
Save lambdalisue/22fe05493d4a3a6c34651959e62ebb2f to your computer and use it in GitHub Desktop.
English word length frequency
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { exists } from "jsr:@std/fs"; | |
import { map, reduce } from "jsr:@core/iterutil"; | |
// Word list to analyse. It is cached next to the script under `filename`,
// so the download only happens when that file does not exist yet.
const sourceUrl =
  "https://raw.githubusercontent.com/dwyl/english-words/master/words_alpha.txt";
const filename = "words_alpha.txt";
if (!(await exists(filename))) {
  const resp = await fetch(sourceUrl);
  // Refuse to cache a failed download: once written, the file would be
  // picked up by every later run and analysed as if it were the word list.
  if (!resp.ok || resp.body === null) {
    throw new Error(
      `Failed to download ${sourceUrl}: ${resp.status} ${resp.statusText}`,
    );
  }
  await Deno.writeFile(filename, resp.body);
}
const content = await Deno.readTextFile(filename);
// One sample per non-blank line: the length of the trimmed line.
// Trimming happens before filtering so that whitespace-only lines (for
// example a stray "\r" left over from CRLF line endings) are dropped instead
// of being counted as zero-length words.
const samples = content
  .split("\n")
  .map((line) => line.trim())
  .filter((word) => word.length > 0)
  .map((word) => word.length);
/**
 * Adds up every number in `value`.
 *
 * @param value Numbers to total.
 * @returns The total, or `0` when `value` is empty.
 */
function sum(value: number[]): number {
  // Seed the fold with 0: without an initial value, `reduce` throws a
  // TypeError on an empty array.
  return value.reduce((total, v) => total + v, 0);
}
/**
 * Arithmetic mean of `value`: the total of all elements divided by how many
 * elements there are.
 *
 * @param value Numbers to average.
 * @returns The average of the numbers in `value`.
 */
function mean(value: number[]): number {
  const total = sum(value);
  const count = value.length;
  return total / count;
}
/**
 * Returns the number that occurs most often in `value`.
 *
 * When several numbers share the highest count, the one that appears first
 * in `value` wins.
 *
 * @param value Numbers to inspect.
 * @returns The most frequent number, or `NaN` when `value` is empty.
 */
function mode(value: number[]): number {
  // Tally occurrences directly rather than collecting every group into an
  // array only to read its length.
  const counts = new Map<number, number>();
  for (const v of value) {
    counts.set(v, (counts.get(v) ?? 0) + 1);
  }

  let best = NaN;
  let bestCount = 0;
  // A Map iterates in insertion order, so the strict `>` keeps the earliest
  // value when counts are tied.
  for (const [n, count] of counts) {
    if (count > bestCount) {
      best = n;
      bestCount = count;
    }
  }
  return best;
}
/**
 * Returns the median of `value`.
 *
 * The input may be in any order and is not modified. For an even number of
 * elements the result is the average of the two middle elements.
 *
 * @param value Numbers to inspect.
 * @returns The median, or `NaN` when `value` is empty.
 */
function median(value: number[]): number {
  // The middle element is only the median of ordered data. Sort a copy so
  // the caller's array keeps its order, and compare numerically because the
  // default sort compares elements as strings.
  const sorted = [...value].sort((a, b) => a - b);
  const freq = sorted.length;
  if (freq === 0) {
    return NaN;
  }
  const mid = Math.floor(freq / 2);
  return freq % 2 === 0 ? (sorted[mid - 1] + sorted[mid]) / 2 : sorted[mid];
}
/**
 * Returns the three quartiles of `value` as `[q1, q2, q3]`.
 *
 * `q2` is the median of all elements. `q1` and `q3` are the medians of the
 * lower and upper halves; with an odd number of elements the middle element
 * belongs to neither half. The input may be in any order and is not modified.
 *
 * @param value Numbers to inspect.
 * @returns The tuple `[q1, q2, q3]`.
 */
function quantile(value: number[]): [q1: number, q2: number, q3: number] {
  // The halves must be cut from ordered data, so sort a copy numerically
  // before slicing.
  const sorted = [...value].sort((a, b) => a - b);
  const half = sorted.length / 2;
  // For an odd length, floor/ceil leave the middle element out of both halves;
  // for an even length both evaluate to the same split point.
  const head = sorted.slice(0, Math.floor(half));
  const tail = sorted.slice(Math.ceil(half));
  return [median(head), median(sorted), median(tail)];
}
// Summary statistics for the word-length samples.
console.log(`Frequency: ${samples.length}`);
console.log(`Mean: ${mean(samples)}`);
console.log(`Mode: ${mode(samples)}`);
console.log(`Median: ${median(samples)}`);
console.log(`Quantile: ${quantile(samples)}`);

// Histogram: pairs of [word length, number of words with that length].
const counter = map(
  Map.groupBy(samples, (length) => length).entries(),
  ([length, group]) => [length, group.length] as const,
);
console.log("Data:");
// Print one "length, count" row per distinct length, shortest first.
const rows = Array.from(counter);
rows.sort((lhs, rhs) => lhs[0] - rhs[0]);
for (const [length, count] of rows) {
  const label = String(length).padStart(2);
  console.log(`${label}, ${count}`);
}
Author
lambdalisue
commented
Aug 27, 2024
•
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment