Created
October 2, 2020 19:29
-
-
Save luvies/f7b50e7e60498d94ec2385804dd359b2 to your computer and use it in GitHub Desktop.
A Deno script for getting full and partial anagrams of words
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env deno run --allow-read --allow-hrtime | |
import { readLines } from "https://deno.land/[email protected]/io/mod.ts"; | |
function intersection<T>(a: Set<T>, b: Set<T>): Set<T> { | |
// Quick optimisation by iterating the smaller set instead of the larger one. | |
const [small, large] = a.size > b.size ? [b, a] : [a, b]; | |
const intersect = new Set<T>(); | |
for (const item of small) { | |
if (large.has(item)) { | |
intersect.add(item); | |
} | |
} | |
return intersect; | |
} | |
function sortedString(str: string, unique?: boolean): string { | |
const sorted = str.toLowerCase().split("").sort(); | |
if (unique) { | |
const letters = new Set<string>(); | |
return sorted.filter((c) => { | |
if (letters.has(c)) { | |
return false; | |
} else { | |
letters.add(c); | |
return true; | |
} | |
}).join(""); | |
} else { | |
return sorted.join(""); | |
} | |
} | |
function noUndefineds<T>(arr: Array<T | undefined>): arr is T[] { | |
return !arr.includes(undefined); | |
} | |
function countLetters(word: string): Map<string, number> { | |
const countMap = new Map<string, number>(); | |
for (const c of word) { | |
countMap.set(c, (countMap.get(c) ?? 0) + 1); | |
} | |
return countMap; | |
} | |
function hasMinLetters(word: string, test: string): boolean { | |
const wordCount = countLetters(word); | |
const testCount = countLetters(test); | |
for (const [c, n] of wordCount) { | |
const tn = testCount.get(c); | |
if (tn === undefined || tn < n) { | |
return false; | |
} | |
} | |
return true; | |
} | |
class Anagram { | |
public static build(words: string[]): Anagram { | |
const cache = new Anagram(); | |
for (const word of words) { | |
// Add the word to the anagrams map. | |
const sorted = sortedString(word); | |
const anagramCache = cache.anagrams.get(sorted); | |
if (anagramCache) { | |
anagramCache.push(word); | |
} else { | |
cache.anagrams.set(sorted, [word]); | |
} | |
// Add the word to the letters map. | |
const unique = sortedString(word, true); | |
for (const c of unique) { | |
const letterCache = cache.letters.get(c); | |
if (letterCache) { | |
letterCache.add(sorted); | |
} else { | |
cache.letters.set(c, new Set([sorted])); | |
} | |
} | |
} | |
return cache; | |
} | |
/** | |
* A map from single letter -> set of anagram keys that contain that letter. | |
*/ | |
private letters = new Map<string, Set<string>>(); | |
/** | |
* A map from the sorted string | |
*/ | |
private anagrams = new Map<string, string[]>(); | |
public full(word: string): string[] | undefined { | |
return this.anagrams.get(sortedString(word)); | |
} | |
public partial(word: string): string[] | undefined { | |
const sorted = sortedString(word); | |
const unique = sortedString(word, true); | |
const letterSets = unique.split("").map((c) => this.letters.get(c)); | |
if (noUndefineds(letterSets)) { | |
// Sort sets so smallest is at the start. | |
// This makes sure we do the minimal amount of iteration. | |
const superAnagrams = letterSets.sort((a, b) => a.size - b.size) | |
// We then intersect on all sets, the result being the set of anagram | |
// keys that have all the letters of the given key or more. | |
.reduce(intersection); | |
const partials: string[] = []; | |
for (const key of superAnagrams) { | |
// We don't want full anagrams or keys that do not have the right | |
// number of each letter. | |
if (key === sorted || !hasMinLetters(word, key)) { | |
continue; | |
} | |
const words = this.anagrams.get(key); | |
if (words) { | |
partials.push(...words); | |
} | |
} | |
return partials; | |
} else { | |
return undefined; | |
} | |
} | |
} | |
//////////////////// Testing //////////////////// | |
function singleBench<T>(fn: () => T): [T, number] { | |
const start = performance.now(); | |
const res = fn(); | |
const len = performance.now() - start; | |
return [res, len]; | |
} | |
function displayResult(kind: string, fn: () => string[] | undefined): void { | |
const [list, fetchTime] = singleBench(fn); | |
if (list) { | |
console.log(`${kind} anagrams:`); | |
for (const anagram of list) { | |
console.log(`\t${anagram}`); | |
} | |
} else { | |
console.log(`No ${kind.toLowerCase()} anagrams.`); | |
} | |
console.log(`[Fetched results in ${fetchTime}ms]`); | |
} | |
function display(anagrams: Anagram, word: string): void { | |
displayResult("Full", () => anagrams.full(word)); | |
displayResult("Partial", () => anagrams.partial(word)); | |
} | |
async function main() { | |
const encoder = new TextEncoder(); | |
const prefix = () => Deno.stdout.write(encoder.encode("> ")); | |
const words = Deno.readTextFileSync("./data/english.txt").split("\n").map(( | |
w, | |
) => w.trim()); | |
const [anagrams, cacheTime] = singleBench(() => Anagram.build(words)); | |
console.log(`[Cache built in ${cacheTime}ms]`); | |
await prefix(); | |
for await (const line of readLines(Deno.stdin)) { | |
display(anagrams, line); | |
await prefix(); | |
} | |
} | |
await main(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment