EIIisD · April 24, 2025 21:40
diff --git a/en_gb-superwhisper.ts b/en_gb-superwhisper.ts
 import { stdin } from "node:process";

 /**
  * One ending-based respelling rule.
  *
  * Both fields are read-only: the rules are defined once in a frozen table and
  * are only ever read by the code that applies them.
  */
 interface SuffixRule {
 	/** Pattern that locates the ending to rewrite inside a word. */
 	readonly re: RegExp;
 	/** Replacement template for `String.prototype.replace`; may reference capture groups of `re`. */
 	readonly rep: string;
 }

 /**
  * Multi-word American phrases and their British replacements.
  * Keys are lower-case; the table is frozen so it cannot be modified at runtime.
  */
 const PHRASE_DICT: Readonly<Record<string, string>> = Object.freeze(
 	Object.fromEntries<string>([
 		["cell phone", "mobile phone"],
 		["french fries", "chips"],
 	]),
 );

 /**
  * Single-word American spellings/terms and their British replacements.
  * Keys are lower-case.
  *
  * The table is built on a null-prototype object so that indexing it with an
  * arbitrary word only ever finds the entries listed here. On a plain object,
  * `WORD_DICT["constructor"]` would return an inherited function instead of
  * `undefined`. The table is frozen, so it cannot be modified at runtime.
  *
  * NOTE(review): several entries are blanket mappings that British usage only
  * applies in some senses (e.g. practice, draft, program, meter, license) —
  * confirm these are intended.
  */
 const WORD_DICT: Readonly<Record<string, string>> = Object.freeze(
 	Object.assign<Record<string, string>, Record<string, string>>(
 		Object.create(null),
 		{
 			truck: "lorry",
 			cookie: "biscuit",
 			color: "colour",
 			flavor: "flavour",
 			honor: "honour",
 			neighbor: "neighbour",
 			behavior: "behaviour",
 			favorite: "favourite",
 			program: "programme",
 			draft: "draught",
 			analyze: "analyse",
 			defense: "defence",
 			offense: "offence",
 			license: "licence",
 			dialog: "dialogue",
 			gray: "grey",
 			humor: "humour",
 			analog: "analogue",
 			catalog: "catalogue",
 			practice: "practise",
 			practicing: "practising",
 			sidewalk: "pavement",
 			gas: "petrol",
 			elevator: "lift",
 			donut: "doughnut",
 			airplane: "aeroplane",
 			synthesize: "synthesise",
 			center: "centre",
 			meter: "metre",
 			theater: "theatre",
 			anemia: "anaemia",
 			diarrhea: "diarrhoea",
 			encyclopedia: "encyclopaedia",
 			estrogen: "oestrogen",
 			"mr.": "mr",
 			"mrs.": "mrs",
 			"dr.": "dr",
 		},
 	),
 );

 /**
  * Case-insensitive, global pattern matching any key of `PHRASE_DICT` as a
  * whole phrase (word boundary on both sides).
  *
  * Keys are sorted longest-first so that a longer phrase wins over a shorter
  * one sharing the same start, and regex metacharacters in keys are escaped
  * with a single backslash so they match literally.
  */
 const phraseRe = new RegExp(
 	`\\b(${Object.keys(PHRASE_DICT)
 		.sort((a, b) => b.length - a.length)
 		.map((k) => k.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
 		.join("|")})\\b`,
 	"gi",
 );

 /**
  * Tokeniser for the word pass: a run of ASCII letters, optionally followed by
  * an apostrophe part (e.g. "'s") and one trailing period.
  *
  * The hyphenated alternative is listed second, so the first alternative always
  * matches first and hyphenated words are delivered one part at a time.
  */
 const wordRe = /[A-Za-z]+(?:'[A-Za-z]+)?\.?|[A-Za-z]+(?:-[A-Za-z]+)+/g;

 /**
  * Ending-based respellings, listed in the order they are tried.
  * Neither pattern carries the `g` flag, so `RegExp.prototype.test` keeps no
  * state between calls.
  */
 const SUFFIX_RULES: ReadonlyArray<SuffixRule> = Object.freeze([
 	// -ize, -izes, -ized, -izer(s), -izing, -ization(s)  ->  -ise...
 	// A bare "iz" ending (quiz, whiz) is left alone. The lookbehind skips words
 	// whose whole stem is listed there, because their "-ize" is not a suffix:
 	// size and its compounds, prize, seize, maize, baize, Belize.
 	{
 		re: /(?<!\b(?:(?:re|up|down|over|under|out|super|cap|as)?s|pr|se|ma|ba|bel))iz(e[sd]?|ers?|ing|ations?)\b/i,
 		rep: "is$1",
 	},
 	// consonant + "el" + ed/ing/er/ers  ->  doubled "l" (traveled -> travelled).
 	// Limited to "-el" stems: with any vowel before the "l", words such as
 	// failed, ruler or smiled would be respelled. "paralleled" keeps a single "l".
 	{
 		re: /([b-df-hj-np-tv-z]e)(?<!paralle)l(ed|ing|er|ers)\b/i,
 		rep: "$1ll$2",
 	},
 ]);

 /**
  * Tells whether `s` starts with an ASCII capital letter (A-Z).
  * An empty string yields `false`.
  */
 const isInitialCap = (s: string): boolean => /^[A-Z]/.test(s);

 /**
  * Re-cases `replacementWord` after the pattern of `originalWord`:
  * an original equal to its own upper-cased form gives an all-caps result,
  * an original starting with an ASCII capital gives a capitalised result
  * (remaining letters lower-cased), anything else gives an all-lower-case result.
  *
  * @param originalWord - Text whose casing is copied.
  * @param replacementWord - Text to re-case.
  * @returns `replacementWord` in the casing described above.
  */
 const preserveCase = (
 	originalWord: string,
 	replacementWord: string,
 ): string => {
 	const allCaps = originalWord.toUpperCase() === originalWord;
 	if (allCaps) {
 		return replacementWord.toUpperCase();
 	}
 	if (!isInitialCap(originalWord)) {
 		return replacementWord.toLowerCase();
 	}
 	const head = replacementWord.charAt(0).toUpperCase();
 	const tail = replacementWord.substring(1).toLowerCase();
 	return head + tail;
 };

 /**
  * Applies the first rule of `SUFFIX_RULES` whose pattern matches `baseWord`.
  *
  * @param baseWord - Word to respell.
  * @returns The respelled word, or `baseWord` itself when no rule matches.
  */
 const applySuffixRules = (baseWord: string): string => {
 	const firstHit = SUFFIX_RULES.find(({ re }) => re.test(baseWord));
 	return firstHit === undefined
 		? baseWord
 		: baseWord.replace(firstHit.re, firstHit.rep);
 };

 /**
  * Converts one word token to its British form.
  *
  * Lookup order: the whole token, the token without a trailing period and/or
  * possessive "'s", that base without a plural "s", and finally the suffix
  * rules. Casing is carried over with `preserveCase`, and a stripped period or
  * possessive is re-attached to the result.
  *
  * @param token - Word to convert.
  * @returns The converted word, or `token` unchanged when nothing applies.
  */
 const processToken = (token: string): string => {
 	// Own-property lookup only: a bare WORD_DICT[key] on a plain object also
 	// finds inherited members such as "constructor", which are not strings.
 	const lookup = (key: string): string | undefined =>
 		Object.prototype.hasOwnProperty.call(WORD_DICT, key)
 			? WORD_DICT[key]
 			: undefined;

 	// Try the token exactly as given first, so dictionary keys that carry
 	// their own trailing period ("mr.", "mrs.", "dr.") can be found.
 	const wholeMatch = lookup(token.toLowerCase());
 	if (wholeMatch !== undefined) {
 		return preserveCase(token, wholeMatch);
 	}

 	let base = token;
 	let suffix = "";

 	// Peel the period before the possessive so "color's." is handled as well.
 	if (base.endsWith(".")) {
 		base = base.slice(0, -1);
 		suffix = ".";
 	}
 	if (base.endsWith("'s")) {
 		base = base.slice(0, -2);
 		suffix = `'s${suffix}`;
 	}

 	const lowerBase = base.toLowerCase();

 	const direct = lookup(lowerBase);
 	if (direct !== undefined) {
 		return preserveCase(base, direct) + suffix;
 	}

 	// Check for plurals (simple 's' suffix)
 	if (lowerBase.length > 1 && lowerBase.endsWith("s")) {
 		const singular = lookup(lowerBase.slice(0, -1));
 		if (singular !== undefined) {
 			return preserveCase(base, `${singular}s`) + suffix;
 		}
 	}

 	const ruledBase = applySuffixRules(base);
 	if (ruledBase !== base) {
 		return preserveCase(base, ruledBase) + suffix;
 	}

 	return token; // Return original token if no rule applied
 };

 /**
  * Converts American English text to British English.
  *
  * Steps, in order: replace dictionary phrases, convert individual words,
  * collapse every run of two or more whitespace characters into one space,
  * then strip whitespace and ASCII punctuation from both ends of the text.
  *
  * @param inputText - Text to convert.
  * @returns A promise resolving to the converted text.
  */
 async function convertText(inputText: string): Promise<string> {
 	// Phrase pass: swap a matched phrase for its dictionary entry, keeping its casing.
 	const swapPhrase = (hit: string): string => {
 		const british = PHRASE_DICT[hit.toLowerCase()];
 		return british ? preserveCase(hit, british) : hit;
 	};

 	// Word pass: hyphenated matches are converted part by part.
 	const swapWord = (hit: string): string => {
 		if (!hit.includes("-")) {
 			return processToken(hit);
 		}
 		return hit.split("-").map(processToken).join("-");
 	};

 	// Leading or trailing run of whitespace / ASCII punctuation.
 	const edgeJunk =
 		/^[\s!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~]+|[\s!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~]+$/g;

 	return inputText
 		.replace(phraseRe, swapPhrase)
 		.replace(wordRe, swapWord)
 		.replace(/\s{2,}/g, " ")
 		.replace(edgeJunk, "");
 }

 // Get arguments, excluding the node executable and script name
 const args = process.argv.slice(2);

 // Runs the conversion and prints the result. A failed conversion is reported
 // on stderr and flagged through the exit code instead of being left as an
 // unhandled promise rejection.
 const convertAndPrint = (text: string): void => {
 	convertText(text)
 		.then(console.log)
 		.catch((error: unknown) => {
 			console.error(error);
 			process.exitCode = 1;
 		});
 };

 const firstArg = args[0];
 if (firstArg !== undefined) {
 	// Use the first argument as input, for testing
 	convertAndPrint(firstArg);
 } else {
 	// No arguments (production behaviour), read from stdin
 	let input = "";
 	stdin.setEncoding("utf8");

 	stdin.on("readable", () => {
 		let chunk: string | Buffer | null;
 		// biome-ignore lint/suspicious/noAssignInExpressions: Required for readable stream processing
 		while ((chunk = stdin.read()) !== null) {
 			input += chunk.toString(); // Ensure chunk is treated as string
 		}
 	});

 	// Convert once the whole input has been received.
 	stdin.on("end", () => {
 		convertAndPrint(input.trim());
 	});
 }
	import { stdin } from "node:process";

	/**
	 * One ending-based respelling rule.
	 *
	 * Both fields are read-only: the rules are defined once in a frozen table and
	 * are only ever read by the code that applies them.
	 */
	interface SuffixRule {
		/** Pattern that locates the ending to rewrite inside a word. */
		readonly re: RegExp;
		/** Replacement template for `String.prototype.replace`; may reference capture groups of `re`. */
		readonly rep: string;
	}

	/**
	 * Multi-word American phrases and their British replacements.
	 * Keys are lower-case; the table is frozen so it cannot be modified at runtime.
	 */
	const PHRASE_DICT: Readonly<Record<string, string>> = Object.freeze(
		Object.fromEntries<string>([
			["cell phone", "mobile phone"],
			["french fries", "chips"],
		]),
	);

	/**
	 * Single-word American spellings/terms and their British replacements.
	 * Keys are lower-case.
	 *
	 * The table is built on a null-prototype object so that indexing it with an
	 * arbitrary word only ever finds the entries listed here. On a plain object,
	 * `WORD_DICT["constructor"]` would return an inherited function instead of
	 * `undefined`. The table is frozen, so it cannot be modified at runtime.
	 *
	 * NOTE(review): several entries are blanket mappings that British usage only
	 * applies in some senses (e.g. practice, draft, program, meter, license) —
	 * confirm these are intended.
	 */
	const WORD_DICT: Readonly<Record<string, string>> = Object.freeze(
		Object.assign<Record<string, string>, Record<string, string>>(
			Object.create(null),
			{
				truck: "lorry",
				cookie: "biscuit",
				color: "colour",
				flavor: "flavour",
				honor: "honour",
				neighbor: "neighbour",
				behavior: "behaviour",
				favorite: "favourite",
				program: "programme",
				draft: "draught",
				analyze: "analyse",
				defense: "defence",
				offense: "offence",
				license: "licence",
				dialog: "dialogue",
				gray: "grey",
				humor: "humour",
				analog: "analogue",
				catalog: "catalogue",
				practice: "practise",
				practicing: "practising",
				sidewalk: "pavement",
				gas: "petrol",
				elevator: "lift",
				donut: "doughnut",
				airplane: "aeroplane",
				synthesize: "synthesise",
				center: "centre",
				meter: "metre",
				theater: "theatre",
				anemia: "anaemia",
				diarrhea: "diarrhoea",
				encyclopedia: "encyclopaedia",
				estrogen: "oestrogen",
				"mr.": "mr",
				"mrs.": "mrs",
				"dr.": "dr",
			},
		),
	);

	/**
	 * Case-insensitive, global pattern matching any key of `PHRASE_DICT` as a
	 * whole phrase (word boundary on both sides).
	 *
	 * Keys are sorted longest-first so that a longer phrase wins over a shorter
	 * one sharing the same start, and regex metacharacters in keys are escaped
	 * with a single backslash so they match literally.
	 */
	const phraseRe = new RegExp(
		`\\b(${Object.keys(PHRASE_DICT)
			.sort((a, b) => b.length - a.length)
			.map((k) => k.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
			.join("|")})\\b`,
		"gi",
	);

	/**
	 * Tokeniser for the word pass: a run of ASCII letters, optionally followed by
	 * an apostrophe part (e.g. "'s") and one trailing period.
	 *
	 * The hyphenated alternative is listed second, so the first alternative always
	 * matches first and hyphenated words are delivered one part at a time.
	 */
	const wordRe = /[A-Za-z]+(?:'[A-Za-z]+)?\.?|[A-Za-z]+(?:-[A-Za-z]+)+/g;

	/**
	 * Ending-based respellings, listed in the order they are tried.
	 * Neither pattern carries the `g` flag, so `RegExp.prototype.test` keeps no
	 * state between calls.
	 */
	const SUFFIX_RULES: ReadonlyArray<SuffixRule> = Object.freeze([
		// -ize, -izes, -ized, -izer(s), -izing, -ization(s)  ->  -ise...
		// A bare "iz" ending (quiz, whiz) is left alone. The lookbehind skips words
		// whose whole stem is listed there, because their "-ize" is not a suffix:
		// size and its compounds, prize, seize, maize, baize, Belize.
		{
			re: /(?<!\b(?:(?:re|up|down|over|under|out|super|cap|as)?s|pr|se|ma|ba|bel))iz(e[sd]?|ers?|ing|ations?)\b/i,
			rep: "is$1",
		},
		// consonant + "el" + ed/ing/er/ers  ->  doubled "l" (traveled -> travelled).
		// Limited to "-el" stems: with any vowel before the "l", words such as
		// failed, ruler or smiled would be respelled. "paralleled" keeps a single "l".
		{
			re: /([b-df-hj-np-tv-z]e)(?<!paralle)l(ed|ing|er|ers)\b/i,
			rep: "$1ll$2",
		},
	]);

	/**
	 * Tells whether `s` starts with an ASCII capital letter (A-Z).
	 * An empty string yields `false`.
	 */
	const isInitialCap = (s: string): boolean => /^[A-Z]/.test(s);

	/**
	 * Re-cases `replacementWord` after the pattern of `originalWord`:
	 * an original equal to its own upper-cased form gives an all-caps result,
	 * an original starting with an ASCII capital gives a capitalised result
	 * (remaining letters lower-cased), anything else gives an all-lower-case result.
	 *
	 * @param originalWord - Text whose casing is copied.
	 * @param replacementWord - Text to re-case.
	 * @returns `replacementWord` in the casing described above.
	 */
	const preserveCase = (
		originalWord: string,
		replacementWord: string,
	): string => {
		const allCaps = originalWord.toUpperCase() === originalWord;
		if (allCaps) {
			return replacementWord.toUpperCase();
		}
		if (!isInitialCap(originalWord)) {
			return replacementWord.toLowerCase();
		}
		const head = replacementWord.charAt(0).toUpperCase();
		const tail = replacementWord.substring(1).toLowerCase();
		return head + tail;
	};

	/**
	 * Applies the first rule of `SUFFIX_RULES` whose pattern matches `baseWord`.
	 *
	 * @param baseWord - Word to respell.
	 * @returns The respelled word, or `baseWord` itself when no rule matches.
	 */
	const applySuffixRules = (baseWord: string): string => {
		const firstHit = SUFFIX_RULES.find(({ re }) => re.test(baseWord));
		return firstHit === undefined
			? baseWord
			: baseWord.replace(firstHit.re, firstHit.rep);
	};

	/**
	 * Converts one word token to its British form.
	 *
	 * Lookup order: the whole token, the token without a trailing period and/or
	 * possessive "'s", that base without a plural "s", and finally the suffix
	 * rules. Casing is carried over with `preserveCase`, and a stripped period or
	 * possessive is re-attached to the result.
	 *
	 * @param token - Word to convert.
	 * @returns The converted word, or `token` unchanged when nothing applies.
	 */
	const processToken = (token: string): string => {
		// Own-property lookup only: a bare WORD_DICT[key] on a plain object also
		// finds inherited members such as "constructor", which are not strings.
		const lookup = (key: string): string | undefined =>
			Object.prototype.hasOwnProperty.call(WORD_DICT, key)
				? WORD_DICT[key]
				: undefined;

		// Try the token exactly as given first, so dictionary keys that carry
		// their own trailing period ("mr.", "mrs.", "dr.") can be found.
		const wholeMatch = lookup(token.toLowerCase());
		if (wholeMatch !== undefined) {
			return preserveCase(token, wholeMatch);
		}

		let base = token;
		let suffix = "";

		// Peel the period before the possessive so "color's." is handled as well.
		if (base.endsWith(".")) {
			base = base.slice(0, -1);
			suffix = ".";
		}
		if (base.endsWith("'s")) {
			base = base.slice(0, -2);
			suffix = `'s${suffix}`;
		}

		const lowerBase = base.toLowerCase();

		const direct = lookup(lowerBase);
		if (direct !== undefined) {
			return preserveCase(base, direct) + suffix;
		}

		// Check for plurals (simple 's' suffix)
		if (lowerBase.length > 1 && lowerBase.endsWith("s")) {
			const singular = lookup(lowerBase.slice(0, -1));
			if (singular !== undefined) {
				return preserveCase(base, `${singular}s`) + suffix;
			}
		}

		const ruledBase = applySuffixRules(base);
		if (ruledBase !== base) {
			return preserveCase(base, ruledBase) + suffix;
		}

		return token; // Return original token if no rule applied
	};

	/**
	 * Converts American English text to British English.
	 *
	 * Steps, in order: replace dictionary phrases, convert individual words,
	 * collapse every run of two or more whitespace characters into one space,
	 * then strip whitespace and ASCII punctuation from both ends of the text.
	 *
	 * @param inputText - Text to convert.
	 * @returns A promise resolving to the converted text.
	 */
	async function convertText(inputText: string): Promise<string> {
		// Phrase pass: swap a matched phrase for its dictionary entry, keeping its casing.
		const swapPhrase = (hit: string): string => {
			const british = PHRASE_DICT[hit.toLowerCase()];
			return british ? preserveCase(hit, british) : hit;
		};

		// Word pass: hyphenated matches are converted part by part.
		const swapWord = (hit: string): string => {
			if (!hit.includes("-")) {
				return processToken(hit);
			}
			return hit.split("-").map(processToken).join("-");
		};

		// Leading OR trailing run of whitespace / ASCII punctuation. The two
		// anchored halves must be joined by a real alternation, and the class
		// covers the full ASCII punctuation set including "*".
		const edgeJunk =
			/^[\s!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~]+|[\s!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~]+$/g;

		return inputText
			.replace(phraseRe, swapPhrase)
			.replace(wordRe, swapWord)
			.replace(/\s{2,}/g, " ")
			.replace(edgeJunk, "");
	}

	// Get arguments, excluding the node executable and script name
	const args = process.argv.slice(2);

	// Runs the conversion and prints the result. A failed conversion is reported
	// on stderr and flagged through the exit code instead of being left as an
	// unhandled promise rejection.
	const convertAndPrint = (text: string): void => {
		convertText(text)
			.then(console.log)
			.catch((error: unknown) => {
				console.error(error);
				process.exitCode = 1;
			});
	};

	const firstArg = args[0];
	if (firstArg !== undefined) {
		// Use the first argument as input, for testing
		convertAndPrint(firstArg);
	} else {
		// No arguments (production behaviour), read from stdin
		let input = "";
		stdin.setEncoding("utf8");

		stdin.on("readable", () => {
			let chunk: string | Buffer | null;
			// biome-ignore lint/suspicious/noAssignInExpressions: Required for readable stream processing
			while ((chunk = stdin.read()) !== null) {
				input += chunk.toString(); // Ensure chunk is treated as string
			}
		});

		// Convert once the whole input has been received.
		stdin.on("end", () => {
			convertAndPrint(input.trim());
		});
	}