Skip to content

Instantly share code, notes, and snippets.

@EIIisD
Created April 24, 2025 21:40
Show Gist options
  • Save EIIisD/2d9cce49b7efc94322a8136d4d4fc585 to your computer and use it in GitHub Desktop.
Save EIIisD/2d9cce49b7efc94322a8136d4d4fc585 to your computer and use it in GitHub Desktop.
import { stdin } from "node:process";
/**
 * One regex-based spelling rewrite, tried on words that have no dictionary
 * entry.
 */
type SuffixRule = {
  /** Pattern searched for inside a word. */
  re: RegExp;
  /**
   * Replacement handed to `String.prototype.replace` together with `re`,
   * so `$1`, `$2`, ... refer to capture groups of `re`.
   */
  rep: string;
};
/**
 * Multi-word American phrases mapped to their British replacement.
 * Keys are lower-case; the map is frozen so it cannot be modified at runtime.
 */
const PHRASE_DICT = Object.freeze<Record<string, string>>({
  "cell phone": "mobile phone",
  "french fries": "chips",
});
/**
 * Single-word American spellings/terms mapped to their British replacement.
 * Keys are lower-case.
 *
 * The map is built on a null-prototype object so that indexing it with an
 * arbitrary word from the input (e.g. "constructor") can never return a
 * member inherited from `Object.prototype`; only the entries listed here
 * exist. It is frozen so it cannot be modified at runtime.
 *
 * NOTE(review): several entries are context-dependent in British English
 * (e.g. "practice"/"license" differ between noun and verb, "program" vs.
 * "programme", "gas", "draft") - confirm these blanket mappings are intended.
 */
const WORD_DICT: Readonly<Record<string, string>> = Object.freeze(
  Object.assign(Object.create(null) as Record<string, string>, {
    truck: "lorry",
    cookie: "biscuit",
    color: "colour",
    flavor: "flavour",
    honor: "honour",
    neighbor: "neighbour",
    behavior: "behaviour",
    favorite: "favourite",
    program: "programme",
    draft: "draught",
    analyze: "analyse",
    defense: "defence",
    offense: "offence",
    license: "licence",
    dialog: "dialogue",
    gray: "grey",
    humor: "humour",
    analog: "analogue",
    catalog: "catalogue",
    practice: "practise",
    practicing: "practising",
    sidewalk: "pavement",
    gas: "petrol",
    elevator: "lift",
    donut: "doughnut",
    airplane: "aeroplane",
    synthesize: "synthesise",
    center: "centre",
    meter: "metre",
    theater: "theatre",
    anemia: "anaemia",
    diarrhea: "diarrhoea",
    encyclopedia: "encyclopaedia",
    estrogen: "oestrogen",
    // Abbreviations: the key includes the trailing period that is dropped.
    "mr.": "mr",
    "mrs.": "mrs",
    "dr.": "dr",
  }),
);
/**
 * Case-insensitive, global pattern matching any key of `PHRASE_DICT` as a
 * whole phrase (bounded by `\b` on both sides).
 *
 * Keys are sorted longest-first so that, in the alternation, a longer phrase
 * is tried before a shorter one that starts the same way. Regex
 * metacharacters in keys are escaped with a single backslash so each key is
 * matched literally.
 */
const phraseRe = new RegExp(
  "\\b(" +
    Object.keys(PHRASE_DICT)
      .sort((a, b) => b.length - a.length)
      .map((key) => key.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
      .join("|") +
    ")\\b",
  "gi",
);
/**
 * Global pattern that picks word tokens out of the text: a run of ASCII
 * letters, optionally followed by an apostrophe part (e.g. "'s") and
 * optionally by one trailing period (so abbreviations such as "Dr." arrive
 * as a single token).
 *
 * NOTE(review): the second alternative (hyphenated compounds) can never be
 * chosen, because the first alternative already succeeds on the leading
 * letter run at every position where the second could start. Hyphenated
 * words are therefore tokenised part by part. It is kept as-is here;
 * reordering the alternatives would change how a trailing period after a
 * hyphenated word is tokenised.
 */
const wordRe = /[A-Za-z]+(?:'[A-Za-z]+)?\.?|[A-Za-z]+(?:-[A-Za-z]+)+/g;
/**
 * Ordered regex rewrites tried on a word when the dictionary has no entry.
 * Both patterns are case-insensitive and anchored to the end of a word with
 * `\b`.
 *
 * NOTE(review): these are broad heuristics and also fire on words that are
 * spelled the same in British English, e.g. rule 1 turns "size" into "sise"
 * and rule 2 turns "filed" into "filled". Confirm whether an exception list
 * is wanted.
 */
const SUFFIX_RULES: ReadonlyArray<SuffixRule> = Object.freeze([
  // -ize / -izes / -ized / -izer(s) / -izing  ->  -ise ...
  { re: /(?<!analy)iz(e)?(s|d|er|ers|ing)?\b/i, rep: "is$1$2" },
  // vowel + single "l" before -ed / -ing / -er(s)  ->  doubled "l"
  { re: /(\w*[aeiou])l(ed|ing|er|ers)\b/i, rep: "$1ll$2" },
]);
/**
 * Reports whether `s` starts with an ASCII capital letter (A-Z).
 * An empty string yields `false`.
 */
const isInitialCap = (s: string): boolean => /^[A-Z]/.test(s);
/**
 * Re-cases `replacementWord` to follow the casing of `originalWord`.
 *
 * - If the original equals its own upper-cased form (this includes strings
 *   with no lower-case letters at all, such as "A" or ""), the replacement
 *   is returned fully upper-cased.
 * - Otherwise, if the original starts with an ASCII capital, the replacement
 *   gets an upper-case first character and a lower-case remainder.
 * - Otherwise the replacement is returned fully lower-cased.
 *
 * @param originalWord - Text as it appeared in the input.
 * @param replacementWord - Text to substitute.
 * @returns The replacement with the casing described above.
 */
const preserveCase = (
  originalWord: string,
  replacementWord: string,
): string => {
  const isAllCaps = originalWord.toUpperCase() === originalWord;
  if (isAllCaps) {
    return replacementWord.toUpperCase();
  }
  if (!isInitialCap(originalWord)) {
    return replacementWord.toLowerCase();
  }
  const head = replacementWord.slice(0, 1).toUpperCase();
  const tail = replacementWord.slice(1).toLowerCase();
  return head + tail;
};
/**
 * Applies the first entry of `SUFFIX_RULES` whose pattern matches
 * `baseWord`; later rules are not consulted once one has matched.
 *
 * @param baseWord - Word to rewrite.
 * @returns The rewritten word, or `baseWord` itself when no rule matches.
 */
const applySuffixRules = (baseWord: string): string => {
  const firstHit = SUFFIX_RULES.find((candidate) =>
    candidate.re.test(baseWord),
  );
  return firstHit === undefined
    ? baseWord
    : baseWord.replace(firstHit.re, firstHit.rep);
};
/**
 * Returns the `WORD_DICT` entry stored under `key`, or `undefined`.
 * Only the dictionary's own keys count, so words that happen to name an
 * inherited object member (e.g. "constructor") are never treated as entries.
 */
const lookupWord = (key: string): string | undefined =>
  Object.prototype.hasOwnProperty.call(WORD_DICT, key)
    ? WORD_DICT[key]
    : undefined;

/**
 * Converts a single word token to its British form.
 *
 * Steps, first success wins:
 *  1. Look up the whole token (lower-cased) in the dictionary. This is what
 *     makes keys that contain their trailing period ("mr.", "dr.") reachable.
 *  2. Split off a possessive "'s" (any case) or a trailing "." and look up
 *     the remaining base word.
 *  3. If the base ends in "s", look up the base without it and re-append "s".
 *  4. Try the regex suffix rules on the base.
 * The split-off suffix is re-attached unchanged and the casing of the
 * original is carried over via `preserveCase`.
 *
 * @param token - A word as matched by the tokenizer; may end in "'s" or ".".
 * @returns The converted token, or `token` unchanged if nothing applied.
 */
const processToken = (token: string): string => {
  const exact = lookupWord(token.toLowerCase());
  if (exact !== undefined) {
    return preserveCase(token, exact);
  }

  let base = token;
  let suffix = "";
  if (/'s$/i.test(token)) {
    base = token.slice(0, -2);
    suffix = token.slice(-2); // keep the suffix exactly as written ('s or 'S)
  } else if (token.endsWith(".")) {
    base = token.slice(0, -1);
    suffix = ".";
  }

  const lowerBase = base.toLowerCase();
  const direct = lookupWord(lowerBase);
  if (direct !== undefined) {
    return preserveCase(base, direct) + suffix;
  }

  // Simple plural: "colors" -> look up "color", then add the "s" back.
  if (lowerBase.length > 1 && lowerBase.endsWith("s")) {
    const singular = lookupWord(lowerBase.slice(0, -1));
    if (singular !== undefined) {
      return preserveCase(base, `${singular}s`) + suffix;
    }
  }

  const ruledBase = applySuffixRules(base);
  if (ruledBase !== base) {
    return preserveCase(base, ruledBase) + suffix;
  }

  return token;
};
/**
 * Converts American spellings and terms in `inputText` to British ones.
 *
 * Passes, in order:
 *  1. Every match of `phraseRe` that has a `PHRASE_DICT` entry is replaced,
 *     keeping the casing of the matched text.
 *  2. Every match of `wordRe` is run through `processToken`; a match that
 *     contains "-" is split on "-", converted part by part, and re-joined.
 *  3. Each run of two or more whitespace characters (newlines included)
 *     becomes a single space.
 *  4. Leading and trailing whitespace and ASCII punctuation are removed, so
 *     a sentence-final "." or "!" does not survive.
 *
 * @param inputText - Text to convert.
 * @returns A promise resolving to the converted, tidied text.
 */
async function convertText(inputText: string): Promise<string> {
  const swapPhrase = (hit: string): string => {
    const british = PHRASE_DICT[hit.toLowerCase()];
    if (!british) {
      return hit;
    }
    return preserveCase(hit, british);
  };

  const swapWord = (hit: string): string => {
    if (!hit.includes("-")) {
      return processToken(hit);
    }
    return hit.split("-").map(processToken).join("-");
  };

  const converted = inputText
    .replace(phraseRe, swapPhrase)
    .replace(wordRe, swapWord);

  const collapsed = converted.replace(/\s{2,}/g, " ");

  return collapsed.replace(
    /^[\s!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~]+|[\s!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~]+$/g,
    "",
  );
}
/**
 * Converts `text` and prints the result on stdout. If the conversion
 * rejects, the error is printed on stderr and the process exit code is set
 * to 1 instead of leaving the promise rejection unhandled.
 */
const convertAndPrint = (text: string): void => {
  convertText(text)
    .then((converted) => {
      console.log(converted);
    })
    .catch((error: unknown) => {
      console.error(error);
      process.exitCode = 1;
    });
};

// Command-line arguments, excluding the node executable and script name.
const args = process.argv.slice(2);
const firstArg = args[0];
if (firstArg !== undefined) {
  // An argument was given: use the first one as the input (handy for testing).
  convertAndPrint(firstArg);
} else {
  // No arguments (production behaviour): read all of stdin, then convert.
  let input = "";
  stdin.setEncoding("utf8");
  stdin.on("readable", () => {
    let chunk: string | Buffer | null;
    // biome-ignore lint/suspicious/noAssignInExpressions: Required for readable stream processing
    while ((chunk = stdin.read()) !== null) {
      input += chunk.toString(); // Ensure chunk is treated as string
    }
  });
  stdin.on("end", () => {
    convertAndPrint(input.trim());
  });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment