Created
April 24, 2025 21:40
-
-
Save EIIisD/2d9cce49b7efc94322a8136d4d4fc585 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { stdin } from "node:process"; | |
/**
 * A regex-driven rewrite that can be applied to a single word.
 */
interface SuffixRule {
  /** Pattern that locates the text to rewrite inside the word. */
  re: RegExp;
  /** Replacement template handed to `String.prototype.replace` (may reference `$1`, `$2`, ...). */
  rep: string;
}
/**
 * Multi-word American phrases mapped to their British equivalents.
 * Keys are lower-case and use a single space between words.
 */
const PHRASE_DICT: Readonly<Record<string, string>> = Object.freeze({
  "cell phone": "mobile phone",
  "french fries": "chips",
});
/**
 * Single-word American spellings/terms mapped to their British equivalents.
 * Keys are lower-case.
 *
 * The object is built on a null prototype so that a lookup with an arbitrary
 * input word (e.g. "constructor") can never resolve to an inherited
 * `Object.prototype` member instead of `undefined`.
 *
 * NOTE(review): several entries are context-sensitive in British English
 * ("practice"/"license" as nouns, "program" for software, "draft" for
 * documents, "gas" meaning a gas) and will be rewritten unconditionally.
 */
const WORD_DICT: Readonly<Record<string, string>> = Object.freeze(
  Object.assign(Object.create(null), {
    truck: "lorry",
    cookie: "biscuit",
    color: "colour",
    flavor: "flavour",
    honor: "honour",
    neighbor: "neighbour",
    behavior: "behaviour",
    favorite: "favourite",
    program: "programme",
    draft: "draught",
    analyze: "analyse",
    defense: "defence",
    offense: "offence",
    license: "licence",
    dialog: "dialogue",
    gray: "grey",
    humor: "humour",
    analog: "analogue",
    catalog: "catalogue",
    practice: "practise",
    practicing: "practising",
    sidewalk: "pavement",
    gas: "petrol",
    elevator: "lift",
    donut: "doughnut",
    airplane: "aeroplane",
    synthesize: "synthesise",
    center: "centre",
    meter: "metre",
    theater: "theatre",
    anemia: "anaemia",
    diarrhea: "diarrhoea",
    encyclopedia: "encyclopaedia",
    estrogen: "oestrogen",
    // Abbreviations: the key includes the trailing period that is dropped.
    "mr.": "mr",
    "mrs.": "mrs",
    "dr.": "dr",
  }),
);
/**
 * Case-insensitive, global pattern matching any key of `PHRASE_DICT` as a
 * whole phrase.
 *
 * - Keys are tried longest-first so a longer phrase wins over a shorter one
 *   that starts at the same position.
 * - Regex metacharacters inside keys are escaped with a single backslash.
 * - `\\b` inside the template literal produces the regex word-boundary `\b`
 *   (a doubled escape would instead demand a literal backslash in the text).
 */
const phraseRe = new RegExp(
  `\\b(${Object.keys(PHRASE_DICT)
    .sort((a, b) => b.length - a.length)
    .map((k) => k.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
    .join("|")})\\b`,
  "gi",
);
// Tokeniser for ASCII words: letters, an optional apostrophe part ("it's"),
// and an optional trailing period (kept so abbreviations such as "mr." can be
// looked up). The period is `\.?` - a doubled backslash would require a
// literal backslash after every word and stop plain words from matching.
// NOTE(review): the hyphenated alternative is listed second, so the first
// alternative always wins at a letter and each part of a hyphenated compound
// is matched as its own token.
const wordRe = /[A-Za-z]+(?:'[A-Za-z]+)?\.?|[A-Za-z]+(?:-[A-Za-z]+)+/g;
/**
 * Pattern-based spelling rewrites, tried in order on a single word.
 *
 * The escapes are single (`\b`, `\w`): with doubled backslashes the patterns
 * would look for a literal backslash and never match ordinary words.
 *
 * NOTE(review): both rules are purely textual and over-match some words,
 * e.g. rule 1 also rewrites "size"/"prize" and rule 2 also rewrites
 * "feeling"/"filed". The `(?<!analy)` lookbehind has no practical effect
 * because "analyze" contains "yz", not "iz".
 */
const SUFFIX_RULES: ReadonlyArray<SuffixRule> = Object.freeze([
  // -ize / -izes / -ized / -izer(s) / -izing  ->  -ise ...
  { re: /(?<!analy)iz(e)?(s|d|er|ers|ing)?\b/i, rep: "is$1$2" },
  // vowel + l + (ed|ing|er|ers)  ->  doubled l ("traveled" -> "travelled")
  { re: /(\w*[aeiou])l(ed|ing|er|ers)\b/i, rep: "$1ll$2" },
]);
/**
 * Reports whether the first character of `s` is an ASCII capital letter.
 *
 * @param s - Text to inspect.
 * @returns `true` when `s` is non-empty and starts with `A`-`Z`; `false`
 *   otherwise (including for non-ASCII capitals such as "É").
 */
const isInitialCap = (s: string): boolean => {
  if (!s) return false;
  const code = s.charCodeAt(0);
  return code >= 65 && code <= 90; // ASCII A-Z
};
/**
 * Re-cases `replacementWord` to follow the casing pattern of `originalWord`.
 *
 * - ALL CAPS original (with at least one letter) -> upper-cased replacement.
 * - Every word of a multi-word original capitalised -> every word of the
 *   replacement capitalised ("Cell Phone" -> "Mobile Phone").
 * - Only the first letter capitalised -> first letter of the replacement
 *   capitalised, remainder lower-cased.
 * - Anything else -> lower-cased replacement.
 *
 * @param originalWord - Text as it appeared in the input (word or phrase).
 * @param replacementWord - Replacement text in any casing.
 * @returns The replacement with the original's casing pattern applied.
 */
const preserveCase = (
  originalWord: string,
  replacementWord: string,
): string => {
  // An original without any cased character ("" or "123") equals its own
  // upper-case form; it must not be mistaken for an ALL-CAPS word.
  const isAllCaps =
    originalWord === originalWord.toUpperCase() &&
    originalWord !== originalWord.toLowerCase();
  if (isAllCaps) {
    return replacementWord.toUpperCase();
  }

  const lowered = replacementWord.toLowerCase();
  if (!isInitialCap(originalWord)) {
    return lowered;
  }

  const capitalize = (word: string): string =>
    word.charAt(0).toUpperCase() + word.substring(1);

  const originalWords = originalWord.split(/\s+/).filter((w) => w !== "");
  const isTitleCase =
    originalWords.length > 1 && originalWords.every((w) => isInitialCap(w));

  return isTitleCase
    ? lowered.replace(/\S+/g, (word) => capitalize(word))
    : capitalize(lowered);
};
/**
 * Applies the first entry of `SUFFIX_RULES` that changes `baseWord`.
 *
 * Each rule is applied with a single `replace` call and the result compared
 * with the input, so the outcome does not depend on a regex's `lastIndex`
 * state the way a separate `test()` call would.
 *
 * @param baseWord - Word without trailing punctuation or possessive suffix.
 * @returns The rewritten word, or `baseWord` unchanged when no rule applies.
 */
const applySuffixRules = (baseWord: string): string => {
  for (const { re, rep } of SUFFIX_RULES) {
    const rewritten = baseWord.replace(re, rep);
    if (rewritten !== baseWord) {
      return rewritten;
    }
  }
  return baseWord;
};
/**
 * Converts one word token to British spelling.
 *
 * Lookup order:
 * 1. The whole token in `WORD_DICT` (reaches keys that include their period,
 *    e.g. "mr.").
 * 2. The token without a trailing "." and/or possessive "'s" (any case).
 * 3. The same base minus a plural "s".
 * 4. The pattern rules via `applySuffixRules`.
 *
 * Dictionary access is restricted to own properties so that input words such
 * as "constructor" never resolve to inherited object members.
 *
 * @param token - A single word, optionally ending in "'s" and/or ".".
 * @returns The converted token with the original casing pattern and suffix,
 *   or `token` unchanged when nothing applies.
 */
const processToken = (token: string): string => {
  const lookup = (key: string): string | undefined =>
    Object.prototype.hasOwnProperty.call(WORD_DICT, key)
      ? WORD_DICT[key]
      : undefined;

  // Abbreviation keys carry their period, so try the untouched token first.
  const wholeMatch = lookup(token.toLowerCase());
  if (wholeMatch !== undefined) {
    return preserveCase(token, wholeMatch);
  }

  // Peel off "." and then "'s" so that "color's." is handled as well; the
  // suffix is re-attached exactly as written (e.g. "'S" stays upper-case).
  let base = token;
  let suffix = "";
  if (base.endsWith(".")) {
    base = base.slice(0, -1);
    suffix = ".";
  }
  if (/'s$/i.test(base)) {
    suffix = base.slice(-2) + suffix;
    base = base.slice(0, -2);
  }

  const lowerBase = base.toLowerCase();

  const direct = lookup(lowerBase);
  if (direct !== undefined) {
    return preserveCase(base, direct) + suffix;
  }

  // Check for plurals (simple 's' suffix)
  if (lowerBase.length > 1 && lowerBase.endsWith("s")) {
    const singular = lookup(lowerBase.slice(0, -1));
    if (singular !== undefined) {
      return preserveCase(base, `${singular}s`) + suffix;
    }
  }

  const ruledBase = applySuffixRules(base);
  if (ruledBase !== base) {
    return preserveCase(base, ruledBase) + suffix;
  }

  return token; // Return original token if no rule applied
};
/**
 * Converts American spellings and terms in `inputText` to British ones.
 *
 * Steps, in order:
 * 1. Replace multi-word phrases matched by `phraseRe` using `PHRASE_DICT`.
 * 2. Replace individual words matched by `wordRe` via `processToken`
 *    (hyphenated matches are converted part by part).
 * 3. Collapse every run of two or more whitespace characters into one space
 *    and strip whitespace / ASCII punctuation from both ends of the text.
 *
 * @param inputText - Text to convert.
 * @returns A promise resolving to the converted, trimmed text.
 */
async function convertText(inputText: string): Promise<string> {
  const withPhrases = inputText.replace(phraseRe, (matchedPhrase) => {
    const key = matchedPhrase.toLowerCase();
    // Own-property check: never pick up inherited object members.
    const replacement = Object.prototype.hasOwnProperty.call(PHRASE_DICT, key)
      ? PHRASE_DICT[key]
      : undefined;
    return replacement !== undefined
      ? preserveCase(matchedPhrase, replacement)
      : matchedPhrase;
  });

  const withWords = withPhrases.replace(wordRe, (matchedWord) =>
    matchedWord.includes("-")
      ? matchedWord.split("-").map((part) => processToken(part)).join("-")
      : processToken(matchedWord),
  );

  // Clean up whitespace and punctuation at ends.
  // The class lists ASCII punctuation explicitly; the hyphen is escaped so it
  // is a literal character rather than forming a `,-.` range.
  return withWords
    .replace(/\s{2,}/g, " ") // Collapse multiple spaces
    .replace(
      /^[\s!"#$%&'()*+,\-./:;<=>?@[\]^_`{|}~]+|[\s!"#$%&'()*+,\-./:;<=>?@[\]^_`{|}~]+$/g,
      "",
    ); // Trim leading/trailing space/punctuation
}
/**
 * Converts `text` and prints the result to stdout. A failed conversion is
 * reported on stderr and reflected in a non-zero exit code instead of
 * surfacing as an unhandled promise rejection.
 */
const printConverted = (text: string): void => {
  convertText(text)
    .then((converted) => {
      console.log(converted);
    })
    .catch((error: unknown) => {
      console.error(error);
      process.exitCode = 1;
    });
};

// Get arguments, excluding the node executable and script name
const args = process.argv.slice(2);
const firstArg = args[0];
if (firstArg !== undefined) {
  // Use the first argument as input, for testing
  printConverted(firstArg);
} else {
  // No arguments (production behaviour), read from stdin
  let input = "";
  stdin.setEncoding("utf8");
  stdin.on("readable", () => {
    let chunk: string | Buffer | null;
    // biome-ignore lint/suspicious/noAssignInExpressions: Required for readable stream processing
    while ((chunk = stdin.read()) !== null) {
      input += chunk.toString(); // Ensure chunk is treated as string
    }
  });
  stdin.on("end", () => {
    printConverted(input.trim());
  });
  stdin.on("error", (error: unknown) => {
    console.error(error);
    process.exitCode = 1;
  });
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment