-- Skip to content
--
-- Instantly share code, notes, and snippets.
--
-- @MatthewBlanchard
-- Created December 22, 2019 18:02
-- Show Gist options
--   • Save MatthewBlanchard/f10fca2388d49806d29d7120487f4170 to your computer and use it in GitHub Desktop.
-- inflector
-- Word lists used by the pluralization rules. Each CATEGORY_<SINGULAR>_<PLURAL>
-- table is named after the suffix swap it was collected for; entries are
-- compared against the end of the (lower-cased) word being inflected.

-- -ex -> -ices
local CATEGORY_EX_ICES = {
  "codex", "murex", "silex", "vertex", "index"
}

-- -ix -> -ices
local CATEGORY_IX_ICES = {
  "matrix", "radix", "helix"
}

-- -um -> -a
local CATEGORY_UM_A = {
  "bacterium", "agendum", "desideratum", "erratum", "stratum", "datum", "ovum",
  "extremum", "candelabrum"
}

-- -us -> -i
local CATEGORY_US_I = {
  "alumnus", "alveolus", "bacillus", "bronchus", "locus", "nucleus", "stimulus",
  "meniscus", "thesaurus"
}

-- -on -> -a
local CATEGORY_ON_A = {
  "criterion", "perihelion", "aphelion", "phenomenon", "prolegomenon",
  "noumenon", "organon", "asyndeton", "hyperbaton"
}

-- -a -> -ae
local CATEGORY_A_AE = {
  "alumna", "alga", "vertebra", "persona"
}

-- -o -> -os
local CATEGORY_O_OS = {
  "albino", "archipelago", "armadillo", "commando", "crescendo", "fiasco",
  "ditto", "dynamo", "embryo", "ghetto", "guano", "inferno", "jumbo", "lumbago",
  "magneto", "manifesto", "medico", "octavo", "photo", "pro", "quarto", "canto",
  "lingo", "generalissimo", "stylo", "rhino", "casino", "auto", "macro", "zero",
  "todo"
}

-- -o -> -i
local CATEGORY_O_I = {
  "solo", "soprano", "basso", "alto", "contralto", "tempo", "piano", "virtuoso"
}

-- -en -> -ina
local CATEGORY_EN_INA = {
  "stamen", "foramen", "lumen"
}

-- -a -> -ata
local CATEGORY_A_ATA = {
  "anathema", "enema", "oedema", "bema", "enigma", "sarcoma", "carcinoma",
  "gumma", "schema", "charisma", "lemma", "soma", "diploma", "lymphoma",
  "stigma", "dogma", "magma", "stoma", "drama", "melisma", "trauma", "edema",
  "miasma"
}

-- -is -> -ides
local CATEGORY_IS_IDES = {
  "iris", "clitoris"
}

-- -us -> -us (plural identical to singular)
local CATEGORY_US_US = {
  "apparatus", "impetus", "prospectus", "cantus", "nexus", "sinus", "coitus",
  "plexus", "status", "hiatus"
}

-- (nothing) -> -i, i.e. "i" is appended
local CATEGORY_NONE_I = {
  "afreet", "afrit", "efreet"
}

-- (nothing) -> -im, i.e. "im" is appended
local CATEGORY_NONE_IM = {
  "cherub", "goy", "seraph"
}

-- -ex words named for the -exes plural
local CATEGORY_EX_EXES = {
  "apex", "latex", "cortex", "pontifex", "vortex", "simplex"
}

-- -ix words named for the -ixes plural
local CATEGORY_IX_IXES = {
  "appendix"
}

-- -s -> -ses, i.e. "es" is appended
local CATEGORY_S_ES = {
  "acropolis", "chaos", "lens", "aegis", "cosmos", "mantis", "alias", "dais",
  "marquis", "asbestos", "digitalis", "metropolis", "atlas", "epidermis",
  "pathos", "bathos", "ethos", "pelvis", "bias", "gas", "polis", "caddis",
  "glottis", "rhinoceros", "cannabis", "sassafras", "canvas", "ibis",
  "trellis"
}

-- -man -> -mans (exceptions to the -man -> -men rule)
local CATEGORY_MAN_MANS = {
  "human", "Alabaman", "Bahaman", "Burman", "German", "Hiroshiman", "Liman",
  "Nakayaman", "Oklahoman", "Panaman", "Selman", "Sonaman", "Tacoman",
  "Yakiman", "Yokohaman", "Yuman"
}

-- Words (and word endings) whose plural is the same as the singular.
local uncountable = {
  -- endings
  "fish", "ois", "sheep", "deer", "pox", "itis",
  -- words
  "bison", "flounder", "pliers", "bream", "gallows", "proceedings", "breeches",
  "graffiti", "rabies", "britches", "headquarters", "salmon", "carp", "herpes",
  "scissors", "chassis", "high-jinks", "sea-bass", "clippers", "homework",
  "series", "cod", "innings", "shears", "contretemps", "jackanapes", "species",
  "corps", "mackerel", "swine", "debris", "measles", "trout", "diabetes",
  "mews", "tuna", "djinn", "mumps", "whiting", "eland", "news", "wildebeest",
  "elk", "pincers", "sugar"
}

-- Whole-word exceptions: singular -> plural.
local irregular = {
  child = "children",
  ephemeris = "ephemerides",
  mongoose = "mongooses",
  mythos = "mythoi",
  ox = "oxen",
  soliloquy = "soliloquies",
  trilby = "trilbys",
  genus = "genera",
  quiz = "quizzes",
  beef = "beefs",
  brother = "brothers",
  cow = "cows",
  genie = "genies",
  money = "moneys",
  octopus = "octopuses",
  opus = "opuses"
}
--- Report whether a sequence holds a given value.
-- @param t sequence (array-like table) to scan
-- @param word value to look for, compared with `==`
-- @return true if some element of `t` equals `word`, false otherwise
local function contains(t, word)
  local found = false
  for _, candidate in ipairs(t) do
    if candidate == word then
      found = true
      break
    end
  end
  return found
end
--- Test whether a string finishes with the given suffix.
-- @param str string to inspect
-- @param ending suffix to look for; the empty string always matches
-- @return true when `str` ends with `ending`, false otherwise
local function endsWith(str, ending)
  -- Guard the empty suffix: sub(-0) would return the whole string instead.
  if ending == "" then
    return true
  end
  local tail = str:sub(-#ending)
  return tail == ending
end
--- Build a pluralizing function from an ordered list of rules.
--
-- Rules are tried in the order they are registered below and the first one
-- that produces a result wins, so an earlier rule shadows any later rule that
-- would match the same word. Several categories are registered twice (once
-- with a classical ending, once with an anglicized one); with the current
-- ordering the second registration is never reached.
--
-- @param mode currently unused; accepted so existing callers keep working
-- @return function(word, count) which returns `word` unchanged when `count`
--   is nil/false or 1, and otherwise the plural form of `word`
local function inflector(mode)
  local rules = {}

  -- Register a Lua-pattern rule: the text matched by `singular` is replaced
  -- by the pattern's first capture (if it has one) followed by `plural`.
  local function rule(singular, plural)
    rules[#rules + 1] = function(word)
      -- string.find returns the match bounds followed by any captures, so
      -- `kept` is nil for patterns without a capture and nothing from the
      -- matched suffix is carried over into the result.
      local first, _, kept = string.find(word, singular)
      if not first then
        return nil
      end
      return string.sub(word, 1, first - 1) .. (kept or "") .. plural
    end
  end

  -- Register a word-list rule: if the word ends with any entry of `list`
  -- (case-insensitively), strip the trailing `singular` and append `plural`.
  local function categoryRule(list, singular, plural)
    rules[#rules + 1] = function(word)
      local lowered = string.lower(word)
      for _, suffix in ipairs(list) do
        -- Lower-case the entry too so capitalised entries such as "German"
        -- can match the lower-cased word.
        if endsWith(lowered, string.lower(suffix)) then
          if not endsWith(lowered, singular) then
            error("Internal error!")
          end
          -- Plain length arithmetic instead of pattern matching: entries may
          -- contain pattern magic characters (e.g. "high-jinks") and the
          -- word may differ in case from the entry.
          return string.sub(word, 1, #word - #singular) .. plural
        end
      end
      return nil
    end
  end

  categoryRule(uncountable, "", "")
  categoryRule(CATEGORY_MAN_MANS, "", "s")

  -- Handle irregular inflections for common suffixes
  rule("man$", "men")
  rule("([lm])ouse$", "ice")
  rule("tooth$", "teeth")
  rule("goose$", "geese")
  rule("foot$", "feet")
  rule("zoon$", "zoa")
  rule("([csx])is$", "es")

  -- Handle fully assimilated classical inflections
  categoryRule(CATEGORY_EX_ICES, "ex", "ices")
  categoryRule(CATEGORY_IX_ICES, "ix", "ices")
  categoryRule(CATEGORY_UM_A, "um", "a")
  categoryRule(CATEGORY_ON_A, "on", "a")
  categoryRule(CATEGORY_A_AE, "a", "ae")
  categoryRule(CATEGORY_EN_INA, "en", "ina")
  categoryRule(CATEGORY_A_ATA, "a", "ata")
  categoryRule(CATEGORY_IS_IDES, "is", "ides")
  categoryRule(CATEGORY_US_US, "", "")
  categoryRule(CATEGORY_O_I, "o", "i")
  categoryRule(CATEGORY_NONE_I, "", "i")
  categoryRule(CATEGORY_NONE_IM, "", "im")
  categoryRule(CATEGORY_EX_EXES, "ex", "ices")
  categoryRule(CATEGORY_IX_IXES, "ix", "ices")
  categoryRule(CATEGORY_US_I, "us", "i")

  rule("([cs]h)$", "es")
  rule("([zx])$", "es")
  categoryRule(CATEGORY_S_ES, "", "es")
  categoryRule(CATEGORY_IS_IDES, "", "es")
  categoryRule(CATEGORY_US_US, "", "es")
  rule("(us)$", "es")
  categoryRule(CATEGORY_A_ATA, "", "s")

  -- churches and such
  rule("([cs])h$", "hes")
  rule("ss$", "sses")

  -- wolves and wives
  rule("([aeo]l)f$", "ves")
  rule("(ar)f$", "ves")
  rule("([nlw]i)fe$", "ves")

  -- families and rays
  rule("([aeiou])y$", "ys")
  rule("y$", "ies")

  categoryRule(CATEGORY_O_I, "o", "os")
  categoryRule(CATEGORY_O_OS, "o", "os")
  rule("([aeiou])o$", "os")
  rule("o$", "oes")
  rule("ulum$", "ula")
  categoryRule(CATEGORY_A_ATA, "", "es")
  rule("s$", "ses")
  -- Catch-all: "$" matches the empty string at the end of any word.
  rule("$", "s")

  return function(word, count)
    if not count or count == 1 then
      return word
    end

    -- Whole-word exceptions take precedence over the suffix rules.
    local special = irregular[string.lower(word)]
    if special then
      -- Carry a leading capital over to the looked-up plural.
      if string.find(word, "^%u") then
        special = string.upper(string.sub(special, 1, 1)) .. string.sub(special, 2)
      end
      return special
    end

    for _, apply in ipairs(rules) do
      local result = apply(word)
      if result then
        return result
      end
    end
    return nil
  end
end
-- Demo: print the plural form of a few sample words.
local inf = inflector()
local samples = {
  "cat", "dog", "crow", "steeple", "direwolf", "people", "efreet", "clitoris"
}
for _, sample in ipairs(samples) do
  print(inf(sample, 2))
end
-- Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment