Skip to content

Instantly share code, notes, and snippets.

@lattejed
Created June 10, 2013 07:55
Show Gist options
  • Save lattejed/5747158 to your computer and use it in GitHub Desktop.
Save lattejed/5747158 to your computer and use it in GitHub Desktop.
{-
Step 2
(m>0) ATIONAL -> ATE
...
(m>0) BILITI -> BLE
-}
step2 :: String -> String
step2 str =
if (not $ null sfxs') && (measure $ snd $ wordDesc $ stm (fst sfx)) > 0
then swapsfx str (fst sfx) (snd sfx)
else str
where f = \ss -> not $ fst ss `isSuffixOf` toLowers str
stm = getstem str
sfx = head $ sfxs'
sfxs' = dropWhile f sfxs
sfxs = [
("ational", "ate" ),
("tional" , "tion"),
("enci" , "ence"),
("anci" , "ance"),
("izer" , "ize" ),
("abli" , "able"),
("alli" , "al" ),
("entli" , "ent" ),
("eli" , "e" ),
("ousli" , "ous" ),
("ization", "ize" ),
("ation" , "ate" ),
("ator" , "ate" ),
("alism" , "al" ),
("iveness", "ive" ),
("fulness", "ful" ),
("ousness", "ous" ),
("aliti" , "al" ),
("iviti" , "ive" ),
("biliti" , "ble" )]
{-
Step 3
(m>0) ICATE -> IC
...
(m>0) NESS ->
-}
step3 :: String -> String
step3 str =
if (not $ null sfxs') && (measure $ snd $ wordDesc $ stm (fst sfx)) > 0
then swapsfx str (fst sfx) (snd sfx)
else str
where f = \ss -> not $ fst ss `isSuffixOf` toLowers str
stm = getstem str
sfx = head $ sfxs'
sfxs' = dropWhile f sfxs
sfxs = [
("icate", "ic"),
("ative", "" ),
("alize", "al"),
("iciti", "ic"),
("ical" , "ic"),
("ful" , "" ),
("ness" , "" )]
{-
Step 4
(m>1) AL ->
...
(m>1 and (*S or *T)) ION ->
...
(m>1) IZE ->
-}
step4 :: String -> String
step4 str =
if (not $ null sfxs') && (measure $ snd $ wordDesc $ stm (fst sfx)) > 1
then swapsfx str (fst sfx) (snd sfx)
else str
where f = \ss -> not $ fst ss `isSuffixOf` toLowers str
stm = getstem str
sfx = head $ sfxs'
sfxs' = dropWhile f sfxs
sfxs = [
("al" , "" ),
("ance" , "" ),
("ence" , "" ),
("er" , "" ),
("ic" , "" ),
("able" , "" ),
("ible" , "" ),
("ant" , "" ),
("ement", "" ),
("ment" , "" ),
("ent" , "" ),
("sion" , "s"),
("tion" , "t"),
("ou" , "" ),
("ism" , "" ),
("ate" , "" ),
("iti" , "" ),
("ous" , "" ),
("ive" , "" ),
("ize" , "" )]
{-
Step 5a
(m>1) E ->
(m=1 and not *o) E ->
-}
step5a :: String -> String
step5a str
| sfx "e" && (measure $ snd $ wordDesc $ stm "e") > 1 = swp "e" ""
| sfx "e" && (measure $ snd $ wordDesc $ stm "e") == 1 && noto = swp "e" ""
| otherwise = str
where noto = not $ endscvc $ wordDesc $ stm "e"
stm = getstem str
sfx = (`isSuffixOf` toLowers str)
swp = swapsfx str
{-
Step 5b
(m > 1 and *d and *L) ->
-}
step5b :: String -> String
step5b str =
if (measure $ snd $ wordDesc str) > 1 && dblc && ends 'l'
then init str
else str
where dblc = endsdblc $ wordDesc str
ends = endswith str
{-
Our public stemming function
-}
stem :: String -> String
stem str =
steps str
where steps = step5 . step4 . step3 . step2 . step1
step1 = step1c . step1b . step1a
step5 = step5b . step5a
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment