Skip to content

Instantly share code, notes, and snippets.

@Sirsirious
Created February 10, 2020 17:55
Show Gist options
  • Select an option

  • Save Sirsirious/a2dbd05186e4a935f297cbc71b968be9 to your computer and use it in GitHub Desktop.

Select an option

Save Sirsirious/a2dbd05186e4a935f297cbc71b968be9 to your computer and use it in GitHub Desktop.
First Step of the Stemmer.
def _porter_step_1(self, word):
    """
    Apply step 1 of the stemmer: deal with plurals and past participles.

    The step runs in four stages:

    * 1a strips plural endings ("sses" -> "ss", "ies" -> "i", "s" -> "").
    * 1b handles "eed", "ed" and "ing", but only when the stem left by 1a
      is longer than four characters.
    * 1b.2 tidies the stem, and only runs when 1b removed "ed" or "ing".
    * 1c replaces a final "y" with "i" when ``_chk_v`` accepts the stem.

    :param word: the word to stem.
    :return: the word after step 1 has been applied.
    """
    stem = word
    stepb2 = False

    # Step 1a
    if stem.endswith('sses'):
        stem = stem[:-2]
    elif stem.endswith('ies'):
        stem = stem[:-2]
    elif not stem.endswith('ss') and stem.endswith("s"):
        stem = stem[:-1]

    # Step 1b
    if len(stem) > 4:
        # NOTE(review): Porter's published rule measures the part *before*
        # "eed"; here _det_m receives the whole stem -- confirm intended.
        if stem.endswith("eed") and self._det_m(stem) > 0:
            stem = stem[:-1]
        else:
            for suffix in ("ed", "ing"):
                if not stem.endswith(suffix):
                    continue
                candidate = stem[:-len(suffix)]
                # Commit the removal only if the vowel check passes.
                # Otherwise keep the stem produced by step 1a; resetting
                # to the raw `word` here would undo the plural stripping
                # (e.g. "things" must become "thing", not "things").
                if self._chk_v(candidate):
                    stem = candidate
                    stepb2 = True
                break

    # Step 1b.2 -- only after "ed"/"ing" was actually removed.
    if stepb2:
        if stem.endswith(("at", "bl", "iz")):
            # Restore the trailing "e" dropped with the suffix.
            stem += "e"
        elif self._chk_d(stem) and not self._chk_LT(stem, "lsz"):
            # presumably: double final consonant other than l/s/z -- verify
            # against _chk_d / _chk_LT.
            stem = stem[:-1]
        elif self._det_m(stem) == 1 and self._chk_o(stem):
            stem += "e"

    # Step 1c
    if self._chk_v(stem) and stem.endswith('y'):
        stem = stem[:-1] + 'i'
    return stem
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment