Created
February 10, 2020 17:55
-
-
Save Sirsirious/a2dbd05186e4a935f297cbc71b968be9 to your computer and use it in GitHub Desktop.
First Step of the Stemmer.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def _porter_step_1(self, word):
    """
    Apply step 1 of the Porter stemmer: plurals and past participles.

    Step 1a strips plural endings, step 1b strips "eed"/"ed"/"ing",
    step 1b.2 tidies the stem after an "ed"/"ing" removal, and step 1c
    turns a terminal "y" into "i".

    NOTE(review): the helpers used here (_det_m, _chk_v, _chk_d, _chk_LT,
    _chk_o) are defined elsewhere. From the way they are called they are
    assumed to implement Porter's measure m, *v* (contains a vowel),
    *d (double consonant ending), "ends with one of these letters" and
    *o (consonant-vowel-consonant ending) -- confirm against their
    definitions.

    Args:
        word: The word to stem.

    Returns:
        The word after steps 1a, 1b, 1b.2 and 1c have been applied.
    """
    stem = word

    # Step 1a: "sses" -> "ss", "ies" -> "i", "s" -> "" (a final "ss" stays).
    if stem.endswith(("sses", "ies")):
        stem = stem[:-2]
    elif stem.endswith("s") and not stem.endswith("ss"):
        stem = stem[:-1]

    # Step 1b: only attempted on stems longer than four characters
    # (length guard kept unchanged from the previous implementation).
    suffix_removed = False
    if len(stem) > 4:
        if stem.endswith("eed"):
            # (m>0) EED -> EE. The condition applies to the part *before*
            # "eed"; measuring the whole word would always count the
            # vowel-consonant pair of "eed" itself. A word matching "eed"
            # must not fall through to the shorter "ed" rule.
            if self._det_m(stem[:-3]) > 0:
                stem = stem[:-1]
        else:
            for suffix in ("ed", "ing"):
                if stem.endswith(suffix):
                    candidate = stem[:-len(suffix)]
                    # (*v*) ED / ING -> "". When the check fails, keep the
                    # result of step 1a instead of reverting to the raw
                    # input word, which would undo the plural stripping.
                    if self._chk_v(candidate):
                        stem = candidate
                        suffix_removed = True
                    break

    # Step 1b.2: clean-up that only runs after "ed"/"ing" was removed.
    if suffix_removed:
        if stem.endswith(("at", "bl", "iz")):
            # "at" -> "ate", "bl" -> "ble", "iz" -> "ize"
            stem += "e"
        elif self._chk_d(stem) and not self._chk_LT(stem, "lsz"):
            # Drop the last letter unless _chk_LT matches "lsz".
            stem = stem[:-1]
        elif self._det_m(stem) == 1 and self._chk_o(stem):
            stem += "e"

    # Step 1c: (*v*) Y -> I. The vowel test is made on the part before the
    # terminal "y" so that the "y" itself can never satisfy it.
    if stem.endswith("y") and self._chk_v(stem[:-1]):
        stem = stem[:-1] + "i"
    return stem
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment