Last active
February 7, 2020 19:37
-
-
Save Sirsirious/9c2fbf2b0e5536bf1bd22c92df28190b to your computer and use it in GitHub Desktop.
Function to divide a word into groups of vowels or consonants.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class PorterStemmer(AbstractStemmer):
    """Stemmer helper that splits words into vowel / consonant runs.

    A "group" is a maximal run of adjacent letters that all belong to the
    same class (all consonants or all vowels), e.g. ``"witch"`` is split
    into ``["w", "i", "tch"]``.
    """

    # Letters always treated as consonants ("v" was missing originally).
    consonants = "bcdfghjklmnpqrstvwxz"
    # "y" belongs to neither class here, so it always forms its own group.
    special_case = "y"
    # Letters always treated as vowels.
    vowels = "aeiou"

    def _divide_into_groups(self, word):
        """Split *word* into consecutive same-class letter groups.

        The word is lower-cased first. Characters that are neither in
        ``consonants`` nor in ``vowels`` (``"y"``, digits, punctuation, ...)
        never merge with a neighbour and therefore end up as
        single-character groups.

        Args:
            word: The string to split.

        Returns:
            A list of strings whose concatenation equals ``word.lower()``;
            an empty list for an empty word.
        """
        groups = []
        current = ""
        for letter in word.lower():
            # Compare against the last letter only: every letter already in
            # ``current`` is known to share its class.
            if current and self._compare_same_class(current[-1], letter):
                current += letter
                continue
            if current:
                groups.append(current)
            current = letter
        # Flush the trailing group here rather than by index comparison, so
        # one-letter words are handled and the result does not depend on
        # ``len(word)`` matching ``len(word.lower())``.
        if current:
            groups.append(current)
        return groups

    def _compare_same_class(self, l1, l2):
        """Return True if *l1* and *l2* belong to the same letter class.

        Both arguments may be single letters or longer strings; every
        character of both must be a consonant, or every character must be a
        vowel. Membership is tested per character, because ``"tc" in
        consonants`` would be a substring test and wrongly fail.

        Args:
            l1: First letter (or run of letters).
            l2: Second letter (or run of letters).

        Returns:
            True when all characters are consonants or all are vowels,
            False otherwise.
        """
        letters = l1 + l2
        if all(ch in self.consonants for ch in letters):
            return True
        return all(ch in self.vowels for ch in letters)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment