Skip to content

Instantly share code, notes, and snippets.

@eileen-code4fun
Created January 17, 2022 20:48
Show Gist options
  • Save eileen-code4fun/d7698cbfd7fd3451a7dfd5e1acdaff4d to your computer and use it in GitHub Desktop.
Save eileen-code4fun/d7698cbfd7fd3451a7dfd5e1acdaff4d to your computer and use it in GitHub Desktop.
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
def lower_remove_punctuation_stem(txt):
    """Normalize *txt* and reduce every token to its Porter stem.

    The text is first passed through ``lower_remove_punctuation`` (defined
    elsewhere in this file; presumably it lowercases and strips
    punctuation -- confirm against its definition). The result is split on
    whitespace and each token is stemmed with NLTK's ``PorterStemmer``.

    Args:
        txt: Raw input text.

    Returns:
        The stemmed tokens joined by single spaces. Because the text is
        split on arbitrary whitespace, runs of whitespace collapse to one
        space, and input with no tokens yields an empty string.
    """
    txt = lower_remove_punctuation(txt)
    stemmer = PorterStemmer()
    return ' '.join(stemmer.stem(token) for token in txt.split())
def lower_remove_punctuation_lemmatize(txt):
    """Normalize *txt* and replace every token with its WordNet lemma.

    The text is first passed through ``lower_remove_punctuation`` (defined
    elsewhere in this file; presumably it lowercases and strips
    punctuation -- confirm against its definition). The result is split on
    whitespace and each token is lemmatized with NLTK's
    ``WordNetLemmatizer``.

    Args:
        txt: Raw input text.

    Returns:
        The lemmatized tokens joined by single spaces. Because the text is
        split on arbitrary whitespace, runs of whitespace collapse to one
        space, and input with no tokens yields an empty string.
    """
    txt = lower_remove_punctuation(txt)
    lemmatizer = WordNetLemmatizer()
    # NOTE: lemmatize() is called without a ``pos`` argument, so NLTK's
    # default part of speech is used for every token (noun, per the NLTK
    # docs); verbs/adjectives may therefore come back unchanged.
    return ' '.join(lemmatizer.lemmatize(token) for token in txt.split())
# Use preprocess to create respective training and test datasets.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment