Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save amrakm/f676904d0d23ba23eb2ba9243c277ca1 to your computer and use it in GitHub Desktop.
Save amrakm/f676904d0d23ba23eb2ba9243c277ca1 to your computer and use it in GitHub Desktop.
Find sentences that contain a keyword - stemmed string match
from nltk.stem.porter import PorterStemmer
import re
def extract_sentence_that_contain_keyword(keyword, text):
    """Return the first sentence of *text* that contains *keyword*, compared by stem.

    Both the keyword and each sentence are lowercased and reduced to Porter
    stems word by word, then compared with a substring test, so e.g. the
    keyword "studies" matches a sentence containing "study".

    Args:
        keyword: Word or phrase to look for.
        text: Text to search. Sentences are delimited by ``.``, ``!`` or ``?``
            followed by whitespace.

    Returns:
        The first matching sentence, lowercased, or the string ``'not found'``
        when no sentence matches or the keyword contains no characters
        other than whitespace.
    """
    stemmer = PorterStemmer()

    def stem_words(fragment):
        # Collapse whitespace runs so a multi-word keyword still matches when
        # the text separates the same words with several spaces or a newline.
        normalized = ' '.join(fragment.lower().split())
        # Stem each run of word characters on its own: passing "study." to the
        # stemmer as a single token would leave it unstemmed because of the
        # attached punctuation.
        return re.sub(r'\w+', lambda match: stemmer.stem(match.group()), normalized)

    stemmed_keyword = stem_words(keyword)
    if not stemmed_keyword:
        # An empty string is a substring of everything; without this guard the
        # first sentence would be reported as a match.
        return 'not found'

    # Split the original text once and stem sentence by sentence, so the
    # returned sentence is always the one that was actually tested. Splitting
    # on any whitespace (not only spaces) also separates sentences that are
    # divided by a newline.
    for sentence in re.split(r'(?<=[.!?])\s+', text.lower()):
        if stemmed_keyword in stem_words(sentence):
            return sentence
    return 'not found'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment