Created
May 26, 2022 10:16
-
-
Save amrakm/f676904d0d23ba23eb2ba9243c277ca1 to your computer and use it in GitHub Desktop.
Find the first sentence that contains a keyword, using a stemmed string match
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from nltk.stem.porter import PorterStemmer | |
import re | |
def extract_sentence_that_contain_keyword(keyword: str, text: str) -> str:
    """Return the first sentence of *text* whose stemmed form contains *keyword*.

    Both the keyword and every whitespace-separated token of the text are
    reduced with the Porter stemmer, and the comparison is a plain substring
    test on the stemmed, lower-cased sentence.

    Sentences are delimited by ``.``, ``!`` or ``?`` followed by whitespace.

    Args:
        keyword: Word to look for; it is stemmed before matching.
        text: Text to search.

    Returns:
        The first matching sentence, lower-cased, or the literal string
        ``'not found'`` when no sentence matches.
    """
    stemmer = PorterStemmer()
    # Lower-case explicitly so the comparison does not depend on whether the
    # stemmer implementation normalises case itself.
    stemmed_keyword = stemmer.stem(keyword).lower()

    # Split the original text once and stem each sentence on its own. Stemming
    # the whole text first and splitting it separately can yield a different
    # number of sentences (newlines/tabs collapse to spaces when re-joined),
    # which would pair stemmed and original sentences incorrectly.
    sentences = re.split(r'(?<=[.!?])\s+', text.lower())
    for sentence in sentences:
        stemmed_sentence = ' '.join(
            stemmer.stem(word) for word in sentence.split()
        ).lower()
        if stemmed_keyword in stemmed_sentence:
            return sentence
    return 'not found'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment