Skip to content

Instantly share code, notes, and snippets.

@bowbowbow
Created December 3, 2018 16:24
Show Gist options
  • Select an option

  • Save bowbowbow/8ae3c4fa278ac3868d4e6cbd6dc5cfa3 to your computer and use it in GitHub Desktop.

Select an option

Save bowbowbow/8ae3c4fa278ac3868d4e6cbd6dc5cfa3 to your computer and use it in GitHub Desktop.
import utils
import nltk
from pprint import pprint
def find_document_by_keyword(keyword):
    """Print how many loaded documents mention ``keyword``.

    A document is counted once when ``keyword`` occurs in its ``'title'``
    value or in its ``' body'`` value. The test is Python's ``in``
    operator, so for string fields it is a case-sensitive substring match.

    Args:
        keyword: Text to look for in each document.

    Returns:
        None. The result is printed as ``"<keyword>: <count>"``.
    """
    # NOTE(review): utils.data_load() is assumed to return a pandas-like
    # DataFrame (it must provide iterrows()) -- confirm against utils.
    frame = utils.data_load()
    matches = 0
    for _, record in frame.iterrows():
        # The ' body' key carries a leading space; it is kept verbatim and
        # presumably mirrors the column header in the data -- verify.
        title, body = record['title'], record[' body']
        if keyword not in title and keyword not in body:
            continue
        matches += 1
    print('{}: {}'.format(keyword, matches))
def find_sentence_by_keyword(keyword):
    """Print how many titles and body sentences mention ``keyword``.

    For every loaded document, the title is collected when it contains
    ``keyword``, and the body is split into sentences with
    ``nltk.sent_tokenize`` so that each sentence containing ``keyword`` is
    collected as well. The test is Python's ``in`` operator, so for string
    fields it is a case-sensitive substring match.

    Args:
        keyword: Text to look for in titles and body sentences.

    Returns:
        None. The result is printed as ``"<keyword> sents size: <count>"``.
    """
    # NOTE(review): utils.data_load() is assumed to return a pandas-like
    # DataFrame (it must provide iterrows()) -- confirm against utils.
    df = utils.data_load()
    sents = []
    for _, row in df.iterrows():
        title = row['title']
        if keyword in title:
            sents.append(title)
        # The ' body' key carries a leading space; it is kept verbatim and
        # presumably mirrors the column header in the data -- verify.
        body = row[' body']
        # Items yielded by sent_tokenize are already sentences, so a match is
        # stored as-is. Tokenizing it a second time is wasted work and could
        # add fragments that do not contain the keyword.
        sents.extend(
            sent for sent in nltk.sent_tokenize(body) if keyword in sent
        )
    print('{} sents size: {}'.format(keyword, len(sents)))
if __name__ == "__main__":
    # Script entry point: run the document-level search first, then the
    # sentence-level search, both for the same keyword.
    for search in (find_document_by_keyword, find_sentence_by_keyword):
        search('bribery')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment