biranchi2018 · August 21, 2019 07:01
diff --git a/bigram_tokens.py b/bigram_tokens.py
 # Build the bigrams (adjacent token pairs) of a short sample text with NLTK.
 import itertools
 from pprint import pprint

 import nltk
 from nltk.util import ngrams

 # Download the 'punkt' resource before the tokenizer calls below.
 nltk.download('punkt')

 text = "today is 'Nayan's birthday. she loves ice cream. she is also fond of cream cake. we will celebrate her birthday with ice cream cake"

 # Split the text into sentences, then tokenize each sentence into its own
 # list of words (one inner list per sentence).
 sentences = nltk.sent_tokenize(text)
 words = [nltk.word_tokenize(sent) for sent in sentences]
 print(words)

 # Merge the per-sentence lists into one flat token list. A bare expression
 # is only echoed in an interactive session, so the values are printed
 # explicitly to get the same output when this file is run as a script.
 flattened_list = list(itertools.chain.from_iterable(words))
 print(flattened_list)

 print(len(flattened_list))
 # prints 28

 # Drop empty tokens, then pair every token with its successor. Because the
 # sentences were flattened first, pairs also span sentence boundaries,
 # e.g. ('.', 'she').
 tokens = [token for token in flattened_list if token != ""]
 output = list(ngrams(tokens, 2))
 pprint(output)

 # Output:
 #
 # [('today', 'is'),
 #  ('is', "'Nayan"),
 #  ("'Nayan", "'s"),
 #  ("'s", 'birthday'),
 #  ('birthday', '.'),
 #  ('.', 'she'),
 #  ('she', 'loves'),
 #  ('loves', 'ice'),
 #  ('ice', 'cream'),
 #  ('cream', '.'),
 #  ('.', 'she'),
 #  ('she', 'is'),
 #  ('is', 'also'),
 #  ('also', 'fond'),
 #  ('fond', 'of'),
 #  ('of', 'cream'),
 #  ('cream', 'cake'),
 #  ('cake', '.'),
 #  ('.', 'we'),
 #  ('we', 'will'),
 #  ('will', 'celebrate'),
 #  ('celebrate', 'her'),
 #  ('her', 'birthday'),
 #  ('birthday', 'with'),
 #  ('with', 'ice'),
 #  ('ice', 'cream'),
 #  ('cream', 'cake')]

 # A sequence of n tokens yields n - 1 bigrams.
 print(len(output))
 # prints 27
	# Build the bigrams (adjacent token pairs) of a short sample text with NLTK.
	import itertools
	from pprint import pprint

	import nltk
	from nltk.util import ngrams

	# Download the 'punkt' resource before the tokenizer calls below.
	nltk.download('punkt')

	text = "today is 'Nayan's birthday. she loves ice cream. she is also fond of cream cake. we will celebrate her birthday with ice cream cake"

	# Split the text into sentences, then tokenize each sentence into its own
	# list of words (one inner list per sentence).
	sentences = nltk.sent_tokenize(text)
	words = [nltk.word_tokenize(sent) for sent in sentences]
	print(words)

	# Merge the per-sentence lists into one flat token list. A bare expression
	# is only echoed in an interactive session, so the values are printed
	# explicitly to get the same output when this file is run as a script.
	flattened_list = list(itertools.chain.from_iterable(words))
	print(flattened_list)

	print(len(flattened_list))
	# prints 28

	# Drop empty tokens, then pair every token with its successor. Because the
	# sentences were flattened first, pairs also span sentence boundaries,
	# e.g. ('.', 'she').
	tokens = [token for token in flattened_list if token != ""]
	output = list(ngrams(tokens, 2))
	pprint(output)

	# Output:
	#
	# [('today', 'is'),
	#  ('is', "'Nayan"),
	#  ("'Nayan", "'s"),
	#  ("'s", 'birthday'),
	#  ('birthday', '.'),
	#  ('.', 'she'),
	#  ('she', 'loves'),
	#  ('loves', 'ice'),
	#  ('ice', 'cream'),
	#  ('cream', '.'),
	#  ('.', 'she'),
	#  ('she', 'is'),
	#  ('is', 'also'),
	#  ('also', 'fond'),
	#  ('fond', 'of'),
	#  ('of', 'cream'),
	#  ('cream', 'cake'),
	#  ('cake', '.'),
	#  ('.', 'we'),
	#  ('we', 'will'),
	#  ('will', 'celebrate'),
	#  ('celebrate', 'her'),
	#  ('her', 'birthday'),
	#  ('birthday', 'with'),
	#  ('with', 'ice'),
	#  ('ice', 'cream'),
	#  ('cream', 'cake')]

	# A sequence of n tokens yields n - 1 bigrams.
	print(len(output))
	# prints 27