ipha · March 22, 2016 01:23
diff --git a/ngram.py b/ngram.py
 #!/usr/bin/env python3

 from nltk.corpus import brown

 # Characters treated as punctuation. A token made up solely of these
 # characters is skipped when reporting n-grams.
 # NOTE(review): '&' is absent here, unlike string.punctuation -- confirm intent.
 PUNCTUATION = "!\"#$%'()*+,-./:;<=>?@[\\]^_`{|}~"
 # Minimum number of occurrences an n-gram needs in order to be reported.
 MIN_OCCURANCES = 20
 # Number of consecutive words per n-gram.
 NGRAM_LENGTH = 3


 def is_punctuation(token):
     """Return True if every character of *token* is in PUNCTUATION.

     A per-character test is used on purpose: ``token in PUNCTUATION`` is a
     substring test, which lets multi-character punctuation tokens such as
     "--", "``" or "''" slip through. The empty string counts as punctuation.
     """
     return all(char in PUNCTUATION for char in token)


 def count_ngrams(tokens, length=NGRAM_LENGTH):
     """Count every run of *length* consecutive lower-cased tokens.

     Args:
         tokens: iterable of word strings, in reading order.
         length: number of words per n-gram; must be at least 1.

     Returns:
         A dict mapping each n-gram (a tuple of *length* strings) to the
         number of times it occurs, in first-seen order.

     Raises:
         ValueError: if *length* is smaller than 1.
     """
     if length < 1:
         raise ValueError("n-gram length must be at least 1")
     counts = {}
     window = ()
     for token in tokens:
         # Slide the window: append the new word, keep the last `length`.
         window = (window + (token.lower(),))[-length:]
         # Only count full windows, so no padded n-grams are recorded.
         if len(window) == length:
             counts[window] = counts.get(window, 0) + 1
     return counts


 def frequent_ngrams(counts, min_count=MIN_OCCURANCES):
     """Select the reportable n-grams from *counts*.

     Args:
         counts: mapping of n-gram tuple to occurrence count.
         min_count: smallest count (inclusive) an n-gram must reach.

     Returns:
         A list of ``(count, ngram)`` pairs sorted by ascending count, holding
         only n-grams that occur at least *min_count* times and contain no
         punctuation token.
     """
     selected = [
         (count, ngram)
         for ngram, count in counts.items()
         if count >= min_count
         and not any(is_punctuation(token) for token in ngram)
     ]
     # Sort on the count alone so ties keep their first-seen order.
     selected.sort(key=lambda pair: pair[0])
     return selected


 def main():
     """Print the frequent n-grams of the Brown corpus, rarest first."""
     countdict = count_ngrams(brown.words())
     for result in frequent_ngrams(countdict):
         print(result)


 if __name__ == "__main__":
     main()
	#!/usr/bin/env python3

	from nltk.corpus import brown

	# Characters treated as punctuation. A token made up solely of these
	# characters is skipped when reporting n-grams. The stray "\|" escape
	# (an invalid escape sequence) has been replaced by a plain "|".
	# NOTE(review): '&' is absent here, unlike string.punctuation -- confirm intent.
	PUNCTUATION = "!\"#$%'()*+,-./:;<=>?@[\\]^_`{|}~"
	# Minimum number of occurrences an n-gram needs in order to be reported.
	MIN_OCCURANCES = 20
	# Number of consecutive words per n-gram.
	NGRAM_LENGTH = 3


	def is_punctuation(token):
	    """Return True if every character of *token* is in PUNCTUATION.

	    A per-character test is used on purpose: ``token in PUNCTUATION`` is a
	    substring test, which lets multi-character punctuation tokens such as
	    "--", "``" or "''" slip through. The empty string counts as punctuation.
	    """
	    return all(char in PUNCTUATION for char in token)


	def count_ngrams(tokens, length=NGRAM_LENGTH):
	    """Count every run of *length* consecutive lower-cased tokens.

	    Args:
	        tokens: iterable of word strings, in reading order.
	        length: number of words per n-gram; must be at least 1.

	    Returns:
	        A dict mapping each n-gram (a tuple of *length* strings) to the
	        number of times it occurs, in first-seen order.

	    Raises:
	        ValueError: if *length* is smaller than 1.
	    """
	    if length < 1:
	        raise ValueError("n-gram length must be at least 1")
	    counts = {}
	    window = ()
	    for token in tokens:
	        # Slide the window: append the new word, keep the last `length`.
	        window = (window + (token.lower(),))[-length:]
	        # Only count full windows, so no padded n-grams are recorded.
	        if len(window) == length:
	            counts[window] = counts.get(window, 0) + 1
	    return counts


	def frequent_ngrams(counts, min_count=MIN_OCCURANCES):
	    """Select the reportable n-grams from *counts*.

	    Args:
	        counts: mapping of n-gram tuple to occurrence count.
	        min_count: smallest count (inclusive) an n-gram must reach.

	    Returns:
	        A list of ``(count, ngram)`` pairs sorted by ascending count, holding
	        only n-grams that occur at least *min_count* times and contain no
	        punctuation token.
	    """
	    selected = [
	        (count, ngram)
	        for ngram, count in counts.items()
	        if count >= min_count
	        and not any(is_punctuation(token) for token in ngram)
	    ]
	    # Sort on the count alone so ties keep their first-seen order.
	    selected.sort(key=lambda pair: pair[0])
	    return selected


	def main():
	    """Print the frequent n-grams of the Brown corpus, rarest first."""
	    countdict = count_ngrams(brown.words())
	    for result in frequent_ngrams(countdict):
	        print(result)


	if __name__ == "__main__":
	    main()