caseykulm · July 16, 2019 16:40
diff --git a/ngrams.py b/ngrams.py
 # Adapted from this SO post https://stackoverflow.com/a/14670769/1229735

 from collections import Counter

 # Text to analyse. Paste it here; pruning anything you consider noise
 # beforehand helps.
 text = ''

 # Substring lengths to scan. Both values feed range(), so the lower bound is
 # inclusive and the upper bound is exclusive; use len(text) + 1 as the upper
 # bound to scan every length up to the whole text.
 min_substring_length = 7
 max_substring_length = 15

 # Running tally of how many times each substring has been seen.
 counter = Counter()

 def firstCharacterIsUpper(input):
    """Return True when the first character of `input` is uppercase.

    A slice is used instead of an index so that an empty string does not
    raise; its empty slice is simply reported as not uppercase.
    """
    leading = input[:1]
    return leading.isupper()

 def upperCaseLimit(input, min, max):
    """Report whether the count of uppercase characters in `input` is in range.

    Both bounds are inclusive: the result is True when
    min <= (number of uppercase characters) <= max.
    """
    uppercase_total = sum(1 for symbol in input if symbol.isupper())
    return min <= uppercase_total <= max

 def containsNoSpaces(input):
    """Return True when `input` holds no space character (' ').

    Only the plain space is checked; other whitespace such as tabs or
    newlines does not count.
    """
    has_space = ' ' in input
    return not has_space

 # Count every substring of `text` whose length lies in
 # [min_substring_length, max_substring_length).
 for length in range(min_substring_length, max_substring_length):
    # A window of `length` characters can start at any index from 0 through
    # len(text) - length inclusive, hence the + 1. Without it the final
    # substring of each length would never be counted. When `length` exceeds
    # len(text) the range is empty and nothing is counted.
    window_count = len(text) - length + 1
    for start in range(window_count):
        ngram = text[start:start + length]
        # Optional filter: to count only space-free substrings that start
        # with an uppercase letter and contain one or two uppercase letters,
        # enable the condition below and indent the increment under it.
        # if (firstCharacterIsUpper(ngram) and containsNoSpaces(ngram) and upperCaseLimit(ngram, 1, 2)):
        counter[ngram] += 1

 # `counter` is already a Counter, so it can be queried directly.
 print(counter.most_common(100))
	# Adapted from this SO post https://stackoverflow.com/a/14670769/1229735

	from collections import Counter

	# Text to analyse. Paste it here; pruning anything you consider noise
	# beforehand helps.
	text = ''

	# Substring lengths to scan. Both values feed range(), so the lower bound is
	# inclusive and the upper bound is exclusive; use len(text) + 1 as the upper
	# bound to scan every length up to the whole text.
	min_substring_length = 7
	max_substring_length = 15

	# Running tally of how many times each substring has been seen.
	counter = Counter()

	def firstCharacterIsUpper(input):
	    """Return True when the first character of `input` is uppercase.

	    A slice is used instead of an index so that an empty string does not
	    raise; its empty slice is simply reported as not uppercase.
	    """
	    return input[:1].isupper()

	def upperCaseLimit(input, min, max):
	    """Report whether the count of uppercase characters in `input` is in range.

	    Both bounds are inclusive: the result is True when
	    min <= (number of uppercase characters) <= max.
	    """
	    count = sum(1 for char in input if char.isupper())
	    return min <= count <= max

	def containsNoSpaces(input):
	    """Return True when `input` holds no space character (' ').

	    Only the plain space is checked; other whitespace such as tabs or
	    newlines does not count.
	    """
	    return ' ' not in input

	# Count every substring of `text` whose length lies in
	# [min_substring_length, max_substring_length).
	for length in range(min_substring_length, max_substring_length):
	    # A window of `length` characters can start at any index from 0 through
	    # len(text) - length inclusive, hence the + 1. Without it the final
	    # substring of each length would never be counted. When `length` exceeds
	    # len(text) the range is empty and nothing is counted.
	    window_count = len(text) - length + 1
	    for start in range(window_count):
	        ngram = text[start:start + length]
	        # Optional filter: to count only space-free substrings that start
	        # with an uppercase letter and contain one or two uppercase letters,
	        # enable the condition below and indent the increment under it.
	        # if (firstCharacterIsUpper(ngram) and containsNoSpaces(ngram) and upperCaseLimit(ngram, 1, 2)):
	        counter[ngram] += 1

	# `counter` is already a Counter, so it can be queried directly.
	print(counter.most_common(100))