Skip to content

Instantly share code, notes, and snippets.

@eristoddle
Created September 19, 2012 17:32
Show Gist options
  • Save eristoddle/3750973 to your computer and use it in GitHub Desktop.
Save eristoddle/3750973 to your computer and use it in GitHub Desktop.
Python simple ngrams
def multigrams(text, lower, upper):
    """Collect the n-grams of ``text`` for every size in ``range(lower, upper)``.

    The sizes follow ``range`` semantics: ``lower`` is included and ``upper``
    is excluded, so ``multigrams(text, 1, 4)`` gathers sizes 1, 2 and 3.

    NOTE(review): ``ngrams`` is not defined in this snippet; it is presumably
    an ``ngrams(sequence, n)`` helper supplied elsewhere (e.g. NLTK) -- confirm.

    Args:
        text: The input handed unchanged to ``ngrams``.
        lower: Smallest n-gram size to generate (inclusive).
        upper: Size at which generation stops (exclusive).

    Returns:
        A single flat list with the grams of each size, smallest size first.
    """
    return [
        gram
        for size in range(lower, upper)
        for gram in ngrams(text, size)
    ]
def multigrams_count(text, lower, upper):
    """Count how often each n-gram returned by ``multigrams`` occurs.

    Every gram is converted to a dictionary key by applying ``str`` to each
    of its items and joining the results with a single space.

    Args:
        text: The input forwarded to ``multigrams``.
        lower: Smallest n-gram size (inclusive), forwarded to ``multigrams``.
        upper: Upper n-gram size bound, forwarded to ``multigrams``.

    Returns:
        A dict mapping each space-joined gram string to its occurrence count.
        Grams whose items cannot be converted with ``str`` are left out.
    """
    counted_grams = {}
    for gram in multigrams(text, lower, upper):
        try:
            # Build the key once instead of re-joining it for every lookup.
            key = " ".join(map(str, gram))
        except Exception:
            # Best effort, as before: a gram that cannot be stringified (for
            # example non-ASCII unicode under Python 2) is skipped. Catching
            # Exception rather than using a bare ``except`` lets
            # KeyboardInterrupt and SystemExit propagate.
            continue
        counted_grams[key] = counted_grams.get(key, 0) + 1
    return counted_grams
def multigrams_sort_filter(text, lowern, uppern, threshold):
    """Return the n-grams occurring at least ``threshold`` times, sorted by count.

    Args:
        text: The input forwarded to ``multigrams_count``.
        lowern: Smallest n-gram size, forwarded to ``multigrams_count``.
        uppern: Upper n-gram size bound, forwarded to ``multigrams_count``.
        threshold: Minimum count a gram needs in order to be kept (inclusive).

    Returns:
        A list of ``(gram, count)`` tuples in ascending order of count.
    """
    # Imported locally so the function does not rely on a module-level
    # ``import operator`` that this snippet never shows.
    import operator

    counted_grams = multigrams_count(text, lowern, uppern)
    # ``dict.items()`` works on both Python 2 and 3, whereas ``iteritems()``
    # raises AttributeError on Python 3.
    frequent = [
        (gram, count)
        for gram, count in counted_grams.items()
        if count >= threshold
    ]
    return sorted(frequent, key=operator.itemgetter(1))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment