fnielsen · November 6, 2022 12:32 · Riyasharma-in · Dec 7, 2020 · fnielsen · Dec 20, 2020
diff --git a/brownzipf.py b/brownzipf.py
 from __future__ import division
 from itertools import *
 from pylab import *
 from nltk.corpus import brown
 from string import lower
 from collections import Counter


 # The data: lower-cased token counts from the Brown corpus.
 # A generator with str.lower plus explicit list() conversions yields the
 # same values as imap/string.lower on Python 2 while not relying on dict
 # methods that return lists.
 tokens_with_count = Counter(word.lower() for word in brown.words())
 counts = array(list(tokens_with_count.values()))
 # keys() and values() are read from the unmodified Counter, so tokens[i]
 # is the token whose count is counts[i].
 tokens = list(tokens_with_count.keys())

 # A Zipf plot: absolute frequency against frequency rank on log-log axes
 ranks = arange(1, len(counts)+1)
 indices = argsort(-counts)  # positions ordering counts from high to low
 frequencies = counts[indices]
 loglog(ranks, frequencies, marker=".")
 title("Zipf plot for Brown corpus tokens")
 xlabel("Frequency rank of token")
 ylabel("Absolute frequency of token")
 grid(True)

 # Annotate up to 20 log-spaced points with their token.
 # The last logspace value can equal len(counts), which is one past the
 # final valid index, so the positions are clamped to the valid range.
 # unique() also drops repeats (the first values both truncate to 0), so
 # no label is drawn twice at the same spot.
 label_positions = unique(clip(
     logspace(-0.5, log10(len(counts)), 20).astype(int),
     0, len(counts) - 1))
 for n in label_positions:
     # The result is bound to a name, presumably to keep an interactive
     # session from echoing every returned text object.
     dummy = text(ranks[n], frequencies[n], " " + tokens[indices[n]],
                  verticalalignment="bottom",
                  horizontalalignment="left")

 show()
	from __future__ import division
	from itertools import *
	from pylab import *
	from nltk.corpus import brown
	from string import lower
	from collections import Counter


	# The data: lower-cased token counts from the Brown corpus.
	# A generator with str.lower plus explicit list() conversions yields the
	# same values as imap/string.lower on Python 2 while not relying on dict
	# methods that return lists.
	tokens_with_count = Counter(word.lower() for word in brown.words())
	counts = array(list(tokens_with_count.values()))
	# keys() and values() are read from the unmodified Counter, so tokens[i]
	# is the token whose count is counts[i].
	tokens = list(tokens_with_count.keys())

	# A Zipf plot: absolute frequency against frequency rank on log-log axes
	ranks = arange(1, len(counts)+1)
	indices = argsort(-counts)  # positions ordering counts from high to low
	frequencies = counts[indices]
	loglog(ranks, frequencies, marker=".")
	title("Zipf plot for Brown corpus tokens")
	xlabel("Frequency rank of token")
	ylabel("Absolute frequency of token")
	grid(True)

	# Annotate up to 20 log-spaced points with their token.
	# The last logspace value can equal len(counts), which is one past the
	# final valid index, so the positions are clamped to the valid range.
	# unique() also drops repeats (the first values both truncate to 0), so
	# no label is drawn twice at the same spot.
	label_positions = unique(clip(
	    logspace(-0.5, log10(len(counts)), 20).astype(int),
	    0, len(counts) - 1))
	for n in label_positions:
	    # The loop body must be indented under the for statement.
	    # The result is bound to a name, presumably to keep an interactive
	    # session from echoing every returned text object.
	    dummy = text(ranks[n], frequencies[n], " " + tokens[indices[n]],
	                 verticalalignment="bottom",
	                 horizontalalignment="left")

	show()
No results found