chris-taylor · December 11, 2015 09:08
diff --git a/common.txt b/common.txt
 the
 be
 am
 are
 is
 was
 were
 been
 to
 of
 and
 a
 in
 that
 have
 has
 had
 I
 it
 for
 not
 on
 with
 he
 as
 you
 do
 does
 did
 at
 this
 but
 his
 by
 from
 they
 we
 say
 says
 said
 her
 she
 or
 an
 will
 my
 one
 all
 would
 there
 their
 what
 so
 up
 out
 if
 about
 who
 get
 gets
 got
 which
 where
 how
 go
 me
 when
 make
 makes
 made
 can
 like
 likes
 liked
 time
 no
 just
 him
 know
 knew
 knows
 take
 takes
 took
 people
 into
 year
 your
 good
 some
 could
 them
 see
 sees
 saw
 other
 than
 then
 now
 look
 looks
 looked
 only
 come
 comes
 came
 its
 over
 think
 thinks
 thought
 also
 back
 after
 use
 uses
 used
 two
 our
 work
 works
 worked
 first
 well
 way
 even
 new
 want
 wants
 wanted
 because
 any
 these
 give
 gives
 gave
 day
 most
 us
 may
 more
 such
 should
 very
 those
 same
 must
diff --git a/jasper.py b/jasper.py
 from collections import Counter
 import string

 def remove_punctuation(word):
     """Return `word` with every ASCII punctuation character removed.

     The characters dropped are exactly those in `string.punctuation`;
     letters, digits and whitespace are kept in their original order.

     The previous implementation relied on `string.maketrans("", "")` and
     the two-argument form of `str.translate`, which only exist for byte
     strings on Python 2. Filtering character by character behaves the
     same there and also works on Python 3 and on unicode input.
     """
     return "".join(ch for ch in word if ch not in string.punctuation)

 # Read in the list of common words (one per line), lowercased. The file is
 # opened in a `with` block so the handle is closed as soon as it is read.

 with open('common.txt') as common_file:
     common_words = [word.lower() for word in common_file.read().splitlines()]

 # Read in the corpus and collect its words. Each whitespace-separated token
 # has its punctuation stripped and is lowercased first.

 with open('big.txt') as corpus_file:
     lines = corpus_file.read().splitlines()

 words = []
 for line in lines:
     for token in line.split():
         cleaned = remove_punctuation(token).lower()
         # Tokens made only of punctuation (e.g. "--") collapse to the empty
         # string; skip them so "" is never reported as a frequent word.
         if cleaned:
             words.append(cleaned)

 # Use a Counter to find the most common words that aren't in the common_words
 # list.

 cnt = Counter(words)

 # Remove the common words outright instead of zeroing them: a zero-count
 # entry would still be returned by most_common() when the corpus has fewer
 # than ten other distinct words.
 for word in common_words:
     cnt.pop(word, None)

 # Parenthesised single-argument print gives the same output on Python 2 and 3.
 print(cnt.most_common(10))
	the
	be
	am
	are
	is
	was
	were
	been
	to
	of
	and
	a
	in
	that
	have
	has
	had
	I
	it
	for
	not
	on
	with
	he
	as
	you
	do
	does
	did
	at
	this
	but
	his
	by
	from
	they
	we
	say
	says
	said
	her
	she
	or
	an
	will
	my
	one
	all
	would
	there
	their
	what
	so
	up
	out
	if
	about
	who
	get
	gets
	got
	which
	where
	how
	go
	me
	when
	make
	makes
	made
	can
	like
	likes
	liked
	time
	no
	just
	him
	know
	knew
	knows
	take
	takes
	took
	people
	into
	year
	your
	good
	some
	could
	them
	see
	sees
	saw
	other
	than
	then
	now
	look
	looks
	looked
	only
	come
	comes
	came
	its
	over
	think
	thinks
	thought
	also
	back
	after
	use
	uses
	used
	two
	our
	work
	works
	worked
	first
	well
	way
	even
	new
	want
	wants
	wanted
	because
	any
	these
	give
	gives
	gave
	day
	most
	us
	may
	more
	such
	should
	very
	those
	same
	must
	from collections import Counter
	import string

	def remove_punctuation(word):
	    """Return `word` with every ASCII punctuation character removed.

	    The characters dropped are exactly those in `string.punctuation`;
	    letters, digits and whitespace are kept in their original order.

	    Implemented as a character filter rather than with
	    `string.maketrans("", "")` and two-argument `str.translate`, which
	    are only available for byte strings on Python 2.
	    """
	    return "".join(ch for ch in word if ch not in string.punctuation)

	# Read in the list of common words (one per line), lowercased. The file is
	# opened in a `with` block so the handle is closed as soon as it is read.

	with open('common.txt') as common_file:
	    common_words = [word.lower() for word in common_file.read().splitlines()]

	# Read in the corpus and collect its words. Each whitespace-separated token
	# has its punctuation stripped and is lowercased first.

	with open('big.txt') as corpus_file:
	    lines = corpus_file.read().splitlines()

	words = []
	for line in lines:
	    for token in line.split():
	        cleaned = remove_punctuation(token).lower()
	        # Tokens made only of punctuation (e.g. "--") collapse to the empty
	        # string; skip them so "" is never reported as a frequent word.
	        if cleaned:
	            words.append(cleaned)

	# Use a Counter to find the most common words that aren't in the common_words
	# list.

	cnt = Counter(words)

	# Remove the common words outright instead of zeroing them: a zero-count
	# entry would still be returned by most_common() when the corpus has fewer
	# than ten other distinct words.
	for word in common_words:
	    cnt.pop(word, None)

	# Parenthesised single-argument print gives the same output on Python 2 and 3.
	print(cnt.most_common(10))