Skip to content

Instantly share code, notes, and snippets.

@aparrish
Created March 27, 2013 22:13
Show Gist options
  • Save aparrish/5258575 to your computer and use it in GitHub Desktop.
Save aparrish/5258575 to your computer and use it in GitHub Desktop.
making a "concordance" module—slightly better
def tokenize(line):
    """Split a line of text into a list of word tokens.

    The line is split on runs of any whitespace, so repeated spaces, tabs
    and leading/trailing whitespace never produce empty-string tokens, and
    an empty or blank line yields an empty list.
    """
    # split() with no separator collapses whitespace runs; split(" ") would
    # return [""] for an empty line and "" between consecutive spaces.
    return line.split()
def feed(concordance, line):
    """Add the words found in one line to the running word counts.

    ``concordance`` is a dict mapping each word to the number of times it
    has been seen so far. It is updated in place and nothing is returned.
    ``line`` is broken into words with ``tokenize``.
    """
    for word in tokenize(line):
        # get() supplies 0 for a word seen for the first time, so a single
        # lookup both initialises and increments the count.
        concordance[word] = concordance.get(word, 0) + 1
def count_for_word(concordance, word):
    """Return how many times ``word`` was counted in ``concordance``.

    ``concordance`` is a dict mapping words to counts. A word that is not
    present in it has a count of 0.
    """
    return concordance.get(word, 0)
def unique_words(concordance):
    """Return a list of the distinct words recorded in ``concordance``.

    The result is a new list holding the dict's keys, so it is unaffected
    by later changes to ``concordance``.
    """
    # list(d) gives a list on both Python 2 and Python 3, whereas keys()
    # is a live view object on Python 3.
    return list(concordance)
if __name__ == '__main__':
    # Script usage: pass the word to look up as the first command-line
    # argument and supply the text to analyse on standard input.
    import sys

    # Exit with a usage message rather than an IndexError traceback when
    # the word to look up was not supplied.
    if len(sys.argv) < 2:
        sys.exit("usage: " + sys.argv[0] + " WORD < input.txt")
    check_word = sys.argv[1]

    # Build the word counts from standard input, one stripped line at a time.
    concordance = dict()
    for line in sys.stdin:
        feed(concordance, line.strip())

    # A single parenthesised argument prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("count for " + check_word + ": "
          + str(count_for_word(concordance, check_word)))
    print("unique words: " + str(len(unique_words(concordance))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment