jeroenjanssens · April 25, 2014 02:17 · leondutoit · Jun 9, 2014
diff --git a/topwords.py b/topwords.py
 #!/usr/bin/env python
 """Print the most frequent words read from standard input.

 Usage: topwords.py NUM_WORDS < text
 """
 import re
 import sys
 from collections import Counter


 def top_words(text, num_words):
     """Return the ``num_words`` most common words in ``text``.

     The text is lower-cased first. Words are maximal runs of word
     characters; matching them directly (instead of splitting on the
     separators) avoids counting an empty string when the text starts or
     ends with a non-word character such as a trailing newline.

     Returns a list of ``(word, count)`` pairs, most frequent first.
     """
     words = re.findall(r'\w+', text.lower())
     return Counter(words).most_common(num_words)


 def main():
     """Read text from stdin and print ``count word`` lines to stdout."""
     if len(sys.argv) < 2:
         sys.exit("usage: %s NUM_WORDS" % sys.argv[0])
     num_words = int(sys.argv[1])
     # sys.stdout.write works the same on Python 2 and Python 3, whereas
     # the print statement is a syntax error on Python 3.
     for word, count in top_words(sys.stdin.read(), num_words):
         sys.stdout.write("%8d %s\n" % (count, word))


 if __name__ == "__main__":
     main()
diff --git a/topwords.R b/topwords.R
 #!/usr/bin/env Rscript
 # Print the most frequent words read from standard input.
 # Usage: topwords.R NUM_WORDS < text
 args <- commandArgs(trailingOnly = TRUE)
 if (length(args) < 1) {
   stop("usage: topwords.R NUM_WORDS", call. = FALSE)
 }
 num.words <- as.integer(args[1])
 if (is.na(num.words)) {
   stop("NUM_WORDS must be an integer", call. = FALSE)
 }
 f <- file("stdin")
 input.lines <- readLines(f)
 close(f)
 # Join all lines into one lower-cased string and extract runs of word characters.
 full.text <- tolower(paste(input.lines, collapse = " "))
 splits <- gregexpr("\\w+", full.text)
 words.all <- regmatches(full.text, splits)[[1]]
 # table() names the word column after its argument, i.e. "words.all".
 words.unique <- as.data.frame(table(words.all), stringsAsFactors = FALSE)
 words.sorted <- words.unique[order(-words.unique$Freq), ]
 top <- head(words.sorted, num.words)
 # Refer to the column by its full name rather than relying on `$` partial
 # matching; sprintf is vectorised, so no explicit loop is needed.
 cat(sprintf("%8d %s\n", top$Freq, top$words.all), sep = "")
diff --git a/topwords.sh b/topwords.sh
 #!/usr/bin/env bash
 # Print the most frequent words read from standard input.
 # Usage: topwords.sh NUM_WORDS < text
 # Abort with a usage message when the argument is missing or empty.
 NUM_WORDS="${1:?usage: topwords.sh NUM_WORDS}"
 tr '[:upper:]' '[:lower:]' |   # lower-case the input
 grep -oE '\w+' |               # emit one word per line
 sort |                         # group identical words for uniq
 uniq -c |                      # prefix each distinct word with its count
 sort -nr |                     # highest count first
 head -n "$NUM_WORDS"           # quoted so the value is passed as one argument
	#!/usr/bin/env python
	"""Print the most frequent words read from standard input.

	Usage: topwords.py NUM_WORDS < text
	"""
	import re
	import sys
	from collections import Counter


	def top_words(text, num_words):
	    """Return up to ``num_words`` ``(word, count)`` pairs for ``text``.

	    The text is lower-cased and words are extracted as runs of word
	    characters, so leading or trailing separators never yield an
	    empty-string "word". Pairs are ordered most frequent first.
	    """
	    counts = Counter(re.findall(r'\w+', text.lower()))
	    return counts.most_common(num_words)


	def main():
	    """Read text from stdin and print ``count word`` lines to stdout."""
	    if len(sys.argv) < 2:
	        sys.exit("usage: %s NUM_WORDS" % sys.argv[0])
	    num_words = int(sys.argv[1])
	    # The loop body must be indented under the ``for``; write() is used
	    # because it behaves the same on Python 2 and Python 3.
	    for word, count in top_words(sys.stdin.read(), num_words):
	        sys.stdout.write("%8d %s\n" % (count, word))


	if __name__ == "__main__":
	    main()
	#!/usr/bin/env Rscript
	# Print the most frequent words read from standard input.
	# Usage: topwords.R NUM_WORDS < text
	args <- commandArgs(trailingOnly = TRUE)
	if (length(args) < 1) {
	  stop("usage: topwords.R NUM_WORDS", call. = FALSE)
	}
	num.words <- as.integer(args[1])
	if (is.na(num.words)) {
	  stop("NUM_WORDS must be an integer", call. = FALSE)
	}
	f <- file("stdin")
	input.lines <- readLines(f)
	close(f)
	# Collapse the input into one lower-cased string, then pull out every
	# run of word characters.
	full.text <- tolower(paste(input.lines, collapse = " "))
	splits <- gregexpr("\\w+", full.text)
	words.all <- regmatches(full.text, splits)[[1]]
	# The frequency table's word column is named "words.all" after its argument.
	words.unique <- as.data.frame(table(words.all), stringsAsFactors = FALSE)
	words.sorted <- words.unique[order(-words.unique$Freq), ]
	top <- head(words.sorted, num.words)
	# Use the full column name instead of depending on `$` partial matching.
	cat(sprintf("%8d %s\n", top$Freq, top$words.all), sep = "")