justinvh · August 29, 2015 13:56
diff --git a/tally.c b/tally.c
 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/types.h>
 #include <sys/uio.h>
 #include <unistd.h>

 #define ascii_chars 128

 struct Counter {
    char k;
    int v;
 };

 int compare(const void* a, const void* b)
 {
    const struct Counter* lhs = a;
    const struct Counter* rhs = b;
    if (lhs->v > rhs->v) return -1;
    else if (lhs->v < rhs->v) return 1;
    else return 0;
 }

 int main()
 {
    struct Counter tally[ascii_chars] = {};

    for (int i = 0; i < ascii_chars; i++) {
        tally[i].k = i;
    }

    int n;
    char buf[BUFSIZ];
    while ((n = read(STDIN_FILENO, buf, BUFSIZ)) > 0) {
        for (int i = 0; i < n; i++) {
            tally[buf[i]].v += 1;
        }
    }

    qsort(tally, ascii_chars, sizeof(struct Counter), compare);

    for (int i = 0; i < ascii_chars; i++) {
        if (tally[i].v)
            printf("%c %d\n", tally[i].k, tally[i].v);
    }
 }
diff --git a/tally.cpp b/tally.cpp
 #include <vector>
 #include <functional>
 #include <cstdio>

 #include <sys/types.h>
 #include <sys/uio.h>
 #include <unistd.h>

 int main()
 {
    std::vector<char> contents;

    // B.getContents
    int n;
    char buf[BUFSIZ];
    while ((n = read(STDIN_FILENO, buf, BUFSIZ)) > 0) {
        for (int i = 0; i < n; i++) {
            contents.push_back(buf[i]);
        }
    }

    // sort
    std::sort(contents.begin(), contents.end());

    // map rle . B.group
    std::vector<std::pair<int, char>> group;
    char current = *contents.begin();
    int group_size = 1;
    for (auto iter = contents.begin(); iter != contents.end(); ++iter) {
        if (*iter == current) {
            group_size++;
            if (iter + 1 == contents.end()) {
                group.push_back(std::make_pair(group_size, current));
            }
        } else {
            group.push_back(std::make_pair(group_size, current));
            current = *iter;
            group_size = 1;
        }
    }

    // reverse . sort
    std::sort(group.begin(), group.end(),
              std::greater<std::pair<int, char>>());

    // fmt (count, ch) = printf "%c %d\n" ch count
    for (auto const& p : group) {
        printf("%c %d\n", p.second, p.first);
    }
 }
diff --git a/tally.hs b/tally.hs
 import qualified Data.ByteString.Char8 as B
 import Data.List (sort)
 import Text.Printf (printf)

 tally :: B.ByteString -> [(Int, Char)]
 tally = reverse . sort . map rle . B.group . B.sort
 where
  rle c = (B.length c, B.head c)

 main :: IO ()
 main = do
  contents <- B.getContents
  let results = tally contents
  mapM_ fmt results
 where
  fmt (count, ch) = printf "%c %d\n" ch count
diff --git a/tally.py b/tally.py
 # 1-to-1 mapping to the Haskell version

 import sys
 from itertools import imap as map, groupby


 def rle(k):
    return (len(list(k[1])), k[0])


 def tally(contents):
    return reversed(sorted(map(rle, groupby(sorted(contents)))))


 for count, ch in tally(sys.stdin.read()):
    print("{} {}".format(ch, count))
diff --git a/tally.txt b/tally.txt
 ./tally_hs < data.txt  0.57s user 0.15s system 99% cpu 0.724 total
 ./tally_c < data.txt  0.10s user 0.09s system 98% cpu 0.194 total
 ./tally_cpp < data.txt  4.57s user 0.21s system 99% cpu 4.787 total
 ./tally.py < data.txt  43.61s user 1.63s system 94% cpu 46.547 total
 ./tally2.py < data.txt  42.66s user 1.79s system 99% cpu 44.850 total
 ./tally3.py < data.txt  52.04s user 0.17s system 99% cpu 52.345 total
 ./tally4.py < data.txt  25.31s user 0.14s system 99% cpu 25.469 total
diff --git a/tally2.py b/tally2.py
 # A different approach removing map usage
 import sys
 from itertools import groupby


 def tally(contents):
    txs = ((len(list(g)), k) for k, g in groupby(sorted(contents)))
    return sorted(txs, reverse=True)


 for count, ch in tally(sys.stdin.read()):
    print("{} {}".format(ch, count))
diff --git a/tally3.py b/tally3.py
 import sys
 from operator import itemgetter
 from collections import Counter

 data = Counter(sys.stdin.read())
 for ch, count in sorted(data.iteritems(), key=itemgetter(1), reverse=True):
    print("{} {}".format(ch, count))
diff --git a/tally4.py b/tally4.py
 import sys
 from operator import itemgetter
 from collections import defaultdict

 lut = defaultdict(int)
 for ch in sys.stdin.read():
    lut[ch] += 1

 for ch, count in sorted(lut.iteritems(), key=itemgetter(1), reverse=True):
    print("{} {}".format(ch, count))
	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/types.h>
	#include <sys/uio.h>
	#include <unistd.h>

	#define ascii_chars 128

	struct Counter {
	char k;
	int v;
	};

	int compare(const void* a, const void* b)
	{
	const struct Counter* lhs = a;
	const struct Counter* rhs = b;
	if (lhs->v > rhs->v) return -1;
	else if (lhs->v < rhs->v) return 1;
	else return 0;
	}

	int main()
	{
	struct Counter tally[ascii_chars] = {};

	for (int i = 0; i < ascii_chars; i++) {
	tally[i].k = i;
	}

	int n;
	char buf[BUFSIZ];
	while ((n = read(STDIN_FILENO, buf, BUFSIZ)) > 0) {
	for (int i = 0; i < n; i++) {
	tally[buf[i]].v += 1;
	}
	}

	qsort(tally, ascii_chars, sizeof(struct Counter), compare);

	for (int i = 0; i < ascii_chars; i++) {
	if (tally[i].v)
	printf("%c %d\n", tally[i].k, tally[i].v);
	}
	}
	#include <vector>
	#include <functional>
	#include <cstdio>

	#include <sys/types.h>
	#include <sys/uio.h>
	#include <unistd.h>

	int main()
	{
	std::vector<char> contents;

	// B.getContents
	int n;
	char buf[BUFSIZ];
	while ((n = read(STDIN_FILENO, buf, BUFSIZ)) > 0) {
	for (int i = 0; i < n; i++) {
	contents.push_back(buf[i]);
	}
	}

	// sort
	std::sort(contents.begin(), contents.end());

	// map rle . B.group
	std::vector<std::pair<int, char>> group;
	char current = *contents.begin();
	int group_size = 1;
	for (auto iter = contents.begin(); iter != contents.end(); ++iter) {
	if (*iter == current) {
	group_size++;
	if (iter + 1 == contents.end()) {
	group.push_back(std::make_pair(group_size, current));
	}
	} else {
	group.push_back(std::make_pair(group_size, current));
	current = *iter;
	group_size = 1;
	}
	}

	// reverse . sort
	std::sort(group.begin(), group.end(),
	std::greater<std::pair<int, char>>());

	// fmt (count, ch) = printf "%c %d\n" ch count
	for (auto const& p : group) {
	printf("%c %d\n", p.second, p.first);
	}
	}
	import qualified Data.ByteString.Char8 as B
	import Data.List (sort)
	import Text.Printf (printf)

	tally :: B.ByteString -> [(Int, Char)]
	tally = reverse . sort . map rle . B.group . B.sort
	where
	rle c = (B.length c, B.head c)

	main :: IO ()
	main = do
	contents <- B.getContents
	let results = tally contents
	mapM_ fmt results
	where
	fmt (count, ch) = printf "%c %d\n" ch count
	# 1-to-1 mapping to the Haskell version

	import sys
	from itertools import imap as map, groupby


	def rle(k):
	return (len(list(k[1])), k[0])


	def tally(contents):
	return reversed(sorted(map(rle, groupby(sorted(contents)))))


	for count, ch in tally(sys.stdin.read()):
	print("{} {}".format(ch, count))
	./tally_hs < data.txt 0.57s user 0.15s system 99% cpu 0.724 total
	./tally_c < data.txt 0.10s user 0.09s system 98% cpu 0.194 total
	./tally_cpp < data.txt 4.57s user 0.21s system 99% cpu 4.787 total
	./tally.py < data.txt 43.61s user 1.63s system 94% cpu 46.547 total
	./tally2.py < data.txt 42.66s user 1.79s system 99% cpu 44.850 total
	./tally3.py < data.txt 52.04s user 0.17s system 99% cpu 52.345 total
	./tally4.py < data.txt 25.31s user 0.14s system 99% cpu 25.469 total
	# A different approach removing map usage
	import sys
	from itertools import groupby


	def tally(contents):
	txs = ((len(list(g)), k) for k, g in groupby(sorted(contents)))
	return sorted(txs, reverse=True)


	for count, ch in tally(sys.stdin.read()):
	print("{} {}".format(ch, count))
	import sys
	from operator import itemgetter
	from collections import Counter

	data = Counter(sys.stdin.read())
	for ch, count in sorted(data.iteritems(), key=itemgetter(1), reverse=True):
	print("{} {}".format(ch, count))
	import sys
	from operator import itemgetter
	from collections import defaultdict

	lut = defaultdict(int)
	for ch in sys.stdin.read():
	lut[ch] += 1

	for ch, count in sorted(lut.iteritems(), key=itemgetter(1), reverse=True):
	print("{} {}".format(ch, count))