dustin · October 25, 2013 22:26
diff --git a/massjoin b/massjoin
 #!/usr/bin/env python

 import re
 import string
 import fileinput
 import collections

 # Size-unit table: (multiplier in bytes, unit suffix), largest unit first.
 # Plain int literals are used: the ``L`` long-literal suffix is accepted only
 # by Python 2 and is not needed there either, since ints widen on their own.
 abbrevs = (
     (1 << 50, 'PB'),
     (1 << 40, 'TB'),
     (1 << 30, 'GB'),
     (1 << 20, 'MB'),
     (1 << 10, 'kB'),
     (1, 'b'),
 )

 # Reverse lookup: unit suffix -> multiplier in bytes.
 revabrevs = dict((suffix, factor) for factor, suffix in abbrevs)

 # Matches "<digits><unit letters>-", e.g. the "4kB-" at the start of "4kB-8kB".
 # The unit class is spelled [A-Za-z]: the range [A-z] would also accept the six
 # punctuation characters that sit between 'Z' and 'a' ([, \, ], ^, _ and `).
 ROBOT = re.compile(r'(\d+)([A-Za-z]+)-')

 def de_humanize_bytes(s):
     """Return the byte count encoded by the first size prefix found in ``s``.

     ``ROBOT`` is searched for the first ``<digits><unit>-`` occurrence, so for
     a label such as ``"4kB-8kB"`` only the part before the dash is converted.

     Args:
         s: label text containing a ``<digits><unit>-`` prefix.

     Returns:
         The digits as an int multiplied by the unit's entry in ``revabrevs``.

     Raises:
         ValueError: if ``s`` contains no ``<digits><unit>-`` sequence.
         KeyError: if the unit is not a key of ``revabrevs``.
     """
     match = ROBOT.search(s)
     if match is None:
         # Name the offending text instead of failing with an AttributeError
         # on None further down.
         raise ValueError("no size prefix found in %r" % (s,))
     count, unit = match.groups()
     return int(count) * revabrevs[unit]

 # Running total per label; labels not seen before start at 0.
 found = collections.defaultdict(int)

 for line in fileinput.input():
     if not line.strip():
         # A blank line has no count field; indexing parts[1] would fail.
         continue
     # Rewrite " byte" to "b" and " - " to "-" before splitting on spaces, so
     # the unit matches the 'b' suffix of the unit table and a dash-separated
     # range stays in one field.  str.replace is used because the module-level
     # string.replace() function does not exist on Python 3.
     line = line.replace(" byte", "b").replace(" - ", "-")
     parts = line.split(" ")
     # First field is the label, second is the count to add to its total.
     found[parts[0]] += int(parts[1])

 # Build (label, size in bytes, total) triples and print "<label> <total>"
 # in ascending order of size.  items() and a single pre-formatted print
 # argument give the same output on Python 2 and Python 3, unlike iteritems()
 # and the print statement.
 histo = [
     (label, de_humanize_bytes(label), total)
     for label, total in found.items()
 ]
 histo.sort(key=lambda entry: entry[1])
 for bucket, _, count in histo:
     print("%s %s" % (bucket, count))
	#!/usr/bin/env python

	import re
	import string
	import fileinput
	import collections

	# Size-unit table: (multiplier in bytes, unit suffix), largest unit first.
	# Plain int literals are used: the ``L`` long-literal suffix is accepted only
	# by Python 2 and is not needed there either, since ints widen on their own.
	abbrevs = (
	    (1 << 50, 'PB'),
	    (1 << 40, 'TB'),
	    (1 << 30, 'GB'),
	    (1 << 20, 'MB'),
	    (1 << 10, 'kB'),
	    (1, 'b'),
	)

	# Reverse lookup: unit suffix -> multiplier in bytes.
	revabrevs = dict((suffix, factor) for factor, suffix in abbrevs)

	# Matches "<digits><unit letters>-", e.g. the "4kB-" at the start of "4kB-8kB".
	# The unit class is spelled [A-Za-z]: the range [A-z] would also accept the six
	# punctuation characters that sit between 'Z' and 'a' ([, \, ], ^, _ and `).
	ROBOT = re.compile(r'(\d+)([A-Za-z]+)-')

	def de_humanize_bytes(s):
	    """Return the byte count encoded by the first size prefix found in ``s``.

	    ``ROBOT`` is searched for the first ``<digits><unit>-`` occurrence, so for
	    a label such as ``"4kB-8kB"`` only the part before the dash is converted.

	    Args:
	        s: label text containing a ``<digits><unit>-`` prefix.

	    Returns:
	        The digits as an int multiplied by the unit's entry in ``revabrevs``.

	    Raises:
	        ValueError: if ``s`` contains no ``<digits><unit>-`` sequence.
	        KeyError: if the unit is not a key of ``revabrevs``.
	    """
	    match = ROBOT.search(s)
	    if match is None:
	        # Name the offending text instead of failing with an AttributeError
	        # on None further down.
	        raise ValueError("no size prefix found in %r" % (s,))
	    count, unit = match.groups()
	    return int(count) * revabrevs[unit]

	# Running total per label; labels not seen before start at 0.
	found = collections.defaultdict(int)

	for line in fileinput.input():
	    if not line.strip():
	        # A blank line has no count field; indexing parts[1] would fail.
	        continue
	    # Rewrite " byte" to "b" and " - " to "-" before splitting on spaces, so
	    # the unit matches the 'b' suffix of the unit table and a dash-separated
	    # range stays in one field.  str.replace is used because the module-level
	    # string.replace() function does not exist on Python 3.
	    line = line.replace(" byte", "b").replace(" - ", "-")
	    parts = line.split(" ")
	    # First field is the label, second is the count to add to its total.
	    found[parts[0]] += int(parts[1])

	# Build (label, size in bytes, total) triples and print "<label> <total>"
	# in ascending order of size.  items() and a single pre-formatted print
	# argument give the same output on Python 2 and Python 3, unlike iteritems()
	# and the print statement.
	histo = [
	    (label, de_humanize_bytes(label), total)
	    for label, total in found.items()
	]
	histo.sort(key=lambda entry: entry[1])
	for bucket, _, count in histo:
	    print("%s %s" % (bucket, count))
No results found