Skip to content

Instantly share code, notes, and snippets.

@dustin
Created October 25, 2013 22:26
Show Gist options
  • Select an option

  • Save dustin/7162792 to your computer and use it in GitHub Desktop.

Select an option

Save dustin/7162792 to your computer and use it in GitHub Desktop.
dehumanizing/rewriting/combining sizing stuff
#!/usr/bin/env python
import re
import string
import fileinput
import collections
# Binary size units paired with their multiplier in bytes, largest first.
# Plain int literals are used on purpose: the ``L`` long suffix is a syntax
# error on Python 3 and is unnecessary on Python 2, where ints promote to
# long automatically.
abbrevs = (
    (1 << 50, 'PB'),
    (1 << 40, 'TB'),
    (1 << 30, 'GB'),
    (1 << 20, 'MB'),
    (1 << 10, 'kB'),
    (1, 'b'),
)

# Reverse lookup: unit suffix -> multiplier in bytes.
revabrevs = {unit: factor for factor, unit in abbrevs}

# Matches "<digits><letters>-", i.e. a number, its unit suffix and the dash
# that follows it. The class is spelled [A-Za-z] because the range [A-z]
# also covers the punctuation that sits between 'Z' and 'a' in ASCII
# ('[', '\\', ']', '^', '_' and '`').
ROBOT = re.compile(r'(\d+)([A-Za-z]+)-')
def de_humanize_bytes(s):
    """Convert the first human-readable size in ``s`` into a byte count.

    The first "<digits><unit>-" occurrence found by ``ROBOT`` is parsed and
    the number is multiplied by the unit's factor looked up in ``revabrevs``.

    Args:
        s: text containing a size immediately followed by a dash.

    Returns:
        The size as an integer number of bytes.

    Raises:
        ValueError: if ``s`` contains no "<digits><unit>-" sequence.
        KeyError: if the matched unit is not a key of ``revabrevs``.
    """
    match = ROBOT.search(s)
    if match is None:
        # Without this guard the failure shows up as an AttributeError on
        # None, which says nothing about the offending input.
        raise ValueError("no size found in %r" % (s,))
    number, unit = match.groups()
    return int(number) * revabrevs[unit]
# Running total per bucket label, summed over every input line.
found = collections.defaultdict(int)
for line in fileinput.input():
    if not line.strip():
        # A blank line has no "<bucket> <count>" pair; indexing parts[1]
        # below would raise IndexError.
        continue
    # Collapse " byte" to "b" and " - " to "-"; presumably this keeps the
    # bucket label free of spaces so it survives the split below as one field.
    # str.replace is used instead of the string-module function, which no
    # longer exists on Python 3.
    line = line.replace(" byte", "b").replace(" - ", "-")
    parts = line.split(" ")
    found[parts[0]] += int(parts[1])

# One (label, size in bytes parsed from the label, total count) entry per
# bucket, ordered by the parsed size.
histo = sorted(
    ((label, de_humanize_bytes(label), total) for label, total in found.items()),
    key=lambda entry: entry[1],
)
for bucket, _, count in histo:
    # Same "<bucket> <count>" output as the Python 2 print statement, but
    # valid on both Python 2 and Python 3.
    print("%s %s" % (bucket, count))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment