Skip to content

Instantly share code, notes, and snippets.

@kofemann
Last active August 25, 2018 20:24
Show Gist options
  • Save kofemann/0c6489b8cc23776aae84 to your computer and use it in GitHub Desktop.
Save kofemann/0c6489b8cc23776aae84 to your computer and use it in GitHub Desktop.
A python script to print a histogram
#!/usr/bin/env python
"""
Print a histogram of word frequencies. Reads one word per line from stdin
and prints a dtrace-style histogram. An optional integer argument limits the
output to that many of the most frequent words:
$ echo "some text with some text fields as text" | tr -s '[:space:]' '\n' | ./histo.py 10
text | 3 | ##################################################
some | 2 | #################################
with | 1 | ################
as | 1 | ################
fields | 1 | ################
"""
import math
import sys
from collections import Counter
# Width, in characters, of the bar drawn for the most frequent key.
TERMINAL_SIZE = 50


def second_field(x):
    """Return the second element of the pair ``x`` (used as a sort key)."""
    return x[1]


def histo(data, top=-1):
    """Print a histogram of how often each distinct record occurs in ``data``.

    Every item of ``data`` is stripped of surrounding whitespace, and items
    that are empty after stripping are ignored. One row is printed per
    distinct key, most frequent first, formatted as ``key | count | bar``.
    Bars are scaled so that the most frequent key gets ``TERMINAL_SIZE``
    '#' characters. Nothing is printed when there are no non-empty records.

    Args:
        data: iterable of strings, one record per item.
        top: maximum number of rows to print. ``None`` or any negative
            value (the default) prints every row.
    """
    counts = Counter(line.strip() for line in data)
    # All blank records collapse onto the empty key; drop it.
    counts.pop('', None)
    if not counts:
        # Nothing to rank, and ranked[0] below would fail on an empty list.
        return

    # sorted() is stable, so keys with equal counts keep the dict's order.
    ranked = sorted(counts.items(), key=second_field, reverse=True)
    max_value = ranked[0][1]

    # A negative ``top`` means "no limit". Used directly as a slice end it
    # would silently drop rows from the tail of the ranking.
    limit = None if top is None or top < 0 else top

    for key, count in ranked[:limit]:
        # Integer division floors exactly, with no float rounding involved.
        bar = '#' * (count * TERMINAL_SIZE // max_value)
        print("%24s | %6d | %s" % (key, count, bar))
if __name__ == '__main__':
    # An optional first command-line argument caps the number of rows;
    # without it, -1 (the same value as histo()'s default) is passed.
    top = int(sys.argv[1]) if len(sys.argv) > 1 else -1
    histo(sys.stdin.readlines(), top)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment