Last active
August 25, 2018 20:24
-
-
Save kofemann/0c6489b8cc23776aae84 to your computer and use it in GitHub Desktop.
A python script to print a histogram
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| """ | |
| Print a histogram of word distribution. Takes a single word per line from stdin | |
| and prints a dtrace-style histogram: | |
| $ echo "some text with some text fields as text" | tr -s '[:space:]' '\n' | ./histo.py 10 | |
| text | 3 | ################################################## | |
| some | 2 | ################################# | |
| with | 1 | ################ | |
| as | 1 | ################ | |
| fields | 1 | ################ | |
| """ | |
| import sys | |
| import math | |
# Length, in '#' characters, of the bar drawn for the most frequent key.
TERMINAL_SIZE = 50


def second_field(x):
    """Return the second element of *x* (e.g. the count of a (key, count) pair)."""
    return x[1]
def histo(data, top=-1, width=None):
    """Print a frequency histogram of the records in *data*.

    Every record is stripped of surrounding whitespace; records that are
    empty after stripping are ignored.  One line is printed per distinct
    record, most frequent first, in the form::

        <record, right-aligned to 24> | <count, 6 wide> | <bar of '#'>

    The bar of the most frequent record is *width* characters long and the
    other bars are scaled proportionally, rounded down.

    Args:
        data: iterable of strings, one record per item (e.g. lines of text).
        top: maximum number of rows to print.  A negative value (the
            default) or ``None`` means "print every record"; ``0`` prints
            nothing.
        width: bar length for the most frequent record.  Defaults to the
            module-level ``TERMINAL_SIZE``.

    Returns:
        None.  Output goes to stdout; nothing is printed when *data* holds
        no non-empty records.
    """
    # Function-scope import keeps this block self-contained.
    from collections import Counter

    if width is None:
        width = TERMINAL_SIZE

    stripped = (line.strip() for line in data)
    counts = Counter(word for word in stripped if word)

    # Nothing to show: return instead of failing on an empty maximum.
    if not counts:
        return

    # The sort is stable, so records with equal counts keep the order in
    # which the counter yields them.
    ranked = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
    max_count = ranked[0][1]

    # A negative limit must not be used as a slice end: [0:-1] would silently
    # drop the last record instead of meaning "no limit".
    limit = None if top is None or top < 0 else top

    for word, count in ranked[:limit]:
        # Integer floor division gives the same result as floor(a / b) for
        # the integer counts without going through floating point.
        bar = '#' * int(count * width // max_count)
        print("%24s | %6d | %s" % (word, count, bar))
if __name__ == '__main__':
    # The optional first command-line argument is the row limit handed to
    # histo(); -1 is passed when no argument is given.
    cli_args = sys.argv[1:]
    top = int(cli_args[0]) if cli_args else -1
    histo(sys.stdin.readlines(), top)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment