Skip to content

Instantly share code, notes, and snippets.

@geekscrapy
Created October 4, 2017 17:16
Show Gist options
  • Save geekscrapy/78e29b8b520cef94b6ec2eb1dd2acafe to your computer and use it in GitHub Desktop.
Save geekscrapy/78e29b8b520cef94b6ec2eb1dd2acafe to your computer and use it in GitHub Desktop.
# fields = ordered list of sorting fields
# obs = an iterator of objects where fields are present (fields MUST exist)
# order_desc = Sort descending(false)
# ignore_case = convert field values to lowercase so values differing only in case are counted together
# top_count = return the top X
def histo(fields, obs, order_desc=True, ignore_case=False, top_count=None):
    """Print a text histogram of how often each field-value combination occurs.

    For every object in ``obs`` the named attributes are read, converted with
    ``str()`` and stripped of surrounding whitespace. Identical combinations
    are counted and printed as a table with the columns Fields, Count,
    Percent and Hist (one block character per whole percent).

    Args:
        fields: A single attribute name, or a list/tuple of attribute names.
            Every name must exist on every object (``getattr`` is used
            without a default).
        obs: An iterable of objects carrying the attributes in ``fields``.
        order_desc: If true (default), rows are printed from most to least
            common; if false, the same rows are printed in reverse order.
        ignore_case: If true, values are lowercased before counting so that
            values differing only in case are counted together.
        top_count: Only the ``top_count`` most common combinations are
            printed; ``None`` prints all of them. Percentages are always
            relative to the total number of objects, not just the rows shown.

    Returns:
        None. The table is written to standard output.

    Raises:
        AttributeError: If an object lacks one of the requested attributes.
    """
    # Function-scope import so the block does not depend on a module-level
    # import that may not be present in the file.
    from collections import Counter

    # A bare attribute name is wrapped; lists and tuples are used as given.
    if not isinstance(fields, (list, tuple)):
        fields = [fields]

    keys = []
    for o in obs:
        values = [str(getattr(o, f)).strip() for f in fields]
        if ignore_case:
            values = [v.lower() for v in values]
        keys.append(tuple(values))

    master = Counter(keys)
    rows = master.most_common(top_count)
    if not order_desc:
        rows.reverse()

    labels = [', '.join(key) for key, _ in rows]
    # Column widths include the header text so that the table stays aligned,
    # and so that an empty result does not call max() on an empty sequence.
    max_width = max([len('Fields')] + [len(label) for label in labels])
    count_width = max([len('Count')] + [len(str(count)) for _, count in rows])
    total = sum(master.values())

    # A single pre-formatted argument keeps print() valid on Python 2 and 3.
    print("%s %s %s %s" % ('Fields'.ljust(max_width),
                           'Count'.ljust(count_width), 'Percent', 'Hist'))
    for label, (_, count) in zip(labels, rows):
        raw_percent = 100.0 * count / total
        padded_percent = "{:.2f}".format(raw_percent)
        # One filled square per whole percentage point.
        hist_line = int(raw_percent) * u'\u25A0'
        print("%s %s %s%% %s" % (label.ljust(max_width),
                                 str(count).ljust(count_width),
                                 padded_percent.rjust(6), hist_line))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment