Created
February 5, 2015 07:07
-
-
Save jnothman/f180e1ed586905b3925a to your computer and use it in GitHub Desktop.
count or sum unix command
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
"""Count or sum, while uniquing rows, without full sort of data | |
With --key-fields, rows are grouped on the selected fields only, and one example row is shown for each distinct key.
(This was much simpler when it just counted!) | |
""" | |
import sys | |
import argparse | |
def slice_type(x):
    """Convert a 1-based field spec into a 0-based ``slice``.

    Accepts either a single field number (``"3"``) or an inclusive range of
    contiguous fields (``"2-4"``).

    Raises ``ValueError`` if a bound is not an integer, if a field number is
    less than 1, or if a range ends before it starts. argparse reports a
    ``ValueError`` raised by a ``type=`` callable as an invalid argument.
    """
    start, dash, stop = x.partition('-')
    first = int(start)
    last = int(stop) if dash else first
    # Field 0 would turn into a negative slice start and a reversed range
    # selects nothing; reject both rather than silently grouping on the
    # wrong (or an empty) key.
    if first < 1 or last < first:
        raise ValueError('invalid field range: {!r}'.format(x))
    return slice(first - 1, last)
# Command-line interface. Field numbers given by the user are 1-based.
ap = argparse.ArgumentParser()
ap.add_argument(
    '-k', '--key-fields',
    type=slice_type,
    default=slice(None, None),
    help='Key field or contiguous fields for tab separated input',
)
ap.add_argument(
    '-s', '--sum-field',
    type=int,
    help='Perform summation over this field',
)
args = ap.parse_args()

# Without -k the default slice selects every field of the row as the key.
key_fields = args.key_fields
# Shift the 1-based field number to a 0-based list index; None means no
# summation field was requested.
sum_field = None if args.sum_field is None else args.sum_field - 1
# Per-key aggregate: key tuple -> (most recent row with that key, running
# count or sum for that key).
res = {}
total = 0
for line in sys.stdin:
    # A final line with no newline would otherwise be glued to the next row
    # when written back out, because output order differs from input order.
    if not line.endswith('\n'):
        line += '\n'
    fields = line.rstrip('\n\r').split('\t')
    key = tuple(fields[key_fields])
    if sum_field is None:
        # Counting mode: every row contributes one.
        val = 1
    else:
        raw = fields[sum_field]
        # Stay in integer arithmetic when possible; fall back to float for
        # decimals and for forms such as "1e5" that int() rejects.
        try:
            val = int(raw)
        except ValueError:
            val = float(raw)
    total += val
    if key in res:
        val += res[key][1]
    # Keep the latest row as the example shown for this key.
    res[key] = (line, val)

# Emit "<value>\t<example row>" in sorted key order. sorted(res) iterates the
# keys on both Python 2 and Python 3.
for key in sorted(res):
    sys.stdout.write('{1}\t{0}'.format(*res[key]))
# The grand total goes to stderr so it does not mix with the data rows.
sys.stderr.write('Total\t{}\n'.format(total))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment