Skip to content

Instantly share code, notes, and snippets.

@jnothman
Created February 5, 2015 07:07
Show Gist options
  • Save jnothman/f180e1ed586905b3925a to your computer and use it in GitHub Desktop.
Save jnothman/f180e1ed586905b3925a to your computer and use it in GitHub Desktop.
count or sum unix command
#!/usr/bin/env python
"""Count or sum, while uniquing rows, without full sort of data
By using --key-fields, can also show example row that has some particular fields.
(This was much simpler when it just counted!)
"""
import sys
import argparse
def slice_type(x):
    """Convert a 1-based field specification into a 0-based ``slice``.

    Accepted forms (fields are numbered from 1, ranges are inclusive):

    * ``N``   -- the single field N
    * ``M-N`` -- fields M through N
    * ``M-``  -- field M through the last field
    * ``-N``  -- the first field through field N

    Args:
        x: the specification string, as given on the command line.

    Returns:
        A ``slice`` suitable for indexing the list of split fields.

    Raises:
        ValueError: if a bound is not an integer, the first field is less
            than 1, the range is reversed, or both bounds are missing.
            When used as an argparse ``type=``, argparse turns this into
            an "invalid value" usage error.
    """
    first, dash, last = x.partition('-')
    if not dash:
        # No dash at all: a single field is the range N-N.
        last = first
    if not first and not last:
        raise ValueError('empty field range: {!r}'.format(x))

    start = int(first) if first else 1
    stop = int(last) if last else None

    # Field 0 or a reversed range would yield an empty (or wrapped-around)
    # slice, which silently groups every row under the same empty key.
    if start < 1 or (stop is not None and stop < start):
        raise ValueError('invalid field range: {!r}'.format(x))

    return slice(start - 1, stop)
def parse_number(text):
    """Parse *text* as an int when possible, otherwise as a float.

    Trying ``int`` first keeps integer sums exact; the ``float`` fallback
    also accepts spellings without a decimal point, such as ``1e3``.

    Raises:
        ValueError: if *text* is not a valid number.
    """
    try:
        return int(text)
    except ValueError:
        return float(text)


def aggregate_rows(lines, key_fields, sum_field=None):
    """Group tab-separated *lines* by key and count or sum each group.

    Args:
        lines: iterable of input lines (trailing newline optional).
        key_fields: ``slice`` selecting the fields that form the key.
        sum_field: 0-based index of the field to sum, or ``None`` to
            count rows instead.

    Returns:
        ``(res, total)`` where ``res`` maps each key tuple to
        ``(example_line, value)`` -- the most recent raw line seen for that
        key and its running count/sum -- and ``total`` is the count/sum
        over all rows.

    Raises:
        IndexError: if a row has no field at index *sum_field*.
        ValueError: if the summed field is not numeric.
    """
    res = {}
    total = 0
    for line in lines:
        fields = line.rstrip('\n\r').split('\t')
        key = tuple(fields[key_fields])
        val = 1 if sum_field is None else parse_number(fields[sum_field])
        total += val
        if key in res:
            val += res[key][1]
        # Keep the latest line as the example row for this key.
        res[key] = (line, val)
    return res, total


def main(argv=None):
    """Read rows from stdin, write ``value<TAB>example row`` per key to stdout.

    Keys are emitted in sorted order; the grand total goes to stderr.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('-k', '--key-fields', default=slice(None, None), type=slice_type, help='Key field or contiguous fields for tab separated input')
    ap.add_argument('-s', '--sum-field', type=int, help='Perform summation over this field')
    args = ap.parse_args(argv)

    sum_field = args.sum_field
    if sum_field is not None:
        if sum_field < 1:
            # Field numbers are 1-based; 0 would become index -1 and
            # silently sum the last column instead.
            ap.error('--sum-field must be >= 1')
        sum_field -= 1

    res, total = aggregate_rows(sys.stdin, args.key_fields, sum_field)

    # sorted(res) iterates the keys on both Python 2 and Python 3.
    for key in sorted(res):
        line, val = res[key]
        if not line.endswith('\n'):
            # The last input line may lack a newline; keep one row per line.
            line += '\n'
        sys.stdout.write('{}\t{}'.format(val, line))
    sys.stderr.write('Total\t{}\n'.format(total))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment