Skip to content

Instantly share code, notes, and snippets.

@ryo1kato
Created February 13, 2016 21:46
Show Gist options
  • Save ryo1kato/f307014d9e106dfe2bcc to your computer and use it in GitHub Desktop.
Save ryo1kato/f307014d9e106dfe2bcc to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import sys
import argparse

# One-line summary shown at the top of the --help output.
desc = "100x faster uniq(1) implementation with hash - you don't have to sort."

# Command-line interface.  The option flags mirror uniq(1): -c, -d and -u,
# plus -r, which only affects the ordering of the -c report.
parser = argparse.ArgumentParser(description=desc)
parser.add_argument(
    '-c', '--count', action='store_true',
    # Adjacent string literals are concatenated verbatim, so every fragment
    # but the last must end with a space to keep the words apart.
    help='Precede each output line with the count of the number of times '
         'the line occurred in the input, followed by a single space. '
         'With this option, output is sorted by number of duplicates')
parser.add_argument(
    '-r', '--reverse', action='store_true',
    help='With -c option, reverse the sort order.')
parser.add_argument(
    '-d', '--dup', action='store_true',
    help='only output lines with duplicates in the input')
parser.add_argument(
    '-u', '--uniq', action='store_true',
    help='Only output lines without duplicates in the input.')
# Optional input path; when omitted the attribute is None.
parser.add_argument('filename', metavar='FILE', type=str, nargs='?')
def _scan_lines(infd, args):
    """Count every line of *infd*, writing the lines that can be streamed.

    Returns a dict mapping each distinct line (terminator included) to the
    number of times it was read.
    """
    # Hoist the flag tests out of the per-line loop.
    stream_first = not (args.count or args.uniq or args.dup)
    stream_dups = args.dup and not args.count

    counter = dict()
    for line in infd:
        if line in counter:
            counter[line] += 1
            # In plain -d mode a line is emitted once, at its second sighting.
            if stream_dups and counter[line] == 2:
                sys.stdout.write(line)
        else:
            counter[line] = 1
            # With no mode flags, emit each line the first time it appears.
            if stream_first:
                sys.stdout.write(line)
    return counter


def main(args):
    """Write the lines of the input selected by *args* to standard output.

    The input is ``args.filename``, or standard input when the filename is
    missing or ``'-'``.  Occurrences are tallied in a dict, so the input does
    not have to be sorted.  Lines are compared together with their line
    terminator.

    Args:
        args: Parsed options with ``filename``, ``count``, ``reverse``,
            ``dup`` and ``uniq`` attributes.

    Output modes:
        * no flags: each distinct line is written the first time it is read.
        * ``dup`` without ``count``: a line is written when its second
          occurrence is read.
        * ``uniq`` without ``count``: lines read exactly once are written
          after the whole input has been consumed.
        * ``count``: after the whole input has been consumed, lines are
          written prefixed with a right-aligned count and a space, ordered
          by ascending count (the order is reversed with ``reverse``) and
          filtered by ``dup``/``uniq`` when given.  Empty input produces no
          output.
    """
    if args.filename and args.filename != '-':
        # Close the file deterministically instead of leaking the handle.
        with open(args.filename) as infd:
            counter = _scan_lines(infd, args)
    else:
        counter = _scan_lines(sys.stdin, args)

    if args.count:
        if not counter:
            # max() raises ValueError on an empty sequence; nothing to report.
            return
        # Pad every count to the width of the largest one so columns align.
        maxdigits = len(str(max(counter.values())))
        template = "{:" + str(maxdigits) + "d} {}"
        ordered = sorted(counter, key=counter.get)
        if args.reverse:
            # In-place reversal (not sorted(reverse=True)) so that lines with
            # equal counts are reversed as well.
            ordered.reverse()
        unfiltered = not (args.dup or args.uniq)
        for line in ordered:
            count = counter[line]
            if unfiltered \
                    or (args.dup and count >= 2) \
                    or (args.uniq and count == 1):
                sys.stdout.write(template.format(count, line))
    elif args.uniq:
        for line in counter:
            if counter[line] == 1:
                sys.stdout.write(line)
if __name__ == '__main__':
    # Script entry point: parse the command line and pass the resulting
    # options straight to main().
    main(parser.parse_args())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment