Skip to content

Instantly share code, notes, and snippets.

@sirex
Last active August 29, 2015 14:19
Show Gist options
  • Save sirex/15156c5f5a0b89fc5e64 to your computer and use it in GitHub Desktop.
Save sirex/15156c5f5a0b89fc5e64 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import sys
import collections
import functools
import operator
def read(filename):
    """Yield one set of items for every non-empty line of *filename*.

    Each line is treated as a comma-separated list. Surrounding whitespace
    is stripped from every item and duplicates within a line collapse,
    because the items are collected into a set.

    Empty tokens (produced by blank lines, doubled commas or a trailing
    comma) are dropped so that the empty string is never counted as a real
    item; lines that contain no items at all are skipped.

    Args:
        filename: Path of the UTF-8 encoded text file to read.

    Yields:
        A non-empty ``set`` of stripped item strings, one per input line.
    """
    with open(filename, encoding='utf-8') as f:
        for line in f:
            items = {item for item in map(str.strip, line.split(',')) if item}
            # A blank line carries no choices; do not emit an empty set.
            if items:
                yield items
def get_most_common(common, items):
    """Return the first item of *common* that is also present in *items*.

    Args:
        common: Iterable of ``(item, count)`` pairs. The pairs are scanned
            in the order given, so the caller decides the priority (in this
            script the list is ordered from most to least frequent).
        items: Container supporting ``in`` (a set in this script).

    Returns:
        The first matching item, or ``None`` when no item of *common* is
        contained in *items*.
    """
    return next((item for item, _count in common if item in items), None)
def sort(counter):
    """Return the ``(item, count)`` pairs of *counter* in display order.

    Pairs are ordered by count, highest first; pairs with equal counts are
    ordered by item, ascending, so the result is deterministic.

    Args:
        counter: Mapping of item to numeric count (for example a
            ``collections.Counter``).

    Returns:
        A new list of ``(item, count)`` tuples; *counter* is not modified.
    """
    # Negating the count gives "descending by count, ascending by item"
    # in a single sort pass.
    return sorted(counter.items(), key=lambda pair: (-pair[1], pair[0]))
def main():
    """Print item frequencies and per-line winners for the given file.

    The file path is taken from the first command-line argument. Two
    tables are written to standard output, separated by a line of dashes:

    1. How many lines each item appears in.
    2. For every line, the highest-ranked item (according to table 1) that
       the line contains, and how many lines were won by each such item.

    Exits with a usage message when no file path is supplied.
    """
    if len(sys.argv) < 2:
        sys.exit("usage: %s FILE" % sys.argv[0])

    # Read data
    itemsets = list(read(sys.argv[1]))

    # Count most common choices
    common = collections.Counter()
    for items in itemsets:
        common.update(items)
    common = sort(common)
    for item, n in common:
        print("%3d %s" % (n, item))

    print()
    print('-' * 42)
    print()

    # Print final results: each line is represented by its top-ranked item.
    func = functools.partial(get_most_common, common)
    result = collections.Counter(map(func, itemsets))
    for item, n in sort(result):
        print("%3d %s" % (n, item))
# Run the report only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment