Skip to content

Instantly share code, notes, and snippets.

@rohit-jamuar
Last active August 29, 2015 14:04
Show Gist options
  • Save rohit-jamuar/51f356b9cab7e765d1c0 to your computer and use it in GitHub Desktop.
Save rohit-jamuar/51f356b9cab7e765d1c0 to your computer and use it in GitHub Desktop.
Number of anagrams in a file
#!/usr/bin/python
'''
Anagram count
Assumption - One word per line
Time complexity - O(n*k*log(k)); where n is the total number of
lines and k is the maximum length of word.
Space Complexity - O(m*p); where m is the number of unique tuples
consisting of sorted characters (from read words) and p is the
maximum size of such a tuple.
'''
from sys import argv
def get_next_lines(byte_limit, fname):
'''
A generator which yields a list of words (whose size is
less than or equal to 'byte_limit' bytes)
'''
with open(fname) as input_file:
lines = input_file.readlines(byte_limit)
while lines:
yield lines
lines = input_file.readlines(byte_limit)
if __name__ == '__main__':
if len(argv) == 3:
try:
BYTE_LIMIT = int(argv[1])
FILE_NAME = argv[2]
if BYTE_LIMIT >= 1:
DATA = {}
for WORD_LIST in get_next_lines(BYTE_LIMIT, FILE_NAME):
for WORD in WORD_LIST:
WORD = tuple(sorted(WORD.strip().lower()))
DATA[WORD] = DATA.get(WORD, 0) + 1
print sum([ elem for elem in DATA.values() if elem > 1 ])
else:
print "Invalid maximum byte-limit!"
except ValueError:
print "The first argument should be an integer!"
except IOError:
print "Could not find a file with name '%s'" % FILE_NAME
else:
print "Invalid number of arguments passed!"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment