Last active
August 29, 2015 14:04
-
-
Save rohit-jamuar/51f356b9cab7e765d1c0 to your computer and use it in GitHub Desktop.
Number of anagrams in a file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
''' | |
Anagram count | |
Assumption - One word per line | |
Time complexity - O(n*k*log(k)); where n is the total number of | |
lines and k is the maximum length of word. | |
Space Complexity - O(m*p); where m is the number of unique tuples | |
consisting of sorted characters (from read words) and p is the | |
maximum size of such a tuple. | |
''' | |
from sys import argv | |
def get_next_lines(byte_limit, fname): | |
''' | |
A generator which yields a list of words (whose size is | |
less than or equal to 'byte_limit' bytes) | |
''' | |
with open(fname) as input_file: | |
lines = input_file.readlines(byte_limit) | |
while lines: | |
yield lines | |
lines = input_file.readlines(byte_limit) | |
if __name__ == '__main__': | |
if len(argv) == 3: | |
try: | |
BYTE_LIMIT = int(argv[1]) | |
FILE_NAME = argv[2] | |
if BYTE_LIMIT >= 1: | |
DATA = {} | |
for WORD_LIST in get_next_lines(BYTE_LIMIT, FILE_NAME): | |
for WORD in WORD_LIST: | |
WORD = tuple(sorted(WORD.strip().lower())) | |
DATA[WORD] = DATA.get(WORD, 0) + 1 | |
print sum([ elem for elem in DATA.values() if elem > 1 ]) | |
else: | |
print "Invalid maximum byte-limit!" | |
except ValueError: | |
print "The first argument should be an integer!" | |
except IOError: | |
print "Could not find a file with name '%s'" % FILE_NAME | |
else: | |
print "Invalid number of arguments passed!" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment