Created
December 25, 2011 12:55
-
-
Save Asmageddon/1519211 to your computer and use it in GitHub Desktop.
Evaluation of password security for sequences of random characters and words
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random, math | |
import re | |
def log_bin(value): | |
return math.log(value) / math.log(2) | |
def cumprod(values): | |
result = 1 | |
for i in values: | |
result *= i | |
return result | |
def gen_password(charset, length): | |
result = "" | |
for i in range(length): | |
to_add = random.choice(charset) | |
if len(to_add) > 1: #If it's not a single char, capitalize it so the attacker cannot make assumption that it's all lowercase | |
to_add = to_add.capitalize() | |
result += to_add | |
return result | |
def format_size(size): | |
size *= 1.0 #Convert to floats | |
unit = "" | |
prefix_list = "KMGTPEZY" #Kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta | |
prefix = -1 | |
while size > 1024: | |
size /= 1024 | |
prefix+= 1 | |
if prefix >= len(prefix_list): | |
return "much" | |
elif prefix == 0: | |
return "%.2fB" % size | |
else: | |
return "%.2f%siB" % (size, prefix_list[prefix]) | |
def num_separators(number): | |
number = list( str( int(number) ) ) | |
result = "" | |
separator = ',' | |
for index, char in enumerate(reversed(number)): | |
result = char + result | |
if not (index +1) % 3 and index != len(number): | |
result = separator + result | |
return result | |
def acronym(text): #From big letters found in it | |
pattern = re.compile("[A-Z]") #Simply find big letters ^^ | |
return ''.join( re.findall(pattern, text) ) | |
dictfile = open("/usr/share/dict/words", "r") | |
pattern = re.compile("^.*[^'][^s]$") #Remove words ending with 's | |
#Generate list of words: | |
word_list = [word[:-1] for word in dictfile.readlines() if re.match(pattern, word[:-1])] # [:-1] removes the last character of the line - \n | |
#Count them and their average length | |
word_count = len(word_list) | |
average_word_length = sum([len(word) for word in word_list]) / (1.0 * len(word_list)) | |
word_entropy_per_char = log_bin(word_count) / average_word_length | |
hash_digest_size = 512 / 8 #For SHA512, in bytes | |
#We're only using ASCII letters and numbers, no symbols except for underscore - a common set of allowed characters for passwords | |
char_list = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789" | |
char_count = len(char_list) | |
char_entropy_per_char = log_bin(char_count) | |
print "%i words in our dictionary" % word_count | |
print " Of average length %.2f" % average_word_length | |
print " With entropy content of %.2f bits per character\n" % word_entropy_per_char | |
print "We're using %i characters" % char_count | |
print " With entropy content of %.2f bits each" % char_entropy_per_char | |
print " Containing %.2f%% the entropy content of single character in a random word\n" % (char_entropy_per_char / word_entropy_per_char * 100) | |
results = [] #In format (combinations, text, disk usage of all hashes) | |
#Let's do the math for characters first, from 4 to 22 | |
for i in range(4, 22): | |
combinations = char_count**i | |
#Disk usage of hashes for all possibilities | |
disk_usage = combinations * (hash_digest_size + i + 1) #+1 for a delimiter, presumably newline | |
results += [(combinations, | |
"%2i chars: %.2e permutations, %s disk space" % (i, combinations, format_size(disk_usage) ), | |
disk_usage | |
)] | |
#Now for words, we assume that attacker has a complete dictionary available | |
for i in range(1,8): | |
combinations = word_count**i | |
disk_usage = combinations * (hash_digest_size + i*average_word_length + 1) | |
results += [(combinations, | |
"%i words(%2ich): %.2e permutations, %s disk space" % (i, math.ceil(i*average_word_length), combinations, format_size(disk_usage) ), | |
disk_usage | |
)] | |
#Disclaimer: I have no idea how much this would take on what hardware, so please adjust yourself | |
hashing_speed = 9001.0 * 1000**2 #9001 million hashes per second | |
print "With %s hashes per second: " % num_separators(hashing_speed) | |
marks = [60, 60, 24, 30, 12, 100, 137000000] #Minutes, hours, days, months, years, centuries, ages of universe | |
marks_text = ["seconds", "minutes", "hours", "days", "months", "years", "centuries", "ages of universe"] | |
mark = 0 | |
print "\nExample passwords:" | |
print " 12 random characters:" | |
for i in range(0,5): | |
print " %s %s" % (gen_password(char_list, 12), gen_password(char_list, 12)) | |
print " 4 random words:" | |
for i in range(0,5): | |
word = gen_password(word_list, 4) | |
print " %s: %s (%ich)" % (acronym(word), word, len(word)) | |
print " 5 random words:" | |
for i in range(0,5): | |
word = gen_password(word_list, 5) | |
print " %s: %s (%ich)" % (acronym(word), word, len(word)) | |
print " 15 random characters:" | |
for i in range(0,5): | |
print " %s %s" % (gen_password(char_list, 15), gen_password(char_list, 15)) | |
print "==Starting from seconds==" | |
for i in sorted(results): #Sorted by amount of permutations | |
#Change our unit when we go over it's limit | |
while i[0] / hashing_speed > cumprod( marks[:mark+1] ) and mark <= len(marks)-1: #More than one mark may be passed in one step | |
mark += 1 | |
print "==Going into %s==" % marks_text[mark] | |
print "%s (%.2f %s to brute force)" % ( i[1], i[0] / hashing_speed / cumprod(marks[:mark]), marks_text[mark] ) | |
print "Disk usage is amount of space to store all hashes" | |
print "Brute force assumes worst case, averages to half of the value" | |
print "\nNotes:" | |
print " With disk usage, I am assuming no compression is used on words." | |
print " Zipping my words file yielded decrease from 931 to 252 kilobytes" | |
print " Compressing would probably decrease size by ~2/3 of that," | |
print " since hashes are pretty much incompressible" | |
print " Compressing random characters makes no significant difference" | |
print " I am not accounting for technological progress, but I assume" | |
print " that by time it becomes easy enough to crack these," | |
print " most currently used algorithms will be irrelevant anyway" | |
print "\nVerdict:" | |
print " While random characters contain way more entropy per character," | |
print " a set of words is much easier to remember" | |
num_increase = average_word_length * 4 * 10 | |
print " Inserting randomly a single number into 4 words increases" | |
print " amount of permutations %i times (%i orders of magnitude)" % (num_increase, math.floor (math.log10(num_increase)) ) | |
print " %.2f%% of how much adding a single char to a random sequence does" % (num_increase * 100.0 / char_count) | |
print "As such, I declare that using a sequence of 5 or even 4 random words" | |
print " is a superior alternative to random characters," | |
print " since it is much easier for human brain to remember sequences" | |
print " that have a meaning, and acronyms help further" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment