Skip to content

Instantly share code, notes, and snippets.

@Asmageddon
Created December 25, 2011 12:55
Show Gist options
  • Save Asmageddon/1519211 to your computer and use it in GitHub Desktop.
Save Asmageddon/1519211 to your computer and use it in GitHub Desktop.
Evaluation of password security for sequences of random characters and words
import random, math
import re
def log_bin(value):
return math.log(value) / math.log(2)
def cumprod(values):
result = 1
for i in values:
result *= i
return result
def gen_password(charset, length):
result = ""
for i in range(length):
to_add = random.choice(charset)
if len(to_add) > 1: #If it's not a single char, capitalize it so the attacker cannot make assumption that it's all lowercase
to_add = to_add.capitalize()
result += to_add
return result
def format_size(size):
size *= 1.0 #Convert to floats
unit = ""
prefix_list = "KMGTPEZY" #Kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta
prefix = -1
while size > 1024:
size /= 1024
prefix+= 1
if prefix >= len(prefix_list):
return "much"
elif prefix == 0:
return "%.2fB" % size
else:
return "%.2f%siB" % (size, prefix_list[prefix])
def num_separators(number):
number = list( str( int(number) ) )
result = ""
separator = ','
for index, char in enumerate(reversed(number)):
result = char + result
if not (index +1) % 3 and index != len(number):
result = separator + result
return result
def acronym(text): #From big letters found in it
pattern = re.compile("[A-Z]") #Simply find big letters ^^
return ''.join( re.findall(pattern, text) )
dictfile = open("/usr/share/dict/words", "r")
pattern = re.compile("^.*[^'][^s]$") #Remove words ending with 's
#Generate list of words:
word_list = [word[:-1] for word in dictfile.readlines() if re.match(pattern, word[:-1])] # [:-1] removes the last character of the line - \n
#Count them and their average length
word_count = len(word_list)
average_word_length = sum([len(word) for word in word_list]) / (1.0 * len(word_list))
word_entropy_per_char = log_bin(word_count) / average_word_length
hash_digest_size = 512 / 8 #For SHA512, in bytes
#We're only using ASCII letters and numbers, no symbols except for underscore - a common set of allowed characters for passwords
char_list = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789"
char_count = len(char_list)
char_entropy_per_char = log_bin(char_count)
print "%i words in our dictionary" % word_count
print " Of average length %.2f" % average_word_length
print " With entropy content of %.2f bits per character\n" % word_entropy_per_char
print "We're using %i characters" % char_count
print " With entropy content of %.2f bits each" % char_entropy_per_char
print " Containing %.2f%% the entropy content of single character in a random word\n" % (char_entropy_per_char / word_entropy_per_char * 100)
results = [] #In format (combinations, text, disk usage of all hashes)
#Let's do the math for characters first, from 4 to 22
for i in range(4, 22):
combinations = char_count**i
#Disk usage of hashes for all possibilities
disk_usage = combinations * (hash_digest_size + i + 1) #+1 for a delimiter, presumably newline
results += [(combinations,
"%2i chars: %.2e permutations, %s disk space" % (i, combinations, format_size(disk_usage) ),
disk_usage
)]
#Now for words, we assume that attacker has a complete dictionary available
for i in range(1,8):
combinations = word_count**i
disk_usage = combinations * (hash_digest_size + i*average_word_length + 1)
results += [(combinations,
"%i words(%2ich): %.2e permutations, %s disk space" % (i, math.ceil(i*average_word_length), combinations, format_size(disk_usage) ),
disk_usage
)]
#Disclaimer: I have no idea how much this would take on what hardware, so please adjust yourself
hashing_speed = 9001.0 * 1000**2 #9001 million hashes per second
print "With %s hashes per second: " % num_separators(hashing_speed)
marks = [60, 60, 24, 30, 12, 100, 137000000] #Minutes, hours, days, months, years, centuries, ages of universe
marks_text = ["seconds", "minutes", "hours", "days", "months", "years", "centuries", "ages of universe"]
mark = 0
print "\nExample passwords:"
print " 12 random characters:"
for i in range(0,5):
print " %s %s" % (gen_password(char_list, 12), gen_password(char_list, 12))
print " 4 random words:"
for i in range(0,5):
word = gen_password(word_list, 4)
print " %s: %s (%ich)" % (acronym(word), word, len(word))
print " 5 random words:"
for i in range(0,5):
word = gen_password(word_list, 5)
print " %s: %s (%ich)" % (acronym(word), word, len(word))
print " 15 random characters:"
for i in range(0,5):
print " %s %s" % (gen_password(char_list, 15), gen_password(char_list, 15))
print "==Starting from seconds=="
for i in sorted(results): #Sorted by amount of permutations
#Change our unit when we go over it's limit
while i[0] / hashing_speed > cumprod( marks[:mark+1] ) and mark <= len(marks)-1: #More than one mark may be passed in one step
mark += 1
print "==Going into %s==" % marks_text[mark]
print "%s (%.2f %s to brute force)" % ( i[1], i[0] / hashing_speed / cumprod(marks[:mark]), marks_text[mark] )
print "Disk usage is amount of space to store all hashes"
print "Brute force assumes worst case, averages to half of the value"
print "\nNotes:"
print " With disk usage, I am assuming no compression is used on words."
print " Zipping my words file yielded decrease from 931 to 252 kilobytes"
print " Compressing would probably decrease size by ~2/3 of that,"
print " since hashes are pretty much incompressible"
print " Compressing random characters makes no significant difference"
print " I am not accounting for technological progress, but I assume"
print " that by time it becomes easy enough to crack these,"
print " most currently used algorithms will be irrelevant anyway"
print "\nVerdict:"
print " While random characters contain way more entropy per character,"
print " a set of words is much easier to remember"
num_increase = average_word_length * 4 * 10
print " Inserting randomly a single number into 4 words increases"
print " amount of permutations %i times (%i orders of magnitude)" % (num_increase, math.floor (math.log10(num_increase)) )
print " %.2f%% of how much adding a single char to a random sequence does" % (num_increase * 100.0 / char_count)
print "As such, I declare that using a sequence of 5 or even 4 random words"
print " is a superior alternative to random characters,"
print " since it is much easier for human brain to remember sequences"
print " that have a meaning, and acronyms help further"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment