Last active
July 27, 2021 23:16
-
-
Save aayla-secura/23a6586e65641c0061fb105a4efa5f74 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import logging | |
import math | |
import string | |
import sys | |
import argparse | |
from collections.abc import Mapping, MutableMapping | |
from collections import Counter | |
import re | |
class PwdStore:
    '''Index of (username, password) pairs grouped by their base forms.

    Every added pair is stored as the tuple
    ``((username_base, username), (password_base, password))`` and is
    reachable both through its username base and its password base.
    '''

    def __init__(self,
                 username_prefixes=None,
                 username_suffixes=None):
        '''Create an empty store.

        username_prefixes / username_suffixes: iterables of strings
        stripped (case-insensitively) from usernames to obtain the
        username base. Empty entries are dropped.
        '''
        self.__usernames__ = {}
        self.__passwords__ = {}
        self.username_prefixes = [
            prefix.lower() for prefix in username_prefixes or [] if prefix]
        self.username_suffixes = [
            suffix.lower() for suffix in username_suffixes or [] if suffix]

    def add(self, username, password):
        '''Record a username/password pair under both of its bases.'''
        username_base = self.__get_username_base__(username)
        password_base = self.__get_password_base__(password)
        key = (username_base, username), (password_base, password)
        self.__usernames__.setdefault(username_base, set()).add(key)
        self.__passwords__.setdefault(password_base, set()).add(key)

    @property
    def all_usernames(self):
        '''Mapping of username base -> set of stored pairs.'''
        return self.__usernames__

    @property
    def all_passwords(self):
        '''Mapping of password base -> set of stored pairs.'''
        return self.__passwords__

    def similar_users(self, username):
        '''Find users belonging to the same person'''
        return self.__usernames__.get(
            self.__get_username_base__(username), set())

    def similar_passwords(self, password):
        '''Find passwords similar to the given one'''
        return self.__passwords__.get(
            self.__get_password_base__(password), set())

    def __get_username_base__(self, username):
        '''Returns username stripped of the first matched prefix or
        suffix

        The result is lower-cased. Prefixes are tried before suffixes
        and only one of them is ever removed. None is passed through.
        '''
        if username is None:
            return None
        lowered = username.lower()
        for prefix in self.username_prefixes or []:
            if lowered.startswith(prefix):
                return lowered[len(prefix):]
        for suffix in self.username_suffixes or []:
            if lowered.endswith(suffix):
                return lowered[:-len(suffix)]
        return lowered

    @classmethod
    def __get_password_base__(cls, password):
        '''Returns a password base word(s)

        All lower-case, de-leetified, any sequence of 2 or more digits
        and preceding or following digits or special characters are
        ignored.
        '''
        return cls.__get_password_re__(password, keep_re=False)

    @staticmethod
    def __get_password_re__(password, keep_re=True):
        '''Returns a password regex (or the base word if keep_re is
        false)

        The regex matches a password of any case, optionally
        leetified, and where any sequence of 2 or more digits matches
        any other at that position, and preceding or following digits
        or special characters are ignored.

        With keep_re=False the plain base word is returned instead:
        lower-cased, surrounding non-letters stripped, digit runs
        removed and leet characters mapped back to their letter.
        '''
        surround_re = '^[^a-zA-Z]+', '[^a-zA-Z]+$'
        digits_re = '[0-9]{2,}'
        leet = {
            'a': '[aA4@]',
            'b': '[bB8]',
            'e': '[eE3]',
            'g': '[gG9]',
            'i': '[iI1]',
            'l': '[lL1]',
            'o': '[oO0]',
            's': '[sS5$]',
            't': '[tT7+]',
        }

        stripped = password.lower()
        for pattern in surround_re:
            stripped = re.sub(pattern, '', stripped)

        if not keep_re:
            # order matters: digit runs go first, then each leet class
            # in table order (so a lone '1' becomes 'i', not 'l').
            # The lookarounds leave a character untouched when it sits
            # directly next to a remaining (lone) digit.
            base = re.sub(digits_re, '', stripped)
            for letter, char_class in leet.items():
                base = re.sub(f'(?<![0-9]){char_class}(?![0-9])',
                              letter, base)
            return base

        # Build the regex in ONE pass over the password. Substituting
        # class after class on the growing pattern would re-scan text
        # inserted earlier (e.g. the '1' inside '[iI1]' or the '0'/'9'
        # inside '[0-9]{2,}') and corrupt it.
        class_of = {}
        for char_class in leet.values():
            for char in char_class[1:-1]:
                class_of.setdefault(char, char_class)  # first class wins

        pieces = []
        for token in re.findall(f'{digits_re}|.', stripped,
                                flags=re.DOTALL):
            if len(token) > 1:
                pieces.append(digits_re)
            elif token in class_of:
                pieces.append(class_of[token])
            elif token.isalpha():
                # any case, as promised by the docstring
                pieces.append(f'[{token}{token.upper()}]')
            else:
                pieces.append(re.escape(token))

        # Surrounding non-letters are optional, hence '*' rather than
        # the '+' used for stripping above.
        return '^[^a-zA-Z]*' + ''.join(pieces) + '[^a-zA-Z]*$'
class DictWithDefaultsBase(MutableMapping):
    '''Mutable mapping that creates and stores a default for missing
    keys.

    Class attributes:
      _default   value stored for a missing key; if callable, it is
                 called (without arguments) to produce the value.
      _key_type  optional callable applied to every key before it is
                 stored, looked up or deleted.
    '''
    _default = None
    _key_type = None

    def __init__(self, data=None, /, **kwargs):
        self.__data__ = {}
        if data is None:
            data = {}
        self.update(data, **kwargs)

    def _normalize_key(self, key):
        '''Return key converted with _key_type (if one is set).'''
        if self._key_type is not None:
            return self._key_type(key)
        return key

    def __setitem__(self, key, value):
        self.__data__[self._normalize_key(key)] = value

    def __getitem__(self, name):
        # Normalise before the lookup; otherwise d['3'] on an
        # int-keyed mapping would miss the existing d[3] and overwrite
        # it with the default.
        key = self._normalize_key(name)
        try:
            return self.__data__[key]
        except KeyError:
            default = self._default
            new = default() if callable(default) else default
            self[key] = new
            return new

    def __delitem__(self, key):
        del self.__data__[self._normalize_key(key)]

    def __contains__(self, key):
        # The Mapping mixin implements `in` through __getitem__, which
        # here never raises KeyError and would insert the key.
        try:
            return self._normalize_key(key) in self.__data__
        except (TypeError, ValueError):
            return False

    def __iter__(self):
        return iter(self.__data__)

    def __len__(self):
        return len(self.__data__)

    def copy(self):
        '''Return a shallow copy of the same class.

        The constructor is bypassed because subclasses may use a
        different __init__ signature; instance attributes are carried
        over and the stored items are copied into a new dict.
        '''
        clone = object.__new__(type(self))
        vars(clone).update(vars(self))
        clone.__data__ = dict(self.__data__)
        return clone

    def __str__(self):
        return str(dict(self.items()))

    def __repr__(self):
        return '{}({})'.format(
            self.__class__.__name__,
            str(self))
class DictWithDefaults(DictWithDefaultsBase):
    '''DictWithDefaultsBase whose default is chosen per instance.

    default: value used for missing keys; a callable is called to
    produce the value. The mapping always starts out empty.
    '''

    def __init__(self, default):
        # Set the per-instance default first, then let the base class
        # create the (empty) backing storage.
        self._default = default
        super().__init__()
class Freqs(DictWithDefaultsBase):
    '''Frequency table: keys that were never counted start at 0.'''

    _default = 0
class NumFreqs(Freqs):
    '''Frequency table with integer keys.

    Iteration yields every integer from the smallest to the largest
    stored key, including values in between that were never counted,
    so that histograms show gaps.
    '''
    _key_type = int

    def __iter__(self):
        # min()/max() raise ValueError on an empty sequence; an empty
        # table simply has nothing to iterate over.
        if not self.__data__:
            return
        yield from range(min(self.__data__), max(self.__data__) + 1)
class CharTypeFreqs(DictWithDefaultsBase):
    '''Mapping of integer key -> Freqs table.

    A missing key gets a fresh, empty Freqs instance.
    '''

    _key_type = int
    _default = Freqs
class Analyzer:
    '''Collects passwords (optionally with usernames) and prints
    statistics: length / character-set histograms, most repeated base
    words and base words reused by the same user.
    '''

    # Character classes, checked in this order; anything not listed
    # falls into 'other'.
    _charsets = {
        'lower': string.ascii_lowercase,
        'upper': string.ascii_uppercase,
        'digit': string.digits,
        'special': string.punctuation + ' ',
        'other': ''
    }

    def __init__(self,
                 username_prefixes=None,
                 username_suffixes=None):
        '''Create an empty analyzer.

        The prefix/suffix lists are handed to PwdStore, which strips
        them from usernames to group accounts together.
        '''
        self.__data__ = PwdStore(
            username_prefixes=username_prefixes,
            username_suffixes=username_suffixes)
        self.__frequencies__ = {
            'len': NumFreqs(),          # password length -> count
            'ncs': NumFreqs(),          # number of charsets -> count
            'chtype': CharTypeFreqs(),  # position -> {charset: count}
        }

    def add(self, pwd, username=None):
        '''Record one password (and its username, if known).'''
        self.__data__.add(username, pwd)
        freqs = self.__frequencies__
        freqs['len'][len(pwd)] += 1
        freqs['ncs'][self.ncharsets(pwd)] += 1
        for position, char in enumerate(pwd, 1):
            freqs['chtype'][position][self.chartype(char)] += 1

    def show_frequencies(self,
                         max_w=None,
                         vcenter_title=False,
                         show_empty=False):
        '''Print the length, charset-count and per-position charset
        histograms.

        max_w: longest bar in characters; None prints one '+' per
               occurrence.
        vcenter_title: put the position label on the middle row of
               each per-position histogram instead of the first.
        show_empty: also print bins whose count is 0.
        '''
        print('~~~~~~~~~~ Lengths ~~~~~~~~~~')
        self.__show_histogram__(
            self.__frequencies__['len'],
            max_w=max_w,
            show_empty=show_empty)
        print('\n~~~~~~~~~~ Character sets ~~~~~~~~~~')
        self.__show_histogram__(
            self.__frequencies__['ncs'],
            max_w=max_w,
            show_empty=show_empty)
        print('\n~~~~~~~~~~ Character sets by position ~~~~~~~~~~')
        # max_w defaults to None; treat that as 0 for the rule width
        # instead of failing on `None + 12`.
        rule = '\u2014' * ((max_w or 0) + 12)
        by_position = self.__frequencies__['chtype']
        for position in sorted(by_position):
            self.__show_histogram__(by_position[position],
                                    keys=self._charsets.keys(),
                                    max_w=max_w,
                                    title='{!s}:'.format(position),
                                    vcenter_title=vcenter_title,
                                    show_empty=show_empty)
            print(rule)

    def show_similar(self, min_rep=2, top=None):
        '''Print the base words shared by the most stored entries.

        min_rep: stop at the first base word seen fewer times than
                 this.
        top: print at most this many base words (None: no limit).
        '''
        print('\n~~~~~~~~~~ Top {}repeated passwords ~~~~~~~~~~'.format(
            f'{top} ' if top is not None else ''))
        colA = 'Number of times seen'
        colB = 'Base word'
        sep = ' ' * 6
        print(f'{colA}{sep}{colB}')
        ranked = sorted(
            self.__data__.all_passwords.items(),
            key=lambda item: len(item[1]),
            reverse=True)[:top]
        for base, entries in ranked:
            if len(entries) < min_rep:
                # sorted by count, so nothing further qualifies
                return
            print(f'{len(entries):<{len(colA)}d}{sep}{base}')

    def show_shared(self):
        '''Print, per base word, every username base that used more
        than one distinct password derived from it.
        '''
        # TODO better way to print a table
        print('\n~~~~~~~~~~ Shared passwords ~~~~~~~~~~')
        colA = 'User (low-priv)'
        colB = 'Base word'
        colC = 'Times'
        colD = 'Passwords'
        sep = ' ' * 6
        print(f'{colA:<15}{sep}{colB:<10}{sep}{colC:<5}{sep}{colD}')
        for base, entries in sorted(
                self.__data__.all_passwords.items(),
                key=lambda item: len(item[1]),
                reverse=True):
            # entries are ((username_base, username),
            #              (password_base, password)) tuples
            by_user = {}
            for (user_base, _), (_, password) in entries:
                by_user.setdefault(user_base, set()).add(password)
            for user_base, pwds in by_user.items():
                if len(pwds) == 1:
                    continue
                # '!s' because the username base is None when no
                # username was supplied, and None rejects '<15';
                # sorted() makes the output deterministic.
                print(('{user!s:<15}{sep}{base:<10}{sep}{n:<5}'
                       '{sep}{pwds}').format(
                           user=user_base,
                           sep=sep,
                           base=base,
                           n=len(pwds),
                           pwds=', '.join(sorted(pwds))))

    @classmethod
    def chartype(cls, char):
        '''Return the name of the charset the single character char
        belongs to ('other' if none).

        Raises ValueError if char is not exactly one character long.
        '''
        if len(char) != 1:
            raise ValueError(
                'Argument to chartype should be of length 1')
        for name, charset in cls._charsets.items():
            if char in charset:
                return name
        return 'other'

    @classmethod
    def ncharsets(cls, pwd):
        '''Return how many different charsets (including 'other') occur
        in pwd.'''
        return len({cls.chartype(char) for char in pwd})

    @staticmethod
    def __show_histogram__(freqs,
                           keys=None,
                           max_w=None,
                           title='',
                           vcenter_title=False,
                           show_empty=False):
        '''Print one horizontal histogram, one row per key.

        freqs: mapping of key -> count.
        keys: keys to show, in order (default: all keys of freqs).
        max_w: bars are scaled to stay within this many characters;
               None prints one '+' per occurrence.
        title: label printed in front of one row (the first, or the
               middle one if vcenter_title is set).
        show_empty: also print rows whose count is 0.
        '''
        keys = list(freqs.keys() if keys is None else keys)
        if not keys:
            return
        label_w = max(len(str(key)) for key in keys)
        shown = {key: freqs[key] for key in keys
                 if freqs[key] or show_empty}
        if not shown:
            return

        if max_w is not None:
            # +1 keeps the rounded-up longest bar within max_w
            scale = max_w / (max(shown.values()) + 1)
            bar_w = max_w
        else:
            scale = 1
            bar_w = ''  # empty width: no padding
        title_row = int((len(shown) - 1) / 2) if vcenter_title else 0
        blank = ' ' * (len(title) + 1)

        for row, (key, count) in enumerate(shown.items()):
            prefix = title + ' ' if row == title_row else blank
            bar = '+' * math.ceil(count * scale)
            print(f'{prefix}{key!s:>{label_w}} {bar:{bar_w}} ({count})')
if __name__ == '__main__':
    # Command-line entry point: read "user<sep>password" style records
    # from the given files and print all statistics.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    # nargs='+' so that several files can be given; a positional with
    # action='append' and no nargs accepts exactly one value.
    parser.add_argument(
        'files', nargs='+', metavar='FILE',
        help='Input file(s), one record per line.')
    parser.add_argument(
        '-s', '--separator', default=':', metavar='CHAR',
        help=('Use CHAR as the field separator. Treated as a literal '
              'string, not a regex.'))
    parser.add_argument(
        '-P', '--password-field', type=int, default=2, metavar='NUM',
        help='Take input field NUM as the password.')
    parser.add_argument(
        '-U', '--username-field', type=int, default=1, metavar='NUM',
        help=('Take input field NUM as the username. Set to 0 if '
              'no username'))
    parser.add_argument(
        '-w', '--max-width', type=int, default=50, metavar='NUM',
        help='Maximum histogram bin length.')
    parser.add_argument(
        '--vcenter', default=False, action='store_true',
        help='Vertically center histogram titles.')
    parser.add_argument(
        '--show-empty', default=False, action='store_true',
        help='Print empty histogram bins.')
    parser.add_argument(
        '-t', '--top-repeated', type=int, default=10, metavar='NUM',
        help='Print top NUM repeated passwords.')
    parser.add_argument(
        '--admin-prefixes', nargs='+',
        help='Treat these strings as prefixes for admin accounts.')
    parser.add_argument(
        '--admin-suffixes', nargs='+',
        help='Treat these strings as suffixes for admin accounts.')
    args = parser.parse_args()

    analyzer = Analyzer(
        username_prefixes=args.admin_prefixes,
        username_suffixes=args.admin_suffixes)

    # Fields are numbered from 1; a record needs at least this many.
    needed = max(args.username_field, args.password_field, 1)
    for path in args.files:
        with open(path) as fh:
            for lineno, line in enumerate(fh, 1):
                fields = line.strip('\r\n').split(args.separator)
                if len(fields) < needed:
                    # Skip short/malformed records instead of dying
                    # with IndexError; the content is not logged since
                    # it may hold credentials.
                    logging.warning(
                        '%s:%d: expected at least %d field(s), got %d; '
                        'line skipped',
                        path, lineno, needed, len(fields))
                    continue
                username = None
                if args.username_field > 0:
                    username = fields[args.username_field - 1]
                pwd = fields[args.password_field - 1]
                analyzer.add(pwd, username=username)

    analyzer.show_frequencies(
        max_w=args.max_width,
        vcenter_title=args.vcenter,
        show_empty=args.show_empty)
    analyzer.show_similar(top=args.top_repeated)
    analyzer.show_shared()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment