u1735067 · September 4, 2020 11:50
diff --git a/checksummer.py b/checksummer.py
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-

 from __future__ import print_function, unicode_literals
 import sys, errno, os, argparse, re, stat, hashlib
 from collections import OrderedDict

 # Constructor of every algorithm hashlib guarantees on all platforms, keyed by algorithm name
 hash_algos = dict((name, getattr(hashlib, name)) for name in hashlib.algorithms_guaranteed)
 # When hashlib lacks one of the BLAKE2 variants, borrow both from the optional pyblake2 package
 if not all(blake_name in hash_algos for blake_name in ('blake2b', 'blake2s')):
    try:
        import pyblake2
        hash_algos.update({
            'blake2b': pyblake2.blake2b,
            'blake2s': pyblake2.blake2s,
        })
    except ImportError:
        # pyblake2 is not installed: simply go without BLAKE2
        pass
 # The SHAKE algorithms are left out because their digest length is variable and tools disagree on it:
 # - Perl's Digest::SHA3 / sha3sum output 1344 bits (336 hex chars) & 1088 bits (272 hex chars)
 #     https://metacpan.org/source/MSHELOR/Digest-SHA3-1.04/src/sha3.h#L70
 #     https://metacpan.org/source/MSHELOR/Digest-SHA3-1.04/sha3sum#L220
 #   The reason is unclear, see
 #     https://keccak.team/software.html
 #     https://crypto.stackexchange.com/questions/43718/if-the-output-size-of-shake128-256-is-variable-why-is-the-security-fixed-at-128
 #   and maybe https://csrc.nist.gov/CSRC/media/Projects/Cryptographic-Algorithm-Validation-Program/documents/sha3/sha3vs.pdf
 # - OpenSSL outputs 128 bits (32 hex chars) & 256 bits (64 hex chars)
 #     https://github.com/openssl/openssl/blob/OpenSSL_1_1_1g/crypto/evp/m_sha3.c#L382
 #     https://www.openssl.org/docs/man1.1.1/man3/EVP_sha3_224.html
 # The remaining algorithms are kept sorted by name.
 hash_algos = OrderedDict(
    (name, hash_algos[name])
    for name in sorted(hash_algos)
    if 'shake' not in name
 )

 # Tri-state flag: None when not running on Windows, otherwise whether ntdll reports long paths as enabled
 LongPathsEnabled = None
 if os.name == 'nt':
    import ctypes
    ntdll = ctypes.WinDLL('ntdll')
    if not hasattr(ntdll, 'RtlAreLongPathsEnabled'):
        # ntdll does not export the function: consider long paths as disabled
        LongPathsEnabled = False
    else:
        # Declare the prototype (no argument, unsigned byte result) before calling
        ntdll.RtlAreLongPathsEnabled.restype = ctypes.c_ubyte
        ntdll.RtlAreLongPathsEnabled.argtypes = ()
        LongPathsEnabled = bool(ntdll.RtlAreLongPathsEnabled())


 class ArgumentDefaultsFileHelpFormatter(argparse.HelpFormatter):
    """Help formatter appending the default value of an argument to its help text.

    A default that is an open stream (typically ``sys.stdout``) is shown by its ``name`` rather
    than through the generic ``%(default)s`` substitution.
    """

    def _get_help_string(self, action):
        """Return the help text of *action*, followed by ``(default: ...)`` when relevant.

        The suffix is added only when the help does not already contain ``%(default)``, when the
        default is neither ``None`` nor ``argparse.SUPPRESS``, and when the argument is an option
        or a positional whose ``nargs`` is ``?`` or ``*``.
        """
        help_text = action.help
        default = action.default
        if not help_text or '%(default)' in help_text:
            return help_text
        if default is None or default is argparse.SUPPRESS:
            return help_text
        if not (action.option_strings or action.nargs in (argparse.OPTIONAL, argparse.ZERO_OR_MORE)):
            return help_text
        # Duck-type the stream instead of comparing the class name with 'TextIOWrapper', so that
        # Python 2 `file` objects and other stream classes are displayed by name too
        if hasattr(default, 'write') and hasattr(default, 'name'):
            # argparse %-formats the help afterwards: a literal '%' in the name must be doubled
            stream_name = '{}'.format(default.name).replace('%', '%%')
            return help_text + ' (default: {})'.format(stream_name)
        return help_text + ' (default: %(default)s)'


 def main():
    """Command-line entry point.

    Parses the arguments, hashes every BASE_PATH through a ``Checksummer`` and, with ``--stats``,
    prints a summary on stderr. When ``LongPathsEnabled`` is False (Windows without long paths
    support), a warning is printed on stderr first.
    """
    if LongPathsEnabled is False:
        print(r'''
 /!\ LongPathsEnabled is not set, you'll face issue for paths longer than 255 characters.
 If you are under Win10 >= 1607, please consider setting
 HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Control\FileSystem\LongPathsEnabled=1
 More informations at the end of the sources of this script.
 ''', file=sys.stderr)

    parser = argparse.ArgumentParser(
        formatter_class=ArgumentDefaultsFileHelpFormatter,
        description='Unix-like checksummer allowing to use any included Python algorithm',
        epilog='Files are read in binary mode, text mode makes no sense ..'
    )
    parser.add_argument('base_paths', metavar='BASE_PATH', nargs='+',  # type=str, nargs=None,
        help='base directory for the logging')

    parser.add_argument('-a', '--algos', nargs='+',
        choices=sorted(hash_algos, key=OrderedBrowser.sort_natural),
        # A plain list of names, so that --help shows the algorithm names and not the mapping's repr
        default=list(hash_algos),
        help='algorithm(s) to hash files with')
    parser.add_argument('-r', '--recurse', action='store_true', help='process sub-directories')
    parser.add_argument('-L', '--dereference', action='store_true', help='follow symbolic links')
    parser.add_argument('--tag', action='store_true', help='use BSD style output "algo (path) = hash"')

    parser_order = parser.add_argument_group('processing order')
    parser_order.add_argument('-s', '--sort', choices=['no', 'lexicographical', 'natural'], default='natural',
        help='elements sorting method')
    parser_order.add_argument('-f', '--folders', choices=['first', 'last', 'with-files'], default='with-files',
        help='handling of the folders in the output')
    parser_order.add_argument('-d', '--dot-folders', choices=['same', 'separated', 'excepted'], default='same',
        help='with separated, dot folders will be placed with dot files (first or last _in_ dot files); '
             'with excepted, they will be treated like dot files')

    parser_output = parser.add_argument_group('output related')
    parser_output.add_argument('-v', '--stats', action='store_true', help='display stats on stderr at the end')
    # https://docs.python.org/3/library/functions.html#open
    parser_output.add_argument('-o', '--output', metavar='FILE', type=argparse.FileType('w'), default=sys.stdout,
        help='output the hash to a file (UTF-8 with BOM)')

    args = parser.parse_args()

    chksum = Checksummer(
        algos=args.algos,
        # The parsed namespace carries the sort / folders / dot_folders attributes the browser reads
        browser=OrderedBrowser(policy=args),
        recurse=args.recurse,
        # Without this, -L / --dereference was accepted but silently ignored
        dereference=args.dereference,
        out=args.output,
        tag_style=args.tag
    )
    for base_path in args.base_paths:
        chksum.checksum(base_path)

    if args.stats:
        stats = chksum.get_stats()
        # The directory count is only reported when sub-directories were requested
        if args.recurse:
            print('Hashed {} files in {} directories for a total of {} {}'.format(
                    stats['files'], stats['dirs'], *size_to_human(stats['bytes'])
                ), file=sys.stderr
            )
        else:
            print('Hashed {} files for a total of {} {}'.format(
                    stats['files'], *size_to_human(stats['bytes'])
                ), file=sys.stderr
            )


 def size_to_human(size, formatted_number=True, units_iec=True):
    """Scale a byte count to the largest unit that fits.

    :param size: number of bytes
    :param formatted_number: when true, the number is returned as a string rounded to two decimals
        with superfluous zeros removed (``3.00`` -> ``'3'``, ``3.141`` -> ``'3.14'``); otherwise the
        scaled number itself is returned
    :param units_iec: when true, divide by 1024 and label with KiB, MiB, ...; otherwise divide by
        1000 and use the same labels without the ``i`` (KB, MB, ...)
    :return: ``(number, unit)`` tuple
    """
    units = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    multiplier = 1024 if units_iec else 1000
    unit_index = 0
    # Stop at the last unit, even when the value is still larger than the multiplier
    while size >= multiplier and unit_index < len(units) - 1:
        # Force a float division: this file does not import `division` from __future__, so under
        # Python 2 an int / int division would truncate and lose the fractional part
        size = size / float(multiplier)
        unit_index += 1
    # https://stackoverflow.com/questions/2440692/formatting-floats-in-python-without-superfluous-zeros
    if formatted_number:
        size = format(size, '.2f').rstrip('0').rstrip('.')
    unit = units[unit_index]
    if not units_iec:
        unit = unit.replace('i', '')
    return (size, unit)


 class OrderedBrowser:
    """List a directory and hand its entries to a callback in a configurable order.

    The order depends on three settings: the sort method, the place of the folders relative to the
    files, and the handling of the entries whose name starts with a dot.
    """

    def __init__(self, policy=None):
        """
        :param policy: object with ``sort`` ('no', 'natural' or 'lexicographical'), ``folders`` and
            ``dot_folders`` attributes, such as the namespace returned by argparse; with None the
            defaults are natural sort, folders last and dot folders handled like the other ones
        :raises Exception: when ``policy.sort`` is not one of the known methods
        """
        if policy is not None:
            if policy.sort == 'no':
                self.sort_algo = None
            elif policy.sort == 'natural':
                self.sort_algo = self.sort_natural
            elif policy.sort == 'lexicographical':
                self.sort_algo = self.sort_lexicographical
            else:
                raise Exception('Unknown sort')

            # The values of these two are not checked here (argparse restricts them in main())
            self.folders = policy.folders
            self.dot_folders = policy.dot_folders
        else:
            self.sort_algo = self.sort_natural
            self.folders = 'last'
            self.dot_folders = 'same'
        # os.scandir is not available everywhere: fall back on an os.listdir based equivalent
        self.scandir_method = os.scandir if hasattr(os, 'scandir') else self._fake_scandir

    @staticmethod
    def sort_natural(s, _nsre=re.compile('([0-9]+)')):
        """Return a sort key for *s* comparing digit runs as numbers and the rest case-insensitively.

        :param s: the name to build a key for
        :param _nsre: compiled pattern with one capturing group matching the digit runs
        :return: list alternating lowercased text (even positions) and integers (odd positions)
        """
        key = []
        for i, text in enumerate(_nsre.split(s)):
            if i % 2:
                # Splitting on a capturing group puts the matched [0-9]+ runs at the odd positions.
                # Relying on the position rather than on str.isdigit() matters: isdigit() is also
                # true for characters such as superscript two, which int() rejects, and a text
                # part turned into an int could not be compared with the text of another key.
                key.append(int(text))
            elif i > 0 or text:
                key.append(text.lower())
            else:
                # Name starting with a digit: the empty leading text is replaced by '0'
                key.append('0')
        return key

    @staticmethod
    def sort_lexicographical(s):
        """Return *s* itself as the sort key."""
        return s

    @staticmethod
    def _fake_scandir(path):
        """Emulate os.scandir: wrap each name returned by os.listdir(path) in a FakeDirEntry."""
        return map(lambda entry: FakeDirEntry(os.sep.join((path, entry))), os.listdir(path))

    def scandir(self, path, callback):
        """List *path* and pass its entries to *callback*, one call per group of entries.

        When the listing fails, the error is printed on stderr and the callback is not called.

        :param path: directory to list
        :param callback: callable receiving an iterable of directory entries; it is called once
            with everything when folders are kept with the files, otherwise once per group
        """
        try:
            entries = self.scandir_method(path)
        except Exception as e:
            print('Failed to list "{}": {}'.format(path, e), file=sys.stderr)
            return

        if self.sort_algo is not None:
            entries = list(entries)
            entries.sort(key=lambda dir_entry: self.sort_algo(dir_entry.name))

        if self.folders not in ['first', 'last']:
            callback(entries)
            return

        dirs = []
        files = []
        dot_dirs = []
        dot_files = []

        # Aliasing the lists avoids a stack of conditions when filling them
        if self.dot_folders == 'excepted':  # dot folders end up in the same list as the dot files
            dot_dirs = dot_files
        if self.dot_folders == 'same':  # dot entries end up with the regular folders and files
            dot_dirs = dirs
            dot_files = files

        # First index: name starts with a dot; second index: is a directory (False -> 0, True -> 1)
        buckets = ((files, dirs), (dot_files, dot_dirs))
        for entry in entries:
            starts_with_dot = entry.name[0] == '.'
            buckets[starts_with_dot][entry.is_dir(follow_symlinks=False)].append(entry)

        if self.dot_folders == 'excepted':
            callback(dot_files)

        if self.folders == 'first':
            if self.dot_folders == 'separated':
                callback(dot_dirs)
                callback(dot_files)
            callback(dirs)
            callback(files)
        else:
            if self.dot_folders == 'separated':
                callback(dot_files)
                callback(dot_dirs)
            callback(files)
            callback(dirs)


 # Stand-in for os.DirEntry, to handle a path the same way as an entry returned by os.scandir
 # https://docs.python.org/3/library/os.html#os.DirEntry
 class FakeDirEntry:
    """Object exposing the os.DirEntry methods and the ``name`` / ``path`` attributes for a path.

    The path is lstat()-ed at construction, so an invalid path raises immediately. Both stat
    results (following symlinks or not) are cached after their first use.
    """

    def __init__(self, file_path):
        """
        :param file_path: path of the file or directory; trailing separators are removed
        :raises OSError: when the path cannot be lstat()-ed
        """
        from os.path import basename
        if os.name == 'nt' and len(file_path) == 3 and file_path[1] == ':':
            # Windows drive root such as 'C:\': the trailing separator is part of the path
            self.path = file_path
        else:
            # A path made only of separators (e.g. '/') is kept as is: stripping it would leave
            # an empty string, which cannot be stat()-ed
            self.path = file_path.rstrip(r'\/') or file_path
        self.name = basename(self.path)
        self.parent = self.path[:-(len(self.name) + 1)]
        # Index 0: lstat() result (symlinks not followed), index 1: stat() result (followed)
        self._cached_stat = [None, None]
        self.stat(follow_symlinks=False)  # Populate the cache and raise right away on an invalid path

    def inode(self):
        """Return the inode number of the entry (symlinks are not followed)."""
        return self.stat(follow_symlinks=False).st_ino

    def is_dir(self, follow_symlinks=True):
        """Return whether the entry is a directory."""
        return stat.S_ISDIR(self.stat(follow_symlinks=follow_symlinks).st_mode)

    def is_file(self, follow_symlinks=True):
        """Return whether the entry is a regular file."""
        return stat.S_ISREG(self.stat(follow_symlinks=follow_symlinks).st_mode)

    def is_symlink(self):
        """Return whether the entry itself is a symbolic link."""
        return stat.S_ISLNK(self.stat(follow_symlinks=False).st_mode)

    def stat(self, follow_symlinks=True):
        """Return the cached os.stat() (or os.lstat() when not following symlinks) result."""
        # Normalize to 0 / 1 so that any truthy or falsy value is a valid cache index
        slot = 1 if follow_symlinks else 0
        if self._cached_stat[slot] is None:
            self._cached_stat[slot] = os.stat(self.path) if follow_symlinks else os.lstat(self.path)
        return self._cached_stat[slot]


 class Checksummer:
    """Hash files with one or several algorithms, printing one line per file and per algorithm.

    Counters of the directories entered, the files hashed and the bytes read are kept for
    get_stats().
    """

    chunk_size = 64 * 1024  # 64k, amount of data read at once from a file

    def __init__(self, algos, browser=None, recurse=False, dereference=False, out=sys.stdout, tag_style=False):
        """
        :param algos: iterable of algorithm codes (keys of the module-level ``hash_algos``)
        :param browser: object whose ``scandir(path, callback)`` lists the directories; an
            ``OrderedBrowser()`` is created when None
        :param recurse: whether the sub-directories of a base directory are processed
        :param dereference: passed as ``follow_symlinks`` to the is_file() / is_dir() checks
        :param out: file object the hash lines are printed to
        :param tag_style: use the ``ALGO (path) = hash`` lines; they are always used when there
            is more than one algorithm
        """
        self.algos = OrderedDict((algo, self._algo_code2tag(algo)) for algo in algos)
        # Created here rather than as a default argument, which is evaluated only once, when the
        # function is defined, and then shared by every instance
        self.browser = OrderedBrowser() if browser is None else browser
        self.recurse = recurse
        self.dereference = dereference
        self.out = out
        if tag_style or len(self.algos) > 1:
            self.out_template = '{algo} ({path}) = {hash}'
        else:
            self.out_template = '{hash}  {path}'
        self.tag_style = tag_style

        # Init stats
        self.total_dirs = self.total_files = self.total_files_size = 0

    def checksum(self, path):
        """Hash *path* when it is a file, or the content of *path* when it is a directory.

        :raises Exception: when the path is neither a regular file nor a directory
        :raises OSError: when the path cannot be stat()-ed
        """
        first = FakeDirEntry(path)
        if first.is_file(follow_symlinks=self.dereference):
            self._process_entry(first, recurse=False)
        elif first.is_dir(follow_symlinks=self.dereference):
            self.browser.scandir(path, lambda entries: self._process_entries(entries, self.recurse))
        else:
            raise Exception("Probably trying to run on a symlink, sorry they're not followed unless you ask for it")

    @staticmethod
    def _algo_code2tag(algo):
        """Convert an algorithm code to its display tag, e.g. ``sha3_256`` -> ``SHA3-256``.

        Underscores become dashes and everything before the first digit is uppercased
        (``blake2b`` -> ``BLAKE2b``); a code without any digit is uppercased entirely.
        """
        algo = algo.replace('_', '-')
        for i, c in enumerate(algo):
            if c.isdigit():
                return algo[:i].upper() + algo[i:]
        # No digit found: uppercase the whole code instead of leaving its last character as is
        return algo.upper()

    def get_stats(self):
        """Return a dict with the 'dirs', 'files' and 'bytes' counters."""
        return {
            'dirs': self.total_dirs,
            'files': self.total_files,
            'bytes': self.total_files_size,
        }

    def _process_entries(self, entries, recurse=True):
        """Process every entry of *entries* with _process_entry()."""
        for entry in entries:
            self._process_entry(entry, recurse)

    def _process_entry(self, entry, recurse=True):
        """Hash *entry* when it is a file, or browse it when it is a directory and *recurse* is true.

        Any other entry is skipped. An error while hashing a file is printed on stderr and the
        file is left out of the output and of the stats.
        """
        if recurse and entry.is_dir(follow_symlinks=self.dereference):
            self.total_dirs += 1
            self.browser.scandir(entry.path, self._process_entries)
        elif entry.is_file(follow_symlinks=self.dereference):
            try:
                hash_objs = {algo: hash_algos[algo]() for algo in self.algos}
                with open(entry.path, 'rb') as entry_handle:
                    # Read by chunks, read() without a size would load the whole file in memory
                    # https://stackoverflow.com/questions/3431825/generating-an-md5-checksum-of-a-file
                    # https://docs.python.org/3/library/functions.html#iter
                    for chunk in iter(lambda: entry_handle.read(self.chunk_size), b''):
                        for hash_obj in hash_objs.values():
                            hash_obj.update(chunk)
                    # The position after the last read is the size: no need for a stat()
                    entry_size = entry_handle.tell()
            except Exception as e:
                print('Error hashing "{}": {}({})'.format(entry.path, e.__class__.__name__, e), file=sys.stderr)
            else:
                for algo_code, algo_tag in self.algos.items():
                    print(self.out_template.format(
                            algo=algo_tag, path=entry.path, hash=hash_objs[algo_code].hexdigest()
                        ),
                        file=self.out
                    )
                # With several algorithms, an empty line separates the files
                if len(self.algos) > 1:
                    print(file=self.out)
                self.total_files += 1
                self.total_files_size += entry_size


 if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        # Interrupted by the user: leave without a traceback
        pass
    except IOError as io_error:
        # A closed pipe (output piped to a command that stopped reading) is ignored,
        # any other I/O error is propagated
        if io_error.errno == errno.EPIPE:
            pass
        else:
            raise

 r'''
 About long paths on Windows:
 - https://bugs.python.org/issue18199
 -  https://bugs.python.org/issue27731
 - https://blogs.msdn.microsoft.com/jeremykuhne/2016/06/21/more-on-new-net-path-handling/
 -  https://blogs.msdn.microsoft.com/jeremykuhne/2016/07/30/net-4-6-2-and-long-paths-on-windows-10/
 - https://lifehacker.com/windows-10-allows-file-names-longer-than-260-characters-1785201032
 - https://betanews.com/2016/05/29/long-paths-windows-10/
 - https://social.msdn.microsoft.com/Forums/en-US/fc85630e-5684-4df6-ad2f-5a128de3deef/260-character-explorer-path-length-limit?forum=windowsgeneraldevelopmentissues
 - https://msdn.microsoft.com/en-us/library/aa365247%28VS.85%29.aspx?f=255&MSPPError=-2147217396#maxpath
 - https://msdn.microsoft.com/en-us/library/windows/desktop/aa374191(v=vs.85).aspx
 - https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx#maxpath / https://msdn.microsoft.com/en-us/library/aa365247(VS.85).aspx#maxpath
 - https://github.com/python/cpython/blob/master/PC/python.manifest
 - https://stackoverflow.com/questions/36219317/pathname-too-long-to-open/36219497
 Contrary to what aa365247 says ("can also"), both the registry key AND the manifest are required ..

 Alternative solutions (workaround / hacks):
 - https://docs.python.org/3/library/os.html#os.listdir
    handle paths manually, with listdir for example, adding '\\?\' when required ; this is heavy, you must carry the path
 - https://docs.python.org/3/library/os.html#os.scandir
    scandir cannot be used directly with '\\?\', you lose the relative path, or
    using it with relative calls (cd each time) + other calls using '\\?\' you lose the interest of scandir
    (and it may not return the informations without \\.\ on long paths)
 - https://docs.python.org/3/library/pathlib.html / https://www.python.org/dev/peps/pep-0428/ / https://github.com/python/cpython/blob/3.6/Lib/pathlib.py
    pathlib doesn't handle '\\?\' and doesn't allow to chdir by itself ; also follows symlinks by default and doesn't use scandir

 Also, even if you chdir before each listing so that open('short-name') works, the full path can still cause issues: the '\\.\' notation has to be used.
 Some path normalization is also required, as explained in the issues / blog posts above.
 And there is probably more, see https://bugs.python.org/issue18199#msg191035
 '''
	#!/usr/bin/env python
	# -- coding: utf-8 --

	from __future__ import print_function, unicode_literals
	import sys, errno, os, argparse, re, stat, hashlib
	from collections import OrderedDict

	# Constructor of every algorithm hashlib guarantees on all platforms, keyed by algorithm name
	hash_algos = {algo: getattr(hashlib, algo) for algo in hashlib.algorithms_guaranteed}
	# Try to use pyblake2 if available and not included in hashlib
	# (the indentation of this block was lost in this copy; it is restored here)
	if any(blake_algo not in hash_algos for blake_algo in ('blake2b', 'blake2s')):
	    try:
	        import pyblake2
	        hash_algos.update({
	            'blake2b': pyblake2.blake2b,
	            'blake2s': pyblake2.blake2s,
	        })
	    except ImportError:
	        # pyblake2 is not installed: simply go without BLAKE2
	        pass
	# The SHAKE algorithms are left out because their digest length is variable and tools disagree on it:
	# - Perl's Digest::SHA3 / sha3sum output 1344 bits (336 hex chars) & 1088 bits (272 hex chars)
	#     https://metacpan.org/source/MSHELOR/Digest-SHA3-1.04/src/sha3.h#L70
	#     https://metacpan.org/source/MSHELOR/Digest-SHA3-1.04/sha3sum#L220
	#   The reason is unclear, see
	#     https://keccak.team/software.html
	#     https://crypto.stackexchange.com/questions/43718/if-the-output-size-of-shake128-256-is-variable-why-is-the-security-fixed-at-128
	#   and maybe https://csrc.nist.gov/CSRC/media/Projects/Cryptographic-Algorithm-Validation-Program/documents/sha3/sha3vs.pdf
	# - OpenSSL outputs 128 bits (32 hex chars) & 256 bits (64 hex chars)
	#     https://github.com/openssl/openssl/blob/OpenSSL_1_1_1g/crypto/evp/m_sha3.c#L382
	#     https://www.openssl.org/docs/man1.1.1/man3/EVP_sha3_224.html
	# The remaining algorithms are kept sorted by name.
	hash_algos = OrderedDict((algo, hash_algos[algo]) for algo in sorted(hash_algos) if 'shake' not in algo)

	# Tri-state flag: None when not running on Windows, otherwise whether ntdll reports long paths as enabled
	# (the indentation of this block was lost in this copy; it is restored here)
	LongPathsEnabled = None
	if os.name == 'nt':
	    import ctypes
	    LongPathsEnabled = False
	    ntdll = ctypes.WinDLL('ntdll')
	    # The flag stays False when ntdll does not export the function
	    if hasattr(ntdll, 'RtlAreLongPathsEnabled'):
	        # Declare the prototype (no argument, unsigned byte result) before calling
	        ntdll.RtlAreLongPathsEnabled.restype = ctypes.c_ubyte
	        ntdll.RtlAreLongPathsEnabled.argtypes = ()
	        LongPathsEnabled = bool(ntdll.RtlAreLongPathsEnabled())


	class ArgumentDefaultsFileHelpFormatter(argparse.HelpFormatter):
	    """Help formatter appending the default value of an argument to its help text.

	    A default that is an open stream (typically ``sys.stdout``) is shown by its ``name`` rather
	    than through the generic ``%(default)s`` substitution.
	    """

	    def _get_help_string(self, action):
	        """Return the help text of *action*, followed by ``(default: ...)`` when relevant.

	        The suffix is added only when the help does not already contain ``%(default)``, when the
	        default is neither ``None`` nor ``argparse.SUPPRESS``, and when the argument is an option
	        or a positional whose ``nargs`` is ``?`` or ``*``.
	        """
	        help_text = action.help
	        default = action.default
	        if not help_text or '%(default)' in help_text:
	            return help_text
	        if default is None or default is argparse.SUPPRESS:
	            return help_text
	        if not (action.option_strings or action.nargs in (argparse.OPTIONAL, argparse.ZERO_OR_MORE)):
	            return help_text
	        # Duck-type the stream instead of comparing the class name with 'TextIOWrapper', so that
	        # Python 2 `file` objects and other stream classes are displayed by name too
	        if hasattr(default, 'write') and hasattr(default, 'name'):
	            # argparse %-formats the help afterwards: a literal '%' in the name must be doubled
	            stream_name = '{}'.format(default.name).replace('%', '%%')
	            return help_text + ' (default: {})'.format(stream_name)
	        return help_text + ' (default: %(default)s)'


	def main():
	    """Command-line entry point.

	    Parses the arguments, hashes every BASE_PATH through a ``Checksummer`` and, with ``--stats``,
	    prints a summary on stderr. When ``LongPathsEnabled`` is False (Windows without long paths
	    support), a warning is printed on stderr first.
	    """
	    if LongPathsEnabled is False:
	        print(r'''
	/!\ LongPathsEnabled is not set, you'll face issue for paths longer than 255 characters.
	If you are under Win10 >= 1607, please consider setting
	HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Control\FileSystem\LongPathsEnabled=1
	More informations at the end of the sources of this script.
	''', file=sys.stderr)

	    parser = argparse.ArgumentParser(
	        formatter_class=ArgumentDefaultsFileHelpFormatter,
	        description='Unix-like checksummer allowing to use any included Python algorithm',
	        epilog='Files are read in binary mode, text mode makes no sense ..'
	    )
	    parser.add_argument('base_paths', metavar='BASE_PATH', nargs='+', # type=str, nargs=None,
	        help='base directory for the logging')

	    parser.add_argument('-a', '--algos', nargs='+',
	        choices=sorted(hash_algos, key=OrderedBrowser.sort_natural),
	        # A plain list of names, so that --help shows the algorithm names and not the mapping's repr
	        default=list(hash_algos),
	        help='algorithm(s) to hash files with')
	    parser.add_argument('-r', '--recurse', action='store_true', help='process sub-directories')
	    parser.add_argument('-L', '--dereference', action='store_true', help='follow symbolic links')
	    parser.add_argument('--tag', action='store_true', help='use BSD style output "algo (path) = hash"')

	    parser_order = parser.add_argument_group('processing order')
	    parser_order.add_argument('-s', '--sort', choices=['no', 'lexicographical', 'natural'], default='natural',
	        help='elements sorting method')
	    parser_order.add_argument('-f', '--folders', choices=['first', 'last', 'with-files'], default='with-files',
	        help='handling of the folders in the output')
	    parser_order.add_argument('-d', '--dot-folders', choices=['same', 'separated', 'excepted'], default='same',
	        help='with separated, dot folders will be placed with dot files (first or last _in_ dot files); '
	             'with excepted, they will be treated like dot files')

	    parser_output = parser.add_argument_group('output related')
	    parser_output.add_argument('-v', '--stats', action='store_true', help='display stats on stderr at the end')
	    # https://docs.python.org/3/library/functions.html#open
	    parser_output.add_argument('-o', '--output', metavar='FILE', type=argparse.FileType('w'), default=sys.stdout,
	        help='output the hash to a file (UTF-8 with BOM)')

	    args = parser.parse_args()

	    chksum = Checksummer(
	        algos=args.algos,
	        # The parsed namespace carries the sort / folders / dot_folders attributes the browser reads
	        browser=OrderedBrowser(policy=args),
	        recurse=args.recurse,
	        # Without this, -L / --dereference was accepted but silently ignored
	        dereference=args.dereference,
	        out=args.output,
	        tag_style=args.tag
	    )
	    for base_path in args.base_paths:
	        chksum.checksum(base_path)

	    if args.stats:
	        stats = chksum.get_stats()
	        # The directory count is only reported when sub-directories were requested
	        if args.recurse:
	            print('Hashed {} files in {} directories for a total of {} {}'.format(
	                    stats['files'], stats['dirs'], *size_to_human(stats['bytes'])
	                ), file=sys.stderr
	            )
	        else:
	            print('Hashed {} files for a total of {} {}'.format(
	                    stats['files'], *size_to_human(stats['bytes'])
	                ), file=sys.stderr
	            )


	def size_to_human(size, formatted_number=True, units_iec=True):
	    """Scale a byte count to the largest unit that fits.

	    :param size: number of bytes
	    :param formatted_number: when true, the number is returned as a string rounded to two decimals
	        with superfluous zeros removed (``3.00`` -> ``'3'``, ``3.141`` -> ``'3.14'``); otherwise the
	        scaled number itself is returned
	    :param units_iec: when true, divide by 1024 and label with KiB, MiB, ...; otherwise divide by
	        1000 and use the same labels without the ``i`` (KB, MB, ...)
	    :return: ``(number, unit)`` tuple
	    """
	    units = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
	    multiplier = 1024 if units_iec else 1000
	    unit_index = 0
	    # Stop at the last unit, even when the value is still larger than the multiplier
	    while size >= multiplier and unit_index < len(units) - 1:
	        # Force a float division: this file does not import `division` from __future__, so under
	        # Python 2 an int / int division would truncate and lose the fractional part
	        size = size / float(multiplier)
	        unit_index += 1
	    # https://stackoverflow.com/questions/2440692/formatting-floats-in-python-without-superfluous-zeros
	    if formatted_number:
	        size = format(size, '.2f').rstrip('0').rstrip('.')
	    unit = units[unit_index]
	    if not units_iec:
	        unit = unit.replace('i', '')
	    return (size, unit)


	class OrderedBrowser:
	    """List a directory and hand its entries to a callback in a configurable order.

	    The order depends on three settings: the sort method, the place of the folders relative to the
	    files, and the handling of the entries whose name starts with a dot.
	    """

	    def __init__(self, policy=None):
	        """
	        :param policy: object with ``sort`` ('no', 'natural' or 'lexicographical'), ``folders`` and
	            ``dot_folders`` attributes, such as the namespace returned by argparse; with None the
	            defaults are natural sort, folders last and dot folders handled like the other ones
	        :raises Exception: when ``policy.sort`` is not one of the known methods
	        """
	        if policy is not None:
	            if policy.sort == 'no':
	                self.sort_algo = None
	            elif policy.sort == 'natural':
	                self.sort_algo = self.sort_natural
	            elif policy.sort == 'lexicographical':
	                self.sort_algo = self.sort_lexicographical
	            else:
	                raise Exception('Unknown sort')

	            # The values of these two are not checked here (argparse restricts them in main())
	            self.folders = policy.folders
	            self.dot_folders = policy.dot_folders
	        else:
	            self.sort_algo = self.sort_natural
	            self.folders = 'last'
	            self.dot_folders = 'same'
	        # os.scandir is not available everywhere: fall back on an os.listdir based equivalent
	        self.scandir_method = os.scandir if hasattr(os, 'scandir') else self._fake_scandir

	    @staticmethod
	    def sort_natural(s, _nsre=re.compile('([0-9]+)')):
	        """Return a sort key for *s* comparing digit runs as numbers and the rest case-insensitively.

	        :param s: the name to build a key for
	        :param _nsre: compiled pattern with one capturing group matching the digit runs
	        :return: list alternating lowercased text (even positions) and integers (odd positions)
	        """
	        key = []
	        for i, text in enumerate(_nsre.split(s)):
	            if i % 2:
	                # Splitting on a capturing group puts the matched [0-9]+ runs at the odd positions.
	                # Relying on the position rather than on str.isdigit() matters: isdigit() is also
	                # true for characters such as superscript two, which int() rejects, and a text
	                # part turned into an int could not be compared with the text of another key.
	                key.append(int(text))
	            elif i > 0 or text:
	                key.append(text.lower())
	            else:
	                # Name starting with a digit: the empty leading text is replaced by '0'
	                key.append('0')
	        return key

	    @staticmethod
	    def sort_lexicographical(s):
	        """Return *s* itself as the sort key."""
	        return s

	    @staticmethod
	    def _fake_scandir(path):
	        """Emulate os.scandir: wrap each name returned by os.listdir(path) in a FakeDirEntry."""
	        return map(lambda entry: FakeDirEntry(os.sep.join((path, entry))), os.listdir(path))

	    def scandir(self, path, callback):
	        """List *path* and pass its entries to *callback*, one call per group of entries.

	        When the listing fails, the error is printed on stderr and the callback is not called.

	        :param path: directory to list
	        :param callback: callable receiving an iterable of directory entries; it is called once
	            with everything when folders are kept with the files, otherwise once per group
	        """
	        try:
	            entries = self.scandir_method(path)
	        except Exception as e:
	            print('Failed to list "{}": {}'.format(path, e), file=sys.stderr)
	            return

	        if self.sort_algo is not None:
	            entries = list(entries)
	            entries.sort(key=lambda dir_entry: self.sort_algo(dir_entry.name))

	        if self.folders not in ['first', 'last']:
	            callback(entries)
	            return

	        dirs = []
	        files = []
	        dot_dirs = []
	        dot_files = []

	        # Aliasing the lists avoids a stack of conditions when filling them
	        if self.dot_folders == 'excepted':  # dot folders end up in the same list as the dot files
	            dot_dirs = dot_files
	        if self.dot_folders == 'same':  # dot entries end up with the regular folders and files
	            dot_dirs = dirs
	            dot_files = files

	        # First index: name starts with a dot; second index: is a directory (False -> 0, True -> 1)
	        buckets = ((files, dirs), (dot_files, dot_dirs))
	        for entry in entries:
	            starts_with_dot = entry.name[0] == '.'
	            buckets[starts_with_dot][entry.is_dir(follow_symlinks=False)].append(entry)

	        if self.dot_folders == 'excepted':
	            callback(dot_files)

	        if self.folders == 'first':
	            if self.dot_folders == 'separated':
	                callback(dot_dirs)
	                callback(dot_files)
	            callback(dirs)
	            callback(files)
	        else:
	            if self.dot_folders == 'separated':
	                callback(dot_files)
	                callback(dot_dirs)
	            callback(files)
	            callback(dirs)


	# Stand-in for os.DirEntry, to handle a path the same way as an entry returned by os.scandir
	# https://docs.python.org/3/library/os.html#os.DirEntry
	class FakeDirEntry:
	    """Object exposing the os.DirEntry methods and the ``name`` / ``path`` attributes for a path.

	    The path is lstat()-ed at construction, so an invalid path raises immediately. Both stat
	    results (following symlinks or not) are cached after their first use.
	    """

	    def __init__(self, file_path):
	        """
	        :param file_path: path of the file or directory; trailing separators are removed
	        :raises OSError: when the path cannot be lstat()-ed
	        """
	        from os.path import basename
	        if os.name == 'nt' and len(file_path) == 3 and file_path[1] == ':':
	            # Windows drive root such as 'C:\': the trailing separator is part of the path
	            self.path = file_path
	        else:
	            # A path made only of separators (e.g. '/') is kept as is: stripping it would leave
	            # an empty string, which cannot be stat()-ed
	            self.path = file_path.rstrip(r'\/') or file_path
	        self.name = basename(self.path)
	        self.parent = self.path[:-(len(self.name) + 1)]
	        # Index 0: lstat() result (symlinks not followed), index 1: stat() result (followed)
	        self._cached_stat = [None, None]
	        self.stat(follow_symlinks=False)  # Populate the cache and raise right away on an invalid path

	    def inode(self):
	        """Return the inode number of the entry (symlinks are not followed)."""
	        return self.stat(follow_symlinks=False).st_ino

	    def is_dir(self, follow_symlinks=True):
	        """Return whether the entry is a directory."""
	        return stat.S_ISDIR(self.stat(follow_symlinks=follow_symlinks).st_mode)

	    def is_file(self, follow_symlinks=True):
	        """Return whether the entry is a regular file."""
	        return stat.S_ISREG(self.stat(follow_symlinks=follow_symlinks).st_mode)

	    def is_symlink(self):
	        """Return whether the entry itself is a symbolic link."""
	        return stat.S_ISLNK(self.stat(follow_symlinks=False).st_mode)

	    def stat(self, follow_symlinks=True):
	        """Return the cached os.stat() (or os.lstat() when not following symlinks) result."""
	        # Normalize to 0 / 1 so that any truthy or falsy value is a valid cache index
	        slot = 1 if follow_symlinks else 0
	        if self._cached_stat[slot] is None:
	            self._cached_stat[slot] = os.stat(self.path) if follow_symlinks else os.lstat(self.path)
	        return self._cached_stat[slot]


	class Checksummer:
	    """Hash files with one or several algorithms, printing one line per file and per algorithm.

	    Counters of the directories entered, the files hashed and the bytes read are kept for
	    get_stats().
	    """

	    chunk_size = 64 * 1024  # 64k, amount of data read at once from a file

	    def __init__(self, algos, browser=None, recurse=False, dereference=False, out=sys.stdout, tag_style=False):
	        """
	        :param algos: iterable of algorithm codes (keys of the module-level ``hash_algos``)
	        :param browser: object whose ``scandir(path, callback)`` lists the directories; an
	            ``OrderedBrowser()`` is created when None
	        :param recurse: whether the sub-directories of a base directory are processed
	        :param dereference: passed as ``follow_symlinks`` to the is_file() / is_dir() checks
	        :param out: file object the hash lines are printed to
	        :param tag_style: use the ``ALGO (path) = hash`` lines; they are always used when there
	            is more than one algorithm
	        """
	        self.algos = OrderedDict((algo, self._algo_code2tag(algo)) for algo in algos)
	        # Created here rather than as a default argument, which is evaluated only once, when the
	        # function is defined, and then shared by every instance
	        self.browser = OrderedBrowser() if browser is None else browser
	        self.recurse = recurse
	        self.dereference = dereference
	        self.out = out
	        if tag_style or len(self.algos) > 1:
	            self.out_template = '{algo} ({path}) = {hash}'
	        else:
	            # Two characters between hash and path, as in the lines printed by md5sum / sha*sum
	            self.out_template = '{hash}  {path}'
	        self.tag_style = tag_style

	        # Init stats
	        self.total_dirs = self.total_files = self.total_files_size = 0

	    def checksum(self, path):
	        """Hash *path* when it is a file, or the content of *path* when it is a directory.

	        :raises Exception: when the path is neither a regular file nor a directory
	        :raises OSError: when the path cannot be stat()-ed
	        """
	        first = FakeDirEntry(path)
	        if first.is_file(follow_symlinks=self.dereference):
	            self._process_entry(first, recurse=False)
	        elif first.is_dir(follow_symlinks=self.dereference):
	            self.browser.scandir(path, lambda entries: self._process_entries(entries, self.recurse))
	        else:
	            raise Exception("Probably trying to run on a symlink, sorry they're not followed unless you ask for it")

	    @staticmethod
	    def _algo_code2tag(algo):
	        """Convert an algorithm code to its display tag, e.g. ``sha3_256`` -> ``SHA3-256``.

	        Underscores become dashes and everything before the first digit is uppercased
	        (``blake2b`` -> ``BLAKE2b``); a code without any digit is uppercased entirely.
	        """
	        algo = algo.replace('_', '-')
	        for i, c in enumerate(algo):
	            if c.isdigit():
	                return algo[:i].upper() + algo[i:]
	        # No digit found: uppercase the whole code instead of leaving its last character as is
	        return algo.upper()

	    def get_stats(self):
	        """Return a dict with the 'dirs', 'files' and 'bytes' counters."""
	        return {
	            'dirs': self.total_dirs,
	            'files': self.total_files,
	            'bytes': self.total_files_size,
	        }

	    def _process_entries(self, entries, recurse=True):
	        """Process every entry of *entries* with _process_entry()."""
	        for entry in entries:
	            self._process_entry(entry, recurse)

	    def _process_entry(self, entry, recurse=True):
	        """Hash *entry* when it is a file, or browse it when it is a directory and *recurse* is true.

	        Any other entry is skipped. An error while hashing a file is printed on stderr and the
	        file is left out of the output and of the stats.
	        """
	        if recurse and entry.is_dir(follow_symlinks=self.dereference):
	            self.total_dirs += 1
	            self.browser.scandir(entry.path, self._process_entries)
	        elif entry.is_file(follow_symlinks=self.dereference):
	            try:
	                hash_objs = {algo: hash_algos[algo]() for algo in self.algos}
	                with open(entry.path, 'rb') as entry_handle:
	                    # Read by chunks, read() without a size would load the whole file in memory
	                    # https://stackoverflow.com/questions/3431825/generating-an-md5-checksum-of-a-file
	                    # https://docs.python.org/3/library/functions.html#iter
	                    for chunk in iter(lambda: entry_handle.read(self.chunk_size), b''):
	                        for hash_obj in hash_objs.values():
	                            hash_obj.update(chunk)
	                    # The position after the last read is the size: no need for a stat()
	                    entry_size = entry_handle.tell()
	            except Exception as e:
	                print('Error hashing "{}": {}({})'.format(entry.path, e.__class__.__name__, e), file=sys.stderr)
	            else:
	                for algo_code, algo_tag in self.algos.items():
	                    print(self.out_template.format(
	                            algo=algo_tag, path=entry.path, hash=hash_objs[algo_code].hexdigest()
	                        ),
	                        file=self.out
	                    )
	                # With several algorithms, an empty line separates the files
	                if len(self.algos) > 1:
	                    print(file=self.out)
	                self.total_files += 1
	                self.total_files_size += entry_size


	# Script entry point (the indentation of this block was lost in this copy; it is restored here)
	if __name__ == '__main__':
	    try:
	        main()
	    except KeyboardInterrupt:
	        # Interrupted by the user: leave without a traceback
	        pass
	    except IOError as e:
	        # A closed pipe (output piped to a command that stopped reading) is ignored,
	        # any other I/O error is propagated
	        if e.errno != errno.EPIPE:
	            raise

	r'''
	About long paths on Windows:
	- https://bugs.python.org/issue18199
	- https://bugs.python.org/issue27731
	- https://blogs.msdn.microsoft.com/jeremykuhne/2016/06/21/more-on-new-net-path-handling/
	- https://blogs.msdn.microsoft.com/jeremykuhne/2016/07/30/net-4-6-2-and-long-paths-on-windows-10/
	- https://lifehacker.com/windows-10-allows-file-names-longer-than-260-characters-1785201032
	- https://betanews.com/2016/05/29/long-paths-windows-10/
	- https://social.msdn.microsoft.com/Forums/en-US/fc85630e-5684-4df6-ad2f-5a128de3deef/260-character-explorer-path-length-limit?forum=windowsgeneraldevelopmentissues
	- https://msdn.microsoft.com/en-us/library/aa365247%28VS.85%29.aspx?f=255&MSPPError=-2147217396#maxpath
	- https://msdn.microsoft.com/en-us/library/windows/desktop/aa374191(v=vs.85).aspx
	- https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx#maxpath / https://msdn.microsoft.com/en-us/library/aa365247(VS.85).aspx#maxpath
	- https://github.com/python/cpython/blob/master/PC/python.manifest
	- https://stackoverflow.com/questions/36219317/pathname-too-long-to-open/36219497
	Contrary to what aa365247 says ("can also"), both the registry key AND the manifest are required ..

	Alternative solutions (workaround / hacks):
	- https://docs.python.org/3/library/os.html#os.listdir
	handle paths manually, with listdir for example, adding '\\?\' when required ; this is heavy, you must carry the path
	- https://docs.python.org/3/library/os.html#os.scandir
	scandir cannot be used directly with '\\?\', you lose the relative path, or
	using it with relative calls (cd each time) + other calls using '\\?\' you lose the interest of scandir
	(and it may not return the informations without \\.\ on long paths)
	- https://docs.python.org/3/library/pathlib.html / https://www.python.org/dev/peps/pep-0428/ / https://github.com/python/cpython/blob/3.6/Lib/pathlib.py
	pathlib doesn't handle '\\?\' and doesn't allow to chdir by itself ; also follows symlinks by default and doesn't use scandir

	Also, even if you chdir before each listing so that open('short-name') works, the full path can still cause issues: the '\\.\' notation has to be used.
	Some path normalization is also required, as explained in the issues / blog posts above.
	And there is probably more, see https://bugs.python.org/issue18199#msg191035
	'''