tilacog · December 20, 2017 10:43
diff --git a/spedscrambler.py b/spedscrambler.py
 import collections
 import datetime
 import hashlib
 import random
 import string
 import sys
 from contextlib import suppress


 # Fixed seed: the ``random`` module yields the same sequence on every run,
 # so scrambling the same input file produces the same output.
 random.seed(0)
 # Number of lines already printed for each record type (updated by ``run``).
 done = collections.Counter()
 # Modulus used by ``max_for``; per-record-type caps fall in range(maximum).
 maximum = 5


 def max_for(text):
    """Return the output cap associated with ``text``.

    The cap is computed from the BLAKE2b digest of the UTF-8 encoded
    text: the digest bytes are added up and reduced modulo the
    module-level ``maximum``.  The result is therefore an integer in
    ``range(maximum)`` and is identical for identical text.
    """
    digest = hashlib.blake2b(text.encode('utf-8')).digest()
    byte_total = 0
    for byte in digest:
        byte_total += byte
    return byte_total % maximum


 def random_string(size=6, chars=string.ascii_uppercase + string.digits):
    """Build a random string of ``size`` characters drawn from ``chars``.

    Each character is picked with ``random.choice``, one call per
    position, so the output follows the state of the module-level
    ``random`` generator.  The default alphabet is the uppercase ASCII
    letters followed by the decimal digits.
    """
    # One ``random.choice(chars)`` call per output position, in order.
    picks = map(random.choice, [chars] * size)
    return ''.join(picks)


 def scramble(value):
    """Return an anonymized stand-in for one field value.

    Some values are returned verbatim:

    * dates written as exactly eight digits in ``DDMMYYYY`` form,
    * numbers that use a comma as the decimal separator,
    * the identifiers ``LECD`` and ``LECF``.

    Any other value is replaced by a random string of the same length:
    digits only when the value consists solely of digits, otherwise the
    default alphabet of ``random_string``.
    """
    # Dates: insist on the full 8-digit form before parsing.  ``strptime``
    # on its own also accepts unpadded day/month, so a shorter numeric
    # code such as "112017" would parse as a date and leak through
    # unscrambled.
    if len(value) == 8 and value.isdigit():
        with suppress(ValueError):
            datetime.datetime.strptime(value, "%d%m%Y")
            return value

    # Decimals: only values containing a comma are tried as floats.
    if ',' in value:
        with suppress(ValueError):
            float(value.replace(',', '.'))
            return value

    # Identifiers, do not touch.
    if value in ("LECD", "LECF"):
        return value

    # All-digit values are replaced by random digits of the same length.
    if value.isdigit():
        return random_string(size=len(value), chars=string.digits)

    # Everything else: same length, default alphabet.
    return random_string(size=len(value))


 def run(line):
    """Scramble one pipe-delimited record line and print the result.

    A line is ignored when it does not start with ``|`` or when it
    splits on ``|`` into fewer than three pieces.  Otherwise the second
    piece is taken as the record type and the pieces between it and the
    last one as the field values.  The line is printed, with every value
    passed through ``scramble``, only while fewer than
    ``max_for(record_type)`` lines of that record type have been printed;
    ``done`` keeps the per-type count.

    Returns ``None`` in every case.
    """
    if not line.startswith('|'):
        return
    try:
        # The first and last pieces are whatever lies outside the outer
        # pipes (normally '' and the line ending); both are discarded.
        _, record_type, *values, _ = line.split('|')
    except ValueError:
        # Unpacking needs at least three pieces; shorter lines are skipped.
        return
    if done[record_type] >= max_for(record_type):
        return
    new_values = '|'.join(scramble(v) for v in values)
    print(f'|{record_type}|{new_values}|')
    done[record_type] += 1


 if __name__ == '__main__':
    # Script entry point: scramble the file named by the first argument.
    if len(sys.argv) < 2:
        # Without this guard a missing argument surfaces as an IndexError.
        sys.exit(f'usage: {sys.argv[0]} FILE')
    filepath = sys.argv[1]
    # latin1 maps every byte to a character, so decoding never fails.
    with open(filepath, encoding='latin1') as f:
        for line in f:
            run(line)
	import collections
	import datetime
	import hashlib
	import random
	import string
	import sys
	from contextlib import suppress


	# Fixed seed: the ``random`` module yields the same sequence on every run,
	# so scrambling the same input file produces the same output.
	random.seed(0)
	# Number of lines already printed for each record type (updated by ``run``).
	done = collections.Counter()
	# Modulus used by ``max_for``; per-record-type caps fall in range(maximum).
	maximum = 5


	def max_for(text):
	    """Return the output cap associated with ``text``.

	    The cap is computed from the BLAKE2b digest of the UTF-8 encoded
	    text: the digest bytes are added up and reduced modulo the
	    module-level ``maximum``.  The result is therefore an integer in
	    ``range(maximum)`` and is identical for identical text.
	    """
	    hasher = hashlib.blake2b()
	    hasher.update(text.encode('utf-8'))
	    return sum(hasher.digest()) % maximum


	def random_string(size=6, chars=string.ascii_uppercase + string.digits):
	    """Build a random string of ``size`` characters drawn from ``chars``.

	    Each character is picked with ``random.choice``, one call per
	    position, so the output follows the state of the module-level
	    ``random`` generator.  The default alphabet is the uppercase ASCII
	    letters followed by the decimal digits.
	    """
	    return ''.join(random.choice(chars) for _ in range(size))


	def scramble(value):
	    """Return an anonymized stand-in for one field value.

	    Some values are returned verbatim:

	    * dates written as exactly eight digits in ``DDMMYYYY`` form,
	    * numbers that use a comma as the decimal separator,
	    * the identifiers ``LECD`` and ``LECF``.

	    Any other value is replaced by a random string of the same length:
	    digits only when the value consists solely of digits, otherwise the
	    default alphabet of ``random_string``.
	    """
	    # Dates: insist on the full 8-digit form before parsing.  ``strptime``
	    # on its own also accepts unpadded day/month, so a shorter numeric
	    # code such as "112017" would parse as a date and leak through
	    # unscrambled.
	    if len(value) == 8 and value.isdigit():
	        with suppress(ValueError):
	            datetime.datetime.strptime(value, "%d%m%Y")
	            return value

	    # Decimals: only values containing a comma are tried as floats.
	    if ',' in value:
	        with suppress(ValueError):
	            float(value.replace(',', '.'))
	            return value

	    # Identifiers, do not touch.
	    if value in ("LECD", "LECF"):
	        return value

	    # All-digit values are replaced by random digits of the same length.
	    if value.isdigit():
	        return random_string(size=len(value), chars=string.digits)

	    # Everything else: same length, default alphabet.
	    return random_string(size=len(value))


	def run(line):
	    """Scramble one pipe-delimited record line and print the result.

	    A line is ignored when it does not start with ``|`` or when it
	    splits on ``|`` into fewer than three pieces.  Otherwise the second
	    piece is taken as the record type and the pieces between it and the
	    last one as the field values.  The line is printed, with every value
	    passed through ``scramble``, only while fewer than
	    ``max_for(record_type)`` lines of that record type have been printed;
	    ``done`` keeps the per-type count.

	    Returns ``None`` in every case.
	    """
	    # The delimiter is a bare pipe; the backslash-escaped form was a
	    # markup artifact and would never match real input lines.
	    if not line.startswith('|'):
	        return
	    try:
	        # The first and last pieces are whatever lies outside the outer
	        # pipes (normally '' and the line ending); both are discarded.
	        _, record_type, *values, _ = line.split('|')
	    except ValueError:
	        # Unpacking needs at least three pieces; shorter lines are skipped.
	        return
	    if done[record_type] >= max_for(record_type):
	        return
	    new_values = '|'.join(scramble(v) for v in values)
	    print(f'|{record_type}|{new_values}|')
	    done[record_type] += 1


	if __name__ == '__main__':
	    # Script entry point: scramble the file named by the first argument.
	    if len(sys.argv) < 2:
	        # Without this guard a missing argument surfaces as an IndexError.
	        sys.exit(f'usage: {sys.argv[0]} FILE')
	    filepath = sys.argv[1]
	    # latin1 maps every byte to a character, so decoding never fails.
	    with open(filepath, encoding='latin1') as f:
	        for line in f:
	            run(line)