Skip to content

Instantly share code, notes, and snippets.

@tilacog
Last active December 20, 2017 10:43
import collections
import datetime
import hashlib
import random
import string
import sys
from contextlib import suppress
# Seed the RNG with a fixed value so the generated replacement strings are
# the same on every run.
random.seed(0)
# Number of records seen so far, keyed by record type (updated in run()).
done = collections.Counter()
# Modulus used by max_for(): each record type's quota lies in range(maximum).
maximum = 5
def max_for(text, limit=None):
    """Derive a small, deterministic quota from *text*.

    The text is hashed with BLAKE2b and the digest bytes are summed; the
    sum is then reduced modulo *limit*.  The same text therefore always
    yields the same quota, and different texts get varied quotas.

    Args:
        text: String to hash; it is encoded as UTF-8 first.
        limit: Positive modulus applied to the byte sum.  When omitted,
            the module-level ``maximum`` is used.

    Returns:
        An int in ``range(limit)``.
    """
    if limit is None:
        limit = maximum
    digest = hashlib.blake2b(text.encode('utf-8')).digest()
    return sum(digest) % limit
def random_string(size=6, chars=string.ascii_uppercase + string.digits):
    """Build a string of *size* characters drawn at random from *chars*.

    Each character is picked independently with ``random.choice``, so the
    result depends on the current state of the global ``random`` generator.
    """
    picked = []
    for _ in range(size):
        picked.append(random.choice(chars))
    return ''.join(picked)
def scramble(value):
    """Return *value* unchanged or replaced by random text of equal length.

    Left untouched:
      * values that parse as a date in ``%d%m%Y`` form,
      * values containing a comma that parse as a float once the comma is
        replaced by a dot,
      * the identifiers ``LECD`` and ``LECF``.

    Anything else is replaced by ``random_string`` output of the same
    length: digits only when the value is all digits, otherwise the
    default alphabet of ``random_string``.
    """
    # Dates pass through as they are.
    try:
        datetime.datetime.strptime(value, "%d%m%Y")
    except Exception:
        pass
    else:
        return value

    # Comma-decimal numbers pass through as they are.
    try:
        is_decimal = ',' in value
        if is_decimal:
            float(value.replace(',', '.'))
    except Exception:
        is_decimal = False
    if is_decimal:
        return value

    # Identifiers that must survive scrambling.
    if value in ("LECD", "LECF"):
        return value

    digits_only = value.isdigit()
    length = len(value)
    if not digits_only:
        # Mixed content: use random_string's default alphabet.
        return random_string(size=length)
    # Purely numeric content stays numeric.
    return random_string(size=length, chars=string.digits)
def run(line):
    """Scramble one pipe-delimited record and print it, subject to a quota.

    Lines that do not start with ``|`` or that split into fewer than three
    pieces are ignored.  The second piece is the record type; the pieces
    between it and the last one are the values.  A record is printed only
    while the count stored in ``done`` for its type is below
    ``max_for(record_type)``; each printed record has every value passed
    through ``scramble``.
    """
    if not line.startswith('|'):
        return

    pieces = line.split('|')
    # Need at least: leading piece, record type, trailing piece.
    if len(pieces) < 3:
        return

    record_type = pieces[1]
    values = pieces[2:-1]

    if done[record_type] >= max_for(record_type):
        return

    new_values = '|'.join([scramble(item) for item in values])
    print(f'|{record_type}|{new_values}|')
    done[record_type] += 1
if __name__ == '__main__':
    # Command-line entry point: process the file named by the first argument.
    if len(sys.argv) < 2:
        # Exit with a usage hint instead of an IndexError traceback.
        sys.exit(f'usage: {sys.argv[0]} FILE')
    filepath = sys.argv[1]
    # latin1 maps every byte to a character, so decoding cannot fail.
    with open(filepath, encoding='latin1') as f:
        for line in f:
            run(line)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment