Created
July 26, 2017 19:53
-
-
Save skywodd/5a0b1ab3323fa98ee12aaf4ea1af3cd5 to your computer and use it in GitHub Desktop.
Unicoder - The unicode nightmare for developers.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Unicoder - The unicode nightmare for developers. | |
""" | |
import os | |
import sys | |
import random | |
import codecs | |
import argparse | |
import unicodedata | |
from collections import defaultdict | |
from PIL import ImageFont | |
# Package information | |
__author__ = "Fabien Batteix (@skywodd)" | |
__copyright__ = "Copyright 2017 TamiaLab" | |
__credits__ = ["Fabien Batteix", "TamiaLab"] | |
__license__ = "GPLv3" | |
__version__ = "1.0.0" | |
__maintainer__ = "Fabien Batteix" | |
__email__ = "[email protected]" | |
__status__ = "Production" # *wink* | |
def verbose_print(verbose, *args, **kwargs):
    """
    Print helper that stays silent unless verbose mode is on.

    All positional and keyword arguments after ``verbose`` are handed
    to ``print`` unchanged.
    """
    if not verbose:
        return
    print(*args, **kwargs)
def build_lookalike_map(font='cour.ttf', font_size=20):
    """
    Search for unicode look-alikes and build a bi-directional map.

    Every code point from 0 up to ``sys.maxunicode`` whose general
    category is one of the symbol, punctuation, number and letter
    categories listed below is rendered with the given font. Characters
    producing the same bitmap are grouped together.

    :param font: Font file name or path handed to ``ImageFont.truetype``.
    :param font_size: Font size handed to ``ImageFont.truetype``.
    :return: A ``(footprint_lut, lookalike_lut)`` tuple.
        ``footprint_lut`` maps each retained character to its footprint;
        ``lookalike_lut`` maps each footprint to the list of characters
        sharing it, in code point order.
    """
    # Unicode general categories that are candidates for replacement
    categories = frozenset((
        'So', 'Sm', 'Sc',
        'Ps', 'Po', 'Pi',
        'Pf', 'Pe', 'Pd',
        'No', 'Nl', 'Nd',
        'Lu', 'Lt', 'Lo', 'Ll',
    ))
    footprint_lut = {}
    lookalike_lut = defaultdict(list)
    # Keep the ``font`` argument intact; the loaded font gets its own name
    typeface = ImageFont.truetype(font, font_size)
    for c in map(chr, range(sys.maxunicode + 1)):
        if unicodedata.category(c) not in categories:
            continue
        pixels = typeface.getmask(c, mode='1')
        # The flattened pixel sequence alone cannot tell apart two masks of
        # different dimensions (e.g. a 2x8 and a 8x2 solid bar), so the
        # mask size is part of the footprint as well.
        footprint = (pixels.size, bytes(pixels))
        lookalike_lut[footprint].append(c)
        footprint_lut[c] = footprint
    return footprint_lut, lookalike_lut
def lookalike_replacement(c, lut, shuffle):
    """
    Pick the character that should stand in for ``c``.

    ``lut`` is a ``(footprint_lut, lookalike_lut)`` pair. A character
    without a known footprint is returned untouched. In shuffle mode
    the substitute is drawn at random from the characters sharing the
    footprint (``c`` itself may be drawn). Otherwise the first
    character of that group that differs from ``c`` is used, falling
    back to ``c`` when there is none.
    """
    footprints, groups = lut
    key = footprints.get(c)
    if key is None:
        return c
    candidates = groups[key]
    if shuffle:
        return random.choice(candidates)
    return next((other for other in candidates if other != c), c)
def unicoder_file(path, lut, verbose=False, shuffle=False):
    """
    Replace every character of a file with a unicode look-alike, in place.

    A missing path or a directory is reported on stderr and skipped.

    :param path: Path of the file to rewrite; it is read and written
        as UTF-8.
    :param lut: The ``(footprint_lut, lookalike_lut)`` pair passed
        through to ``lookalike_replacement``.
    :param verbose: Currently unused; kept so existing callers keep
        working.
    :param shuffle: Passed through to ``lookalike_replacement``.
    """
    # Test if file exist first
    if not os.path.exists(path):
        print('File not found "{}"'.format(path), file=sys.stderr)
        return
    # Special case for directories
    if os.path.isdir(path):
        print('Skipping directory "{}"'.format(path), file=sys.stderr)
        return
    # Open the file and rewrite its content
    with codecs.open(path, 'r+', encoding='utf8') as f:
        data = f.read()
        # Build the whole replacement before touching the file content
        replaced = ''.join(
            lookalike_replacement(c, lut, shuffle) for c in data
        )
        f.seek(0)
        f.write(replaced)
        # A look-alike may need fewer UTF-8 bytes than the character it
        # replaces; cut the file here so no stale trailing bytes remain.
        f.truncate()
def main(argv=None):
    """
    Command line entry point.

    Parses the arguments, prints the warning, asks for an explicit
    confirmation, builds the look-alike map and then processes each
    given path.

    :param argv: Argument list to parse; ``None`` makes argparse read
        ``sys.argv[1:]``.
    :return: Process exit code: 1 when the user does not confirm,
        0 otherwise.
    """
    # Arguments parser
    parser = argparse.ArgumentParser(
        description='Turn a source code (or any text) file(s) into garbage using unicode look-alike characters.',
        epilog='The author of this program IS NOT responsible for any damage made with it.'
    )
    parser.add_argument('--version', action='version',
                        version='Unicoder {} {}'.format(__version__, __copyright__))
    parser.add_argument('paths', metavar='FILE', nargs='+',
                        help='File(s) path(s) to be processed.')
    parser.add_argument('--verbose', dest='verbose', action='store_true',
                        help='output more information during the files processing.')
    parser.add_argument('--shuffle', dest='shuffle', action='store_true',
                        help='randomize the unicode replacement process.')
    parser.add_argument('--font', dest='font', default='cour.ttf',
                        help='target font for look-alike detection (default is cour.ttf).')
    parser.add_argument('--font-size', metavar='SIZE', dest='font_size', type=int, default=20,
                        help='font size to use for look-alike detection (default is 20).')
    parser.add_argument('--russian-roulette', dest='russian_roulette', action='store_true',
                        help='one chance out of six to get shot.')
    # Parse the real command line instead of a hard-coded argument list
    args = parser.parse_args(argv)

    # Critically important warning
    print("Hi user!")
    print("Seem like you're about to make a terrible mistake or a stupid joke.")
    print("Do whatever you want to. I'm a program, not your mom. But be sure to understand this warning first.")
    print("This program *WILL DESTROY* the given file(s) by replacing all caracters with unicode look-alike.")
    print("Cancel the execution of this program *NOW* if you don't have a tested backup of all files!")
    print("The author of this program *IS NOT* responsible for any damage made with this program.")
    print("Use this program *AT YOUR OWN RISKS*!")
    print()
    if input('Type "I agree" to continue: ').strip('"').lower() != 'i agree':
        print('Execution cancelled')
        return 1

    # Build the LUT
    verbose_print(args.verbose, 'Building look-alike map for font "{}" at size {} ...'.format(args.font, args.font_size))
    lut = build_lookalike_map(args.font, args.font_size)
    verbose_print(args.verbose, 'Look-alike map generated!')

    # Process each path
    verbose_print(args.verbose, 'Starting file(s) processing ...')
    for path in args.paths:
        verbose_print(args.verbose, 'Processing: {}'.format(path))
        # Trigger the routine only 1/6th of the times in russian roulette mode
        # (randrange(6) is non-zero, hence truthy, five times out of six)
        if args.russian_roulette and random.randrange(6):
            print('*click*')
            continue
        # Too late to cancel
        unicoder_file(path, lut,
                      verbose=args.verbose,
                      shuffle=args.shuffle)

    # End-of-script
    verbose_print(args.verbose, 'File(s) processing done.')
    return 0


# Main entry point
if __name__ == '__main__':
    # sys.exit is always available, unlike the site-provided exit() helper
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment