Last active
December 17, 2015 05:39
-
-
Save josiahcarlson/5559103 to your computer and use it in GitHub Desktop.
Steganography in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
''' | |
Steganogrphy in Python | |
Copyright 2013 Josiah Carlson | |
Released under the GNU LGPL v2.1 license | |
What, how, why, etc, are discussed: | |
http://www.dr-josiah.com/2013/05/steganography-in-python.html | |
''' | |
from cStringIO import StringIO | |
import functools | |
import getpass | |
import glob | |
from hashlib import sha1, sha256 | |
import hmac | |
import optparse | |
import re | |
import tokenize | |
CODING_RE = re.compile("coding[:=]\s*([-\w.]+)") | |
CHR = u'\xa0' | |
def detect_encoding(fobj): | |
# Detects the character encoding of a file based on the Python coding | |
# declaration. Returns the space replacement encoded and the encoding. | |
lines = [] | |
fobj.seek(0) | |
for i, line in enumerate(fobj): | |
lines.append(line) | |
if i >= 1: | |
break | |
encoding = CODING_RE.search('\n'.join(lines)) | |
encoding = encoding.group(1) if encoding else '' | |
fobj.seek(0) | |
return CHR.encode(encoding) if encoding else '', encoding | |
def clean(fname, w=False): | |
# Cleans a file's contents to have no hidden data. Useful for prepping | |
# new data, or prepping to count the available bits in a file. | |
inp = StringIO(open(fname, 'rb').read()) | |
enc = detect_encoding(inp)[0] | |
tokens = [] | |
try: | |
for toknum, tokval, _1, _2, _3 in tokenize.generate_tokens(inp.readline): | |
if toknum == tokenize.INDENT: | |
tokens.append((toknum, tokval.replace('\t', 8*' '), _1, _2, _3)) | |
elif enc and toknum == tokenize.COMMENT: | |
if 'coding:' in tokval or 'coding=' in tokval: | |
tokens.append((toknum, tokval, _1, _2, _3)) | |
continue | |
tokens.append((toknum, tokval.replace(enc, ' '), _1, _2, _3)) | |
else: | |
tokens.append((toknum, tokval, _1, _2, _3)) | |
except tokenize.TokenError: | |
print "-ERR", fname | |
return | |
try: | |
cleaned = tokenize.untokenize(tokens) | |
except AssertionError: | |
print "-ERR", fname | |
return | |
if w: | |
with open(fname, 'wb') as out: | |
out.write(cleaned) | |
return cleaned | |
def clean_and_pass(f): | |
# A quick decorator to clean the content of a file and pass it to a | |
# function. | |
@functools.wraps(f) | |
def cnp(fname, *args, **kwargs): | |
inp = kwargs.pop('inp', None) | |
if not inp: | |
r = clean(fname, False) | |
if r is None: | |
return | |
inp = StringIO(r) | |
return f(inp, fname, *args, **kwargs) | |
return cnp | |
def _count(inp): | |
# Count the number of bits that can be hidden in a given file. Used to | |
# verify that data can actually be embedded. | |
enc = detect_encoding(inp)[0] | |
count = 0 | |
for toknum, tokval, _, _, _ in tokenize.generate_tokens(inp.readline): | |
if toknum == tokenize.INDENT: | |
count += tokval.replace(8*' ', '\t').count('\t') | |
elif enc and toknum == tokenize.COMMENT: | |
cnt = count | |
count += tokval.count(' ') | |
return count | |
@clean_and_pass | |
def count_bits(inp, fname, p=True): | |
# Wrapper that prints the results of the bit counting. | |
count = _count(inp) | |
if p: | |
print count, fname | |
return count | |
@clean_and_pass | |
def add_bits(inp, fname, hex_data): | |
# Actually add bits to a file. | |
bits_needed = 8 + len(hex_data) * 4 | |
bits_available = min(_count(inp), 1032) | |
if bits_needed > bits_available: | |
print "-ERR %s - %s/%s available"%(fname, bits_needed, bits_available) | |
return | |
inp.seek(0) | |
enc = detect_encoding(inp)[0] | |
if enc: | |
BITS = [' ', enc] | |
hex_data = '%02x'%(len(hex_data)-1) + hex_data | |
hex_data = int(hex_data, 16) | |
bits = [] | |
while len(bits) < bits_needed: | |
bits.append(hex_data & 1) | |
hex_data >>= 1 | |
tokens = [] | |
for toknum, tokval, _1, _2, _3 in tokenize.generate_tokens(inp.readline): | |
if bits and toknum == tokenize.INDENT and len(tokval) >= 8: | |
tv = [tokval[i:i+8] for i in xrange(0, len(tokval), 8)] | |
for i in xrange(len(tv)): | |
if len(tv[i]) == 8: | |
if bits.pop(): | |
tv[i] = '\t' | |
if not bits: | |
break | |
tokens.append((toknum, ''.join(tv), _1, _2, _3)) | |
elif enc and bits and toknum == tokenize.COMMENT: | |
if 'coding:' in tokval or 'coding=' in tokval: | |
tokens.append((toknum, tokval, _1, _2, _3)) | |
continue | |
tv = tokval.split(' ') | |
xx = 0 | |
for i in xrange(len(tv)-1): | |
if bits: | |
xx <<= 1 | |
xx += bits[-1] | |
tv[i] += BITS[bits.pop()] if bits else ' ' | |
tv = ''.join(tv) | |
tokens.append((toknum, tv, _1, _2, _3)) | |
else: | |
tokens.append((toknum, tokval, _1, _2, _3)) | |
with open(fname, 'wb') as out: | |
out.write(tokenize.untokenize(tokens)) | |
def read_bits(fname, p=True): | |
# Read bits from a file. | |
inp = StringIO(open(fname, 'rb').read()) | |
encoding = detect_encoding(inp)[1] | |
chars = (u' ', CHR) | |
bits = 0 | |
bc = 0 | |
found = False | |
done = False | |
for toknum, tokval, _1, _2, _3 in tokenize.generate_tokens(inp.readline): | |
if toknum == tokenize.INDENT: | |
tdata = tokval.replace('\t', 8*' ') | |
if len(tdata) < 8: | |
continue | |
for i in xrange(0, len(tdata)-7, 8): | |
bits <<= 1 | |
bc += 1 | |
if tokval.startswith('\t'): | |
bits += 1 | |
tokval = tokval[1:] | |
else: | |
tokval = tokval[8:] | |
if not found and bc == 8: | |
found = 4 * (bits + 1) | |
bits = bc = 0 | |
elif bc == found: | |
done = True | |
break | |
elif toknum == tokenize.COMMENT: | |
if 'coding:' in tokval or 'coding=' in tokval: | |
continue | |
tokval = tokval.decode(encoding) | |
bcc = bc | |
for ch in tokval: | |
if ch not in chars: | |
continue | |
bits <<= 1 | |
bc += 1 | |
bits += chars.index(ch) | |
if not found and bc == 8: | |
found = 4 * (bits + 1) | |
bits = bc = bcc = 0 | |
elif bc == found: | |
done = True | |
break | |
if done: | |
break | |
found //= 4 | |
data = ('%%0%sx'%found)%bits | |
if p: | |
print data, fname | |
return data | |
def _calc_hmac(inp, pw, hfun): | |
# Use some key derivation to make the hmac more difficult to crack. | |
hash = '' | |
for i in xrange(32768): | |
hash = hfun(hash + pw + "%02x"%i).digest() | |
inp.seek(0) | |
return hmac.new(hash, inp.read(), hfun).hexdigest() | |
@clean_and_pass | |
def embed_hmac(inp, fname, hfun, w=True): | |
# Actually embed an hmac into a file | |
required = 8 + hfun().digest_size * 8 | |
available = _count(inp) | |
if available < required: | |
print "-ERR %s - %s/%s available"%(fname, required, available) | |
return | |
pw = getpass.getpass("hmac password:") | |
hash = _calc_hmac(inp, pw, hfun) | |
if w: | |
inp.seek(0) | |
add_bits(fname, hash, inp=inp) | |
return hash | |
def check_hmac(fname, hfun): | |
# Verify an embedded hmac matches | |
digest = read_bits(fname, False) | |
hm = embed_hmac(fname, hfun, False) | |
if sum(ord(x) ^ ord(y) for x,y in zip(digest,hm)) + (len(digest) ^ len(hm)): | |
print '-NOMATCH', fname | |
else: | |
print '+MATCH', fname | |
if __name__ == '__main__': | |
from optparse import OptionParser | |
parser = OptionParser() | |
parser.usage = "%prog [options] <file, files, or filename mask>" | |
parser.add_option('--bits', dest='count', action='store_true', default=False, | |
help="Count the number of available bits in files") | |
parser.add_option('--write', dest='write', action='store', default='', | |
help="Add the provided hex data to the file, replacing any previously hidden data") | |
parser.add_option('--read', dest='read', action='store_true', default=False, | |
help="Read the bits from the files and return their hex values") | |
parser.add_option('--clean', dest='flush', action='store_true', default=False, | |
help="Clean the provided files of hidden bits") | |
parser.add_option('--sha1', dest='sha1', action='store_true', default=False, | |
help="Generate the sha1 hmac of the file, and hide the hmac in the file (a password prompt will be provided)") | |
parser.add_option('--check-sha1', dest='csha1', action='store_true', default=False, | |
help="Check the embedded sha1 hmac of the file (a password prompt will be provided)") | |
parser.add_option('--sha256', dest='sha256', action='store_true', default=False, | |
help="Generate the sha256 hmac of the file, and hide the hmac in the file (a password prompt will be provided)") | |
parser.add_option('--check-sha256', dest='csha256', action='store_true', default=False, | |
help="Check the embedded sha256 hmac of the file (a password prompt will be provided)") | |
options, args = parser.parse_args() | |
# Glob expansion for Windows | |
_args = [] | |
for arg in args: | |
_args.extend(glob.glob(arg)) | |
args = _args | |
if not args: | |
print "You must provide at least one file to manipulate\n" | |
parser.parse_args(['-h']) | |
raise SystemExit | |
if options.count: | |
_ = map(count_bits, args) | |
elif options.write: | |
add_bits(args[0], options.write) | |
elif options.read: | |
_ = map(read_bits, args) | |
elif options.flush: | |
_ = [clean(a, True) for a in args] | |
elif options.sha1: | |
_ = embed_hmac(args[0], sha1) | |
elif options.sha256: | |
_ = embed_hmac(args[0], sha256) | |
elif options.csha1: | |
_ = check_hmac(args[0], sha1) | |
elif options.csha256: | |
_ = check_hmac(args[0], sha256) | |
else: | |
print "Nothing to do!\n" | |
parser.parse_args(['-h']) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment