Last active
April 1, 2020 19:05
-
-
Save williballenthin/ab23abd5eec5bf5a272bfcfb2342ec04 to your computer and use it in GitHub Desktop.
parse SavedState artifacts extracted from OSX.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
derived from plistlib.py -- a tool to generate and parse MacOSX .plist files. | |
edited by: Willi Ballenthin ([email protected]) | |
changes: | |
- remove all but the binary plist parser | |
- add support for UID fields, see https://bugs.python.org/issue26707 | |
""" | |
import os | |
import struct | |
import binascii | |
import datetime | |
from io import BytesIO | |
# | |
# Binary Plist | |
# | |
class UID:
    """
    Integer object reference, as used by keyed-archiver plist files.

    Wraps one int in the range [0, 2**64). Instances are hashable, compare
    equal to other UID instances holding the same value, and can be used
    where an integer index is expected (via ``__index__``).
    """

    def __init__(self, data):
        # Reject non-ints first so the range checks below compare ints only.
        if not isinstance(data, int):
            raise TypeError("data must be an int")
        if data < 0:
            raise ValueError("UIDs must be positive")
        if data >= 1 << 64:
            raise ValueError("UIDs cannot be >= 2**64")
        self.data = data

    def __index__(self):
        return self.data

    def __repr__(self):
        return "{}({!r})".format(self.__class__.__name__, self.data)

    def __reduce__(self):
        # Pickle as "call the class again with the stored value".
        return (self.__class__, (self.data,))

    def __eq__(self, other):
        if isinstance(other, UID):
            return self.data == other.data
        return NotImplemented

    def __hash__(self):
        return hash(self.data)
class Data:
    """
    Thin wrapper around a bytes payload.

    Deprecated: pass plain bytes objects around instead.
    """

    def __init__(self, data):
        if isinstance(data, bytes):
            self.data = data
        else:
            raise TypeError("data must be as bytes")

    @classmethod
    def fromBase64(cls, data):
        # Alternate constructor: decode the base64 input, then wrap the
        # resulting bytes.
        decoded = _decode_base64(data)
        return cls(decoded)

    def asBase64(self, maxlinelength=76):
        # Base64-encode the payload, splitting the output into lines.
        return _encode_base64(self.data, maxlinelength)

    def __eq__(self, other):
        # Comparable with other wrappers of the same class and with raw bytes.
        if isinstance(other, self.__class__):
            return self.data == other.data
        if isinstance(other, bytes):
            return self.data == other
        return NotImplemented

    def __repr__(self):
        return "{}({!r})".format(self.__class__.__name__, self.data)
def _encode_base64(s, maxlinelength=76):
    """
    Base64-encode ``s`` as a sequence of newline-terminated lines.

    Modelled on base64.encodebytes(), with the line length made
    configurable through ``maxlinelength``. Returns bytes.
    """
    # Four output characters carry three input bytes, so this is the number
    # of input bytes that fit on one output line.
    step = (maxlinelength // 4) * 3
    return b''.join(
        binascii.b2a_base64(s[start:start + step])
        for start in range(0, len(s), step)
    )
def _decode_base64(s):
    """
    Decode base64 input given either as str or as a bytes-like object.

    str input is first encoded to UTF-8 bytes; anything else is handed to
    binascii.a2b_base64 unchanged. Returns the decoded bytes.
    """
    raw = s.encode("utf-8") if isinstance(s, str) else s
    return binascii.a2b_base64(raw)
class InvalidFileException(ValueError):
    """Raised when input cannot be parsed as a plist; a ValueError subclass."""

    def __init__(self, message="Invalid file"):
        super().__init__(message)
# Maps an integer width in bytes to the matching unsigned struct format code.
_BINARY_FORMAT = {1: 'B', 2: 'H', 4: 'L', 8: 'Q'}

# Marks object-table slots that have not been decoded yet. None is itself a
# valid decoded value, so it cannot be used as the marker.
_undefined = object()


class _BinaryPlistParser:
    """
    Decode a binary plist file, following the description of the binary
    format. parse() raises InvalidFileException in case of error, otherwise
    it returns the root object.

    see also: http://opensource.apple.com/source/CF/CF-744.18/CFBinaryPList.c
    """

    def __init__(self, use_builtin_types, dict_type):
        # use_builtin_types: when true, data objects are returned as bytes,
        #   otherwise they are wrapped in Data.
        # dict_type: callable used to create each decoded dictionary.
        self._use_builtin_types = use_builtin_types
        self._dict_type = dict_type

    def parse(self, fp):
        """Parse the seekable binary file object ``fp``; return the root object."""
        try:
            # File layout, in order:
            #   HEADER
            #   object...
            #   refid->offset...   (the offset table)
            #   TRAILER            (last 32 bytes)
            self._fp = fp
            fp.seek(-32, os.SEEK_END)
            trailer = fp.read(32)
            if len(trailer) != 32:
                raise InvalidFileException()
            fields = struct.unpack('>6xBBQQQ', trailer)
            (offset_size, self._ref_size, num_objects, top_object,
             offset_table_offset) = fields
            fp.seek(offset_table_offset)
            self._object_offsets = self._read_ints(num_objects, offset_size)
            # One cache slot per object; filled lazily by _read_object.
            self._objects = [_undefined] * num_objects
            return self._read_object(top_object)
        except (OSError, IndexError, struct.error, OverflowError,
                UnicodeDecodeError):
            raise InvalidFileException()

    def _get_size(self, tokenL):
        """Return the size of the next object."""
        if tokenL != 0xF:
            # The size fits in the low nibble of the marker itself.
            return tokenL
        # Otherwise an extra byte follows whose low two bits give log2 of
        # the width of the big-endian size integer that comes after it.
        width = 1 << (self._fp.read(1)[0] & 0x3)
        raw = self._fp.read(width)
        return struct.unpack('>' + _BINARY_FORMAT[width], raw)[0]

    def _read_ints(self, n, size):
        """Read ``n`` big-endian unsigned integers of ``size`` bytes each."""
        data = self._fp.read(size * n)
        code = _BINARY_FORMAT.get(size)
        if code is not None:
            # Widths struct understands are unpacked in a single call.
            return struct.unpack('>' + code * n, data)
        if not size or len(data) != size * n:
            raise InvalidFileException()
        return tuple(
            int.from_bytes(data[pos:pos + size], 'big')
            for pos in range(0, size * n, size)
        )

    def _read_refs(self, n):
        """Read ``n`` object references (each ``self._ref_size`` bytes wide)."""
        return self._read_ints(n, self._ref_size)

    def _read_object(self, ref):
        """
        Read the object with reference number ``ref``.

        May recursively read sub-objects (content of an array/dict).
        Decoded objects are cached, so each one is decoded at most once.
        """
        cached = self._objects[ref]
        if cached is not _undefined:
            return cached

        fp = self._fp
        fp.seek(self._object_offsets[ref])
        marker = fp.read(1)[0]
        kind = marker & 0xF0
        low = marker & 0x0F

        # Markers that fully determine the value on their own.
        # The referenced source code also mentions URL (0x0c, 0x0d) and
        # UUID (0x0e), but neither can be generated using the Cocoa libraries.
        constants = {0x00: None, 0x08: False, 0x09: True, 0x0f: b''}

        if marker in constants:
            value = constants[marker]

        elif kind == 0x10:  # int
            value = int.from_bytes(fp.read(1 << low), 'big', signed=low >= 3)

        elif marker == 0x22:  # real (32-bit)
            (value,) = struct.unpack('>f', fp.read(4))

        elif marker == 0x23:  # real (64-bit)
            (value,) = struct.unpack('>d', fp.read(8))

        elif marker == 0x33:  # date
            (seconds,) = struct.unpack('>d', fp.read(8))
            # timestamp 0 of binary plists corresponds to 1/1/2001
            # (year of Mac OS X 10.0), instead of 1/1/1970.
            epoch = datetime.datetime(2001, 1, 1)
            value = epoch + datetime.timedelta(seconds=seconds)

        elif kind == 0x40:  # data
            payload = fp.read(self._get_size(low))
            value = payload if self._use_builtin_types else Data(payload)

        elif kind == 0x50:  # ascii string
            length = self._get_size(low)
            value = fp.read(length).decode('ascii')

        elif kind == 0x60:  # unicode string (length counts 2-byte units)
            length = self._get_size(low)
            value = fp.read(length * 2).decode('utf-16be')

        elif kind == 0x80:  # UID
            # used by Key-Archiver plist files
            value = UID(int.from_bytes(fp.read(1 + low), 'big'))

        elif kind == 0xA0:  # array
            count = self._get_size(low)
            member_refs = self._read_refs(count)
            value = []
            # Register the container before descending so that a reference
            # back to it resolves to this same list.
            self._objects[ref] = value
            for member in member_refs:
                value.append(self._read_object(member))

        # kind == 0xB0 is documented as 'ordset', but is not actually
        # implemented in the Apple reference code.

        # kind == 0xC0 is documented as 'set', but sets cannot be used in
        # plists.

        elif kind == 0xD0:  # dict
            count = self._get_size(low)
            key_refs = self._read_refs(count)
            value_refs = self._read_refs(count)
            value = self._dict_type()
            self._objects[ref] = value
            for key_ref, value_ref in zip(key_refs, value_refs):
                # The entry's value is decoded before its key.
                item = self._read_object(value_ref)
                value[self._read_object(key_ref)] = item

        else:
            raise InvalidFileException()

        self._objects[ref] = value
        return value
def _count_to_size(count):
    """
    Return the smallest width in bytes (1, 2, 4 or 8) that can hold ``count``.

    ``count`` is expected to be a non-negative integer below 2**64.
    """
    if count < 1 << 8:
        return 1
    if count < 1 << 16:
        return 2
    # This must be a comparison. The previous `count << 1 << 32` was a shift
    # whose result is truthy for every count reaching this point, so 4 was
    # returned even for values that need 8 bytes.
    if count < 1 << 32:
        return 4
    return 8
def _is_fmt_binary(header):
    """Return True when ``header`` begins with the ``bplist00`` signature."""
    signature = b'bplist00'
    return header[:len(signature)] == signature
# | |
# Generic bits | |
# | |
# Registry of supported plist formats. Each entry provides:
#   detect: callable(header bytes) -> bool, recognises the format
#   parser: class instantiated to parse a file of that format
_FORMATS = {
    'FMT_BINARY': {
        'detect': _is_fmt_binary,
        'parser': _BinaryPlistParser,
    },
}
def load(fp, *, fmt=None, use_builtin_types=True, dict_type=dict):
    """Read a .plist file. 'fp' should be a readable, seekable file object.

    When 'fmt' is None the format is detected from the first 32 bytes of
    the file; otherwise 'fmt' must be a key of the format registry.
    InvalidFileException is raised when no registered format matches.

    Return the unpacked root object (which usually is a dictionary).
    """
    if fmt is not None:
        parser_cls = _FORMATS[fmt]['parser']
    else:
        header = fp.read(32)
        # Rewind so the parser sees the file from the beginning.
        fp.seek(0)
        matched = next(
            (info for info in _FORMATS.values() if info['detect'](header)),
            None,
        )
        if matched is None:
            raise InvalidFileException()
        parser_cls = matched['parser']

    parser = parser_cls(use_builtin_types=use_builtin_types,
                        dict_type=dict_type)
    return parser.parse(fp)
def loads(value, *, fmt=None, use_builtin_types=True, dict_type=dict):
    """Read a .plist file from a bytes object.

    Wraps 'value' in an in-memory file and delegates to load() with the
    same options.

    Return the unpacked root object (which usually is a dictionary).
    """
    stream = BytesIO(value)
    options = {
        'fmt': fmt,
        'use_builtin_types': use_builtin_types,
        'dict_type': dict_type,
    }
    return load(stream, **options)
def json_encode(z):
    '''
    used when serializing a decoded bplist into json.

    a UID is reduced to its integer value; any other object is rejected
    with TypeError.
    '''
    if not isinstance(z, UID):
        type_name = z.__class__.__name__
        raise TypeError(f"Object of type '{type_name}' is not JSON serializable")
    return z.data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
parse SavedState artifacts extracted from OSX. | |
author: Willi Ballenthin ([email protected]) | |
''' | |
import re | |
import sys | |
import json | |
import struct | |
import logging | |
import binascii | |
import collections | |
logger = logging.getLogger('osx.savedstate') | |
logging.basicConfig(level=logging.INFO) | |
try: | |
import hexdump | |
except ImportError: | |
logger.error('please install `hexdump` via pip') | |
sys.exit(-1) | |
try: | |
import bplist | |
except ImportError: | |
logger.error('bplist.py not found in the same directory') | |
sys.exit(-1) | |
except SyntaxError: | |
logger.error('python3 required') | |
sys.exit(-1) | |
def aes_decrypt(key, ciphertext, iv=b'\x00' * 0x10):
    '''
    decrypt `ciphertext` with AES in CBC mode and return the raw plaintext.

    args:
      key (bytes): the AES key (the original note says AES128-CBC).
      ciphertext (bytes): the data to decrypt.
      iv (bytes): the initialization vector, by default 16 zero bytes.

    returns:
      bytes: the decryptor output; no padding is removed here.
    '''
    # imported lazily, inside the function, as in the original.
    import cryptography.hazmat.backends
    import cryptography.hazmat.primitives.ciphers
    from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes

    cipher = Cipher(
        algorithms.AES(key),
        modes.CBC(iv),
        backend=cryptography.hazmat.backends.default_backend(),
    )
    decryptor = cipher.decryptor()
    body = decryptor.update(ciphertext)
    tail = decryptor.finalize()
    return body + tail
# one decrypted window state record.
WindowState = collections.namedtuple(
    'WindowState',
    (
        'size',       # size of the byte array in `data.data` for this window.
        'meta',       # the parsed metadata for this window from `windows.plist`.
        'plaintext',  # the decrypted window state byte array.
        'state',      # the deserialized NSKeyedArchiver window state.
    ),
)
def parse_plaintext(buf):
    '''
    parse the decrypted window state extracted from `data.data`.

    args:
      buf (bytes): the decrypted window state byte array.

    returns:
      Dict[any: any]: the deserialized bplist contents.

    raises:
      ValueError: when the magic following the class name is not "rchv".
    '''
    # layout (all integers big-endian):
    #
    #   struct S {
    #      // often 0x0
    #      uint32_t  unk1;
    #      uint32_t  class_name_size;
    #      char      class_name[class_name_size];
    #      // seems to be "rchv"
    #      char      magic[4];
    #      uint32_t  size;
    #      // this is an NSKeyedArchiver serialized datastructure.
    #      // in practice, a bplist with specific interpretation.
    #      uint8_t   buf[size];
    #   }
    fixed_prefix = 8   # unk1 + class_name_size
    fixed_suffix = 8   # magic + size

    _unk1, class_name_size = struct.unpack_from('>II', buf, 0x0)
    variable_fmt = '>%ds4sI' % (class_name_size)
    raw_name, magic, size = struct.unpack_from(variable_fmt, buf, fixed_prefix)

    if magic != b'rchv':
        raise ValueError('unexpected magic')

    logger.debug('found archived class: %s', raw_name.decode('ascii'))

    # the serialized bplist starts right after the variable-length header.
    start = fixed_prefix + class_name_size + fixed_suffix
    return bplist.loads(buf[start:start + size])
def parse_window_state(plist, buf):
    '''
    decrypt and parse the single window state record at the start of `buf`.

    args:
      plist (List[Dict[any, any]]): window metadata entries; the one whose
        `NSWindowID` equals the record's window id supplies `NSDataKey`.
      buf (bytes): data beginning with a record header: 4-byte magic "NSCR",
        4-byte version "1000", then big-endian uint32 window id and size.

    returns:
      WindowState: the record size, matching metadata, plaintext and state.

    raises:
      ValueError: on a bad magic or version; also when no metadata entry
        matches, in which case the record size is the second exception arg.
    '''
    header_size = 0x10
    magic, version, window_id, size = struct.unpack_from('>4s4sII', buf, 0x0)

    if magic != b'NSCR':
        raise ValueError('invalid magic')
    if version != b'1000':
        raise ValueError('invalid version')

    # `size` counts from the start of the record, header included.
    ciphertext = buf[header_size:size]

    candidates = [d for d in plist if d.get('NSWindowID') == window_id]
    if not candidates:
        known = sorted(str(p.get('NSWindowID', 'unknown')) for p in plist)
        window_ids = ', '.join(known)
        raise ValueError('missing window metadata, wanted: %d, found: %s' % (window_id, window_ids), size)

    window = candidates[0]
    logger.debug('found window: %d', window_id)

    plaintext = aes_decrypt(window['NSDataKey'], ciphertext)
    state = parse_plaintext(plaintext)
    return WindowState(size, window, plaintext, state)
def parse_window_states(plist, data):
    '''
    decrypt and parse the serialized window state stored in `data.data` and `windows.plist`.

    this is a generator: records are parsed one at a time, as they are
    consumed. a record that fails to parse is logged; it is skipped when
    the failure reports the record's size, otherwise parsing stops there.

    args:
      plist (Dict[any, any]): parsed plist `windows.plist`.
      data (bytes): the contents of `data.data`.

    returns:
      List[WindowState]: decrypted window state instances, with fields:
        size (int): the size of the window state blob.
        meta (Dict[any, any]): the relevant metadata from `windows.plist`.
        plaintext (bytes): the decrypted windows state structure.
        state (Dict[any, any]): the deserialized window state.

    raises:
      ValueError: when the remaining data does not start with "NSCR".
    '''
    header_size = 0x10
    buf = data
    while len(buf) > header_size:
        if not buf.startswith(b'NSCR'):
            raise ValueError('invalid magic')

        try:
            window_state = parse_window_state(plist, buf)
        except (ValueError, struct.error) as e:
            # struct.error is not a ValueError; it is raised for truncated
            # or garbage records and must not abort the whole generator.
            logger.warning('failed to parse window state: %s', e.args[0] if e.args else e)

            # only a positive integer in args[1] is a usable record size.
            # other ValueErrors carry unrelated data there (e.g. the bytes
            # object of a UnicodeDecodeError), and a size of zero would
            # never advance the buffer.
            skip = e.args[1] if len(e.args) > 1 else None
            if isinstance(skip, int) and skip > 0:
                buf = buf[skip:]
                continue
            break

        buf = buf[window_state.size:]
        yield window_state
def json_encode_window_state(z):
    '''
    helper for this tool to serialize custom classes into json.

    handles:
      - bplist.UID: emitted as its integer value.
      - bytes: emitted as text when valid utf-8, otherwise as a hex string
        tagged with the prefix "hex://".

    raises:
      TypeError: for any other type.
    '''
    if isinstance(z, bplist.UID):
        # this is just a number
        return z.data

    if not isinstance(z, bytes):
        type_name = z.__class__.__name__
        raise TypeError("Object of type '{type_name}' is not JSON serializable".format(type_name=type_name))

    try:
        # much of the data is text, so try to fetch that
        return z.decode('utf-8')
    except UnicodeDecodeError:
        # otherwise, return hex, with a tag
        return 'hex://' + binascii.b2a_hex(z).decode('ascii')
def main():
    '''
    command line entry point.

    usage: <script> <input directory> <output directory>

    reads `windows.plist` and `data.data` from the input directory and
    writes one `window<N>.json` file per parsed window state, holding its
    metadata and deserialized state, into the output directory.
    '''
    import os
    import os.path

    # fail with a readable message rather than an IndexError traceback
    # when arguments are missing.
    if len(sys.argv) < 3:
        logger.error('usage: <script> <input directory> <output directory>')
        sys.exit(-1)

    inputpath = sys.argv[1]
    outputpath = sys.argv[2]

    logger.info('input: %s', inputpath)

    with open(os.path.join(inputpath, 'windows.plist'), 'rb') as f:
        windows = bplist.load(f)

    with open(os.path.join(inputpath, 'data.data'), 'rb') as f:
        data = f.read()

    for i, window in enumerate(parse_window_states(windows, data)):
        if not window.meta:
            logger.info('no data for window%d', i)
            continue

        filename = 'window%d' % (i)
        filepath = os.path.join(outputpath, filename + '.json')
        logger.info('writing: %s', filepath)
        # custom types (UID, bytes) are converted by json_encode_window_state.
        doc = json.dumps({'meta': window.meta,
                          'state': window.state},
                         default=json_encode_window_state,
                         indent=4,
                         sort_keys=True)
        with open(filepath, 'wb') as f:
            f.write(doc.encode('utf-8'))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment