Skip to content

Instantly share code, notes, and snippets.

@williballenthin
Last active October 10, 2019 16:19
Show Gist options
  • Save williballenthin/21d6e8318e30da76a4dbf1e939436c14 to your computer and use it in GitHub Desktop.
Save williballenthin/21d6e8318e30da76a4dbf1e939436c14 to your computer and use it in GitHub Desktop.
parse the Apple ARD filesystem.cache file
'''
author: Willi Ballenthin
email: [email protected]
license: Apache 2.0
'''
import sys
import struct
import collections
# Fixed-size header found at offset 0 of the cache file.
# `parse_header` fills these fields, in this order, from 22 consecutive
# big-endian unsigned 32-bit integers (0x58 bytes in total).
Header = collections.namedtuple(
    'Header',
    # identification
    'magic version '
    # meaning not yet known
    'unk1 unk2 '
    'ts_delta '
    # entry counters
    'total_entries total_folders total_files total_symlinks '
    # (offset, length) pairs locating each section of the file;
    # `Index` uses the names/kinds/versions/users/groups pairs to slice
    # the corresponding string tables out of the buffer
    'entries_offset entries_length '
    'names_offset names_length '
    'kinds_offset kinds_length '
    'versions_offset versions_length '
    'users_offset users_length '
    'groups_offset groups_length '
    'complete_size',
)
def parse_header(buf):
    """Decode the file header stored at the very start of `buf`.

    The header is read as 22 big-endian unsigned 32-bit integers, one per
    field of `Header`, in declaration order.

    Args:
        buf: bytes-like object holding (at least) the first 0x58 bytes of
            the cache file.

    Returns:
        A populated `Header` tuple.

    Raises:
        struct.error: if `buf` is too short to contain the header.
    """
    return Header._make(
        struct.unpack_from('>IIIIIIIIIIIIIIIIIIIIII', buf, 0x0))
# Header that opens every string table (names, kinds, versions, ...).
# `parse_table_header` reads it as '>IQIII', i.e. 0x18 bytes:
#   count           - number of entry descriptors that follow the header
#   magic           - 64-bit value, meaning not yet known
#   entries_length  - added to 0x18 by `parse_table_entry` to find the
#                     start of the string data
#   strings_offset, strings_length - parsed but not used by this file
TableHeader = collections.namedtuple(
    'TableHeader',
    'count magic entries_length strings_offset strings_length',
)
def parse_table_header(buf):
    """Decode the 0x18-byte header found at the start of a string table.

    Args:
        buf: bytes-like object that begins with the table header.

    Returns:
        A `TableHeader` built from one uint32, one uint64 and three more
        uint32 values, all big-endian.

    Raises:
        struct.error: if `buf` holds fewer than 0x18 bytes.
    """
    raw_fields = struct.unpack_from('>IQIII', buf, 0x0)
    return TableHeader._make(raw_fields)
# Fixed-size (10 byte, '>IIH') record describing one string of a table.
#   offset         - position of the string, relative to the start of the
#                    table's string data
#   owner          - absolute file offset of an entry using this string
#                    (followed by `Index.get_entries_by_name`)
#   string_length  - byte length of the string, not counting the leading
#                    0x01 byte that precedes it
TableEntryDescriptor = collections.namedtuple(
    'TableEntryDescriptor',
    'offset owner string_length',
)
# One fully decoded row of a string table.
#   index       - index into the table of this entry
#   descriptor  - the `TableEntryDescriptor` the row was decoded from
#   string      - the decoded text
TableEntry = collections.namedtuple(
    'TableEntry',
    'index descriptor string',
)
def parse_table_entry(buf, header, index, offset):
    """Decode one string-table row.

    Args:
        buf: bytes of the whole table (table header included).
        header: the table's `TableHeader`; its `entries_length` tells where
            the string data begins.
        index: position of this row within the table; stored as-is in the
            result.
        offset: offset within `buf` of the row's 10-byte descriptor.

    Returns:
        A `TableEntry` holding the descriptor and the decoded string.

    Raises:
        struct.error: if the descriptor lies outside `buf`.
        UnicodeDecodeError: if the string bytes are not valid UTF-8.
    """
    descriptor = TableEntryDescriptor._make(
        struct.unpack_from('>IIH', buf, offset))

    # String data begins after the 0x18-byte table header and the
    # descriptor area.
    strings_base = 0x18 + header.entries_length
    begin = strings_base + descriptor.offset
    # Every stored string is preceded by one 0x01 byte (maybe a string
    # type?), hence one extra byte is sliced...
    tagged = buf[begin:begin + descriptor.string_length + 1]
    # ...and then dropped again before decoding.
    text = tagged[1:].decode('utf-8')

    return TableEntry(index, descriptor, text)
# A parsed string table: its `TableHeader` plus the list of `TableEntry`
# rows, in file order.
Table = collections.namedtuple('Table', 'header entries')
def parse_table(buf):
    """Parse a complete string table.

    Args:
        buf: bytes of the table, starting at its header.

    Returns:
        A `Table` with the decoded header and one `TableEntry` per
        descriptor, in file order.
    """
    header = parse_table_header(buf)
    # Descriptors are packed back to back right after the 0x18-byte table
    # header; each one occupies 10 bytes.
    rows = [
        parse_table_entry(buf, header, row, 0x18 + row * 10)
        for row in range(header.count)
    ]
    return Table(header, rows)
# Raw 0x50-byte record describing one file-system entry, as unpacked by
# `parse_entry`.
#
# Fields whose purpose is visible in this file:
#   parent            - absolute file offset of the parent entry,
#                       0 when there is none
#   type_flags        - bit 0x2 set means the entry is a directory
#   previous_owner    - absolute file offset of the next entry in the chain
#                       of entries sharing a name, 0 terminates the chain
#   *_reference       - index into the names / kinds / versions / users /
#                       groups string table respectively
# The remaining `field_*` / `unk*` members are not interpreted here
# (NOTE(review): the `field_XX` suffixes look like hex offsets - confirm).
EntryDescriptor = collections.namedtuple(
    'EntryDescriptor',
    'parent '
    'field_4 field_8 field_C field_E '
    'field_12 field_14 field_16 '
    'unk1 unk2 '
    'field_20 field_28 '
    'match_count '
    'field_2E version_resource '
    'type_flags '
    'field_36 '
    'previous_owner '
    'name_reference '
    'kind_reference '
    'version_reference '
    'user_reference '
    'group_reference',
)
def parse_entry(buf):
    """Decode the entry record located at the start of `buf`.

    Args:
        buf: bytes-like object beginning with a 0x50-byte entry record.

    Returns:
        An `EntryDescriptor` with all 23 big-endian fields.

    Raises:
        struct.error: if `buf` holds fewer than 0x50 bytes.
    """
    values = struct.unpack_from('>IIIHIHHIIHQIHHIHHIIIIII', buf, 0x0)
    return EntryDescriptor._make(values)
class Entry:
    """One file-system entry, bound to the index it was read from.

    The descriptor only stores numeric references; the accessors below
    resolve them through the string tables held by the owning index.

    Attributes:
        index: the owning index object (provides `names`, `kinds`,
            `versions`, `users`, `groups`, `get_entry` and `_path_cache`).
        offset: absolute offset of this entry's record in the file.
        descriptor: the decoded `EntryDescriptor`.
    """

    def __init__(self, index, offset, descriptor):
        self.index = index
        self.descriptor = descriptor
        self.offset = offset

    def get_name(self):
        """Return the entry's file name, with trailing carriage returns removed."""
        record = self.index.names[self.descriptor.name_reference]
        return record.string.rstrip('\x0d')

    def get_kind(self):
        """Return the entry's kind string, with trailing '+' characters removed."""
        record = self.index.kinds[self.descriptor.kind_reference]
        return record.string.rstrip('+')

    def get_version(self):
        """Return the entry's version string."""
        record = self.index.versions[self.descriptor.version_reference]
        return record.string

    def get_user(self):
        """Return the name of the owning user."""
        record = self.index.users[self.descriptor.user_reference]
        return record.string

    def get_group(self):
        """Return the name of the owning group."""
        record = self.index.groups[self.descriptor.group_reference]
        return record.string

    def is_directory(self):
        """Return True when bit 0x2 of `type_flags` is set."""
        return (self.descriptor.type_flags & 0x2) == 0x2

    def get_path(self):
        """Return the entry's path, built by walking up the parent chain.

        Directory components end with '/'. Results are memoised in the
        index's `_path_cache`, keyed by entry offset.
        """
        cache = self.index._path_cache
        if self.offset in cache:
            return cache[self.offset]

        path = self.get_name()
        if self.is_directory():
            path += '/'

        parent_offset = self.descriptor.parent
        # a parent offset of zero marks an entry without a parent
        if parent_offset != 0x0:
            path = self.index.get_entry(parent_offset).get_path() + path

        cache[self.offset] = path
        return path

    def __str__(self):
        """Render the entry as a single line of `label: value` columns.

        Columns whose value is empty (in practice only the version, since
        the others are space padded) are left out.
        """
        columns = (
            ('path', self.get_path().ljust(32)),
            ('kind', self.get_kind().ljust(5)),
            ('user', self.get_user().ljust(16)),
            ('group', self.get_group().ljust(16)),
            ('version', self.get_version()),
        )
        rendered = ['%s: %s' % (label, text) for label, text in columns if text]
        return 'file: %s' % (' '.join(rendered))
class Index:
    """Parsed view of a whole ARD `filesystem.cache` buffer.

    On construction the file header and the five string tables (names,
    kinds, versions, users, groups) are decoded eagerly. Entry records are
    decoded lazily, on request, by `get_entry` / `get_entries`.

    Attributes:
        buf: the raw file contents.
        header: the decoded `Header`.
        names, kinds, versions, users, groups: lists of `TableEntry`, one
            list per string table.
    """

    # Size in bytes of the file header (22 uint32 values); the entry
    # records are read starting right after it.
    _HEADER_SIZE = 0x58
    # Size in bytes of one entry record (see the format in `parse_entry`).
    _ENTRY_SIZE = 0x50

    def __init__(self, buf):
        self.buf = buf
        self.header = parse_header(buf)
        self.names = self.get_names_table().entries
        self.kinds = self.get_kinds_table().entries
        self.versions = self.get_versions_table().entries
        self.users = self.get_users_table().entries
        self.groups = self.get_groups_table().entries
        # not exported.
        # written to in `Entry.get_path()`.
        #
        # map from entry offset to path
        self._path_cache = {}

    def get_table(self, start, length):
        """Parse the string table occupying `buf[start:start + length]`."""
        return parse_table(self.buf[start:start + length])

    def get_names_table(self):
        """Parse and return the table of file names."""
        return self.get_table(self.header.names_offset,
                              self.header.names_length)

    def get_kinds_table(self):
        """Parse and return the table of kind strings."""
        return self.get_table(self.header.kinds_offset,
                              self.header.kinds_length)

    def get_versions_table(self):
        """Parse and return the table of version strings."""
        return self.get_table(self.header.versions_offset,
                              self.header.versions_length)

    def get_users_table(self):
        """Parse and return the table of user names."""
        return self.get_table(self.header.users_offset,
                              self.header.users_length)

    def get_groups_table(self):
        """Parse and return the table of group names."""
        return self.get_table(self.header.groups_offset,
                              self.header.groups_length)

    def get_entry(self, offset):
        """Decode the entry whose record starts at absolute `offset`.

        Args:
            offset: absolute offset of the record within the file
                (an odd choice, but that is what the format stores).

        Returns:
            An `Entry` bound to this index.

        Raises:
            struct.error: if fewer than 0x50 bytes are available at
                `offset`.
        """
        # Slice just the record instead of the whole remainder of the file:
        # this method runs once per entry and once per path component.
        desc = parse_entry(self.buf[offset:offset + self._ENTRY_SIZE])
        # Bind the entry to *this* index; relying on a module-level `index`
        # variable breaks as soon as the class is used from other code.
        return Entry(self, offset, desc)

    def get_entries(self):
        """Yield every entry of the file, in storage order."""
        for i in range(self.header.total_entries):
            yield self.get_entry(self._HEADER_SIZE + i * self._ENTRY_SIZE)

    def get_name_entry(self, name):
        """Return the names-table row whose string equals `name`.

        Raises:
            KeyError: if no row matches.
        """
        for entry in self.names:
            if entry.string == name:
                return entry
        raise KeyError(name)

    def get_entries_by_name(self, name):
        """Yield all entries carrying the exact file name `name`.

        Starts at the entry the name row points to (`owner`) and follows
        the `previous_owner` links until a zero offset ends the chain.

        Raises:
            KeyError: if `name` is not present in the names table.
        """
        name_entry = self.get_name_entry(name)
        entry = self.get_entry(name_entry.descriptor.owner)
        yield entry
        while entry.descriptor.previous_owner != 0x0:
            entry = self.get_entry(entry.descriptor.previous_owner)
            yield entry
def parse_index(buf):
    """Build an `Index` over the raw contents of a cache file.

    Args:
        buf: the complete file contents as bytes.

    Returns:
        The `Index` wrapping `buf`.
    """
    parsed = Index(buf)
    return parsed
def print_namedtuple(item):
    """Print a namedtuple as a heading followed by one line per field.

    Integer values are shown in hexadecimal; everything else is shown
    using its normal string conversion.

    Args:
        item: any object exposing `_fields` (e.g. a namedtuple instance).
    """
    print(item.__class__.__name__ + ":")
    for field in item._fields:
        value = getattr(item, field)
        template = ' - %s: 0x%x' if isinstance(value, int) else ' - %s: %s'
        print(template % (field, value))
# Command-line entry point: dump the header and every entry of the cache
# file named by the first argument. Guarded so that importing this module
# does not try to read a file.
if __name__ == '__main__':
    if len(sys.argv) < 2:
        # Exit with a readable message instead of an IndexError traceback.
        sys.exit('usage: %s <path to filesystem.cache>' % sys.argv[0])

    with open(sys.argv[1], 'rb') as f:
        buf = f.read()

    index = parse_index(buf)
    print_namedtuple(index.header)
    print('files:')
    for entry in index.get_entries():
        print(' - ' + str(entry))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment