Last active
October 10, 2019 16:19
-
-
Save williballenthin/21d6e8318e30da76a4dbf1e939436c14 to your computer and use it in GitHub Desktop.
Parse the Apple Remote Desktop (ARD) filesystem.cache file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
author: Willi Ballenthin | |
email: [email protected] | |
license: Apache 2.0 | |
''' | |
import sys | |
import struct | |
import collections | |
# Field names of the file header, in on-disk order.
# `parse_header` reads one big-endian unsigned 32-bit integer per field.
_HEADER_FIELDS = (
    'magic', 'version',
    'unk1', 'unk2',
    'ts_delta',
    'total_entries', 'total_folders', 'total_files', 'total_symlinks',
    'entries_offset', 'entries_length',
    'names_offset', 'names_length',
    'kinds_offset', 'kinds_length',
    'versions_offset', 'versions_length',
    'users_offset', 'users_length',
    'groups_offset', 'groups_length',
    'complete_size',
)
Header = collections.namedtuple('Header', _HEADER_FIELDS)


def parse_header(buf):
    '''
    Decode the file header located at offset 0 of `buf`.

    Args:
      buf: bytes-like object that starts with the header
        (22 fields * 4 bytes = 0x58 bytes).

    Returns:
      Header: one unsigned 32-bit big-endian value per field.

    Raises:
      struct.error: if `buf` is shorter than the header.
    '''
    # every header field has the same width, so the format is just
    # one 'I' per declared field.
    layout = '>' + 'I' * len(_HEADER_FIELDS)
    return Header._make(struct.unpack_from(layout, buf, 0x0))
# Header found at the start of each table buffer handed to `parse_table`.
TableHeader = collections.namedtuple(
    'TableHeader',
    'count magic entries_length strings_offset strings_length')


def parse_table_header(buf):
    '''
    Decode the table header located at offset 0 of `buf`.

    The layout is big-endian: a 32-bit `count`, a 64-bit `magic`, then
    three 32-bit fields (0x18 bytes in total).

    Args:
      buf: bytes-like object that starts with a table header.

    Returns:
      TableHeader: the decoded fields.

    Raises:
      struct.error: if `buf` is shorter than 0x18 bytes.
    '''
    (count,
     magic,
     entries_length,
     strings_offset,
     strings_length) = struct.unpack_from('>IQIII', buf, 0x0)
    return TableHeader(
        count=count,
        magic=magic,
        entries_length=entries_length,
        strings_offset=strings_offset,
        strings_length=strings_length,
    )
# Fixed-size (10 byte) record describing one string of a table.
TableEntryDescriptor = collections.namedtuple(
    'TableEntryDescriptor', 'offset owner string_length')

TableEntry = collections.namedtuple('TableEntry', (
    'index',  # index into the table of this entry
    'descriptor',
    'string',
))


def parse_table_entry(buf, header, index, offset):
    '''
    Decode one table entry: its descriptor plus the string it points at.

    Args:
      buf: bytes holding the whole table (table header included).
      header: the table's header; only `entries_length` is read.
      index: position of this entry within the table, stored as-is.
      offset: offset within `buf` of the entry's 10-byte descriptor.

    Returns:
      TableEntry: (index, descriptor, decoded string).

    Raises:
      struct.error: if the descriptor does not fit in `buf`.
      UnicodeDecodeError: if the string bytes are not valid UTF-8.
    '''
    descriptor = TableEntryDescriptor._make(
        struct.unpack_from('>IIH', buf, offset))

    # the string is addressed relative to 0x18 + header.entries_length,
    # and is preceded by a single 0x01 byte (maybe string type?).
    marker_at = 0x18 + header.entries_length + descriptor.offset

    # skip the leading 0x01, then take `string_length` bytes.
    text_start = marker_at + 1
    text_end = text_start + descriptor.string_length
    text = buf[text_start:text_end].decode('utf-8')

    return TableEntry(index=index, descriptor=descriptor, string=text)
# A fully decoded table: its header plus the list of its entries.
Table = collections.namedtuple('Table', 'header entries')


def parse_table(buf):
    '''
    Decode a complete table from `buf`.

    Args:
      buf: bytes holding exactly one table, starting with its header.

    Returns:
      Table: the header and a list with `header.count` TableEntry items.
    '''
    header = parse_table_header(buf)

    descriptors_start = 0x18  # sizeof(TableHeader)
    descriptor_size = 10      # bytes consumed per entry by parse_table_entry

    entries = [
        parse_table_entry(
            buf, header, i, descriptors_start + i * descriptor_size)
        for i in range(header.count)
    ]
    return Table(header=header, entries=entries)
# Field names of one entry record, in on-disk order.
# The meaning of the `field_*` / `unk*` members is not established here.
EntryDescriptor = collections.namedtuple('EntryDescriptor', (
    'parent',
    'field_4', 'field_8', 'field_C', 'field_E',
    'field_12', 'field_14', 'field_16',
    'unk1', 'unk2',
    'field_20', 'field_28',
    'match_count',
    'field_2E', 'version_resource',
    'type_flags',
    'field_36',
    'previous_owner',
    'name_reference',
    'kind_reference',
    'version_reference',
    'user_reference',
    'group_reference',
))

# big-endian, one format code per EntryDescriptor field; 0x50 bytes total.
_ENTRY_LAYOUT = ''.join((
    '>',
    'I',      # parent
    'IIHI',   # field_4, field_8, field_C, field_E
    'HHI',    # field_12, field_14, field_16
    'IH',     # unk1, unk2
    'QI',     # field_20, field_28
    'H',      # match_count
    'HI',     # field_2E, version_resource
    'H',      # type_flags
    'H',      # field_36
    'I',      # previous_owner
    'IIIII',  # name/kind/version/user/group references
))


def parse_entry(buf):
    '''
    Decode the entry record located at offset 0 of `buf`.

    Args:
      buf: bytes-like object that starts with an entry record.

    Returns:
      EntryDescriptor: the decoded fields.

    Raises:
      struct.error: if `buf` is shorter than one record (0x50 bytes).
    '''
    values = struct.unpack_from(_ENTRY_LAYOUT, buf, 0x0)
    return EntryDescriptor._make(values)
class Entry:
    '''
    One entry record together with the index it belongs to.

    Attributes:
      index: the owning index; used to resolve string references, to look
        up the parent entry, and to share the path cache.
      offset: absolute offset of this record; also the path-cache key.
      descriptor: the decoded record fields.
    '''

    def __init__(self, index, offset, descriptor):
        self.index = index
        self.offset = offset
        self.descriptor = descriptor

    def _resolve(self, table, reference):
        '''Return the string stored at position `reference` of `table`.'''
        return table[reference].string

    def get_name(self):
        '''Return the entry's name with trailing '\\x0d' characters removed.'''
        raw = self._resolve(self.index.names, self.descriptor.name_reference)
        return raw.rstrip('\x0d')

    def get_kind(self):
        '''Return the entry's kind with trailing '+' characters removed.'''
        raw = self._resolve(self.index.kinds, self.descriptor.kind_reference)
        return raw.rstrip('+')

    def get_version(self):
        '''Return the entry's version string.'''
        return self._resolve(self.index.versions,
                             self.descriptor.version_reference)

    def get_user(self):
        '''Return the entry's user string.'''
        return self._resolve(self.index.users,
                             self.descriptor.user_reference)

    def get_group(self):
        '''Return the entry's group string.'''
        return self._resolve(self.index.groups,
                             self.descriptor.group_reference)

    def is_directory(self):
        '''Return True when bit 0x2 of `type_flags` is set.'''
        return (self.descriptor.type_flags & 0x2) != 0

    def get_path(self):
        '''
        Return the path of this entry, built by walking up the parents.

        Directory components get a trailing '/'. A parent offset of 0x0
        ends the walk. Results are memoized in `index._path_cache`,
        keyed by entry offset.
        '''
        cache = self.index._path_cache
        if self.offset in cache:
            return cache[self.offset]

        component = self.get_name()
        if self.is_directory():
            component += '/'

        parent_offset = self.descriptor.parent
        if parent_offset == 0x0:
            full_path = component
        else:
            parent_entry = self.index.get_entry(parent_offset)
            full_path = parent_entry.get_path() + component

        cache[self.offset] = full_path
        return full_path

    def __str__(self):
        # all columns are computed up front; padded columns are never
        # empty, so in practice only an empty version is left out.
        columns = (
            ('path', self.get_path().ljust(32)),
            ('kind', self.get_kind().ljust(5)),
            ('user', self.get_user().ljust(16)),
            ('group', self.get_group().ljust(16)),
            ('version', self.get_version()),
        )
        rendered = ['%s: %s' % (label, text)
                    for label, text in columns if text]
        return 'file: %s' % (' '.join(rendered))
class Index:
    '''
    Parsed view over the raw bytes of a filesystem.cache file.

    Construction decodes the file header and the five string tables
    (names, kinds, versions, users, groups). Entry records are decoded
    on demand by `get_entry` / `get_entries`.

    Attributes:
      buf: the raw file contents.
      header: the decoded file header.
      names, kinds, versions, users, groups: lists of table entries,
        indexed by the `*_reference` fields of an entry record.
    '''

    # absolute offset of the first entry record, as used by `get_entries`.
    _FIRST_ENTRY_OFFSET = 0x58
    # distance in bytes between two consecutive entry records.
    _ENTRY_SIZE = 0x50

    def __init__(self, buf):
        self.buf = buf
        self.header = parse_header(buf)

        self.names = self.get_names_table().entries
        self.kinds = self.get_kinds_table().entries
        self.versions = self.get_versions_table().entries
        self.users = self.get_users_table().entries
        self.groups = self.get_groups_table().entries

        # not exported.
        # written to in `Entry.get_path()`.
        #
        # map from entry offset to path
        self._path_cache = {}

    def get_table(self, start, length):
        '''Parse the table stored in `buf[start:start+length]`.'''
        return parse_table(self.buf[start:start + length])

    def get_names_table(self):
        '''Parse the names table located via the file header.'''
        return self.get_table(self.header.names_offset,
                              self.header.names_length)

    def get_kinds_table(self):
        '''Parse the kinds table located via the file header.'''
        return self.get_table(self.header.kinds_offset,
                              self.header.kinds_length)

    def get_versions_table(self):
        '''Parse the versions table located via the file header.'''
        return self.get_table(self.header.versions_offset,
                              self.header.versions_length)

    def get_users_table(self):
        '''Parse the users table located via the file header.'''
        return self.get_table(self.header.users_offset,
                              self.header.users_length)

    def get_groups_table(self):
        '''Parse the groups table located via the file header.'''
        return self.get_table(self.header.groups_offset,
                              self.header.groups_length)

    def get_entry(self, offset):
        '''
        Decode the entry record at absolute file offset `offset`.

        Returns:
          Entry: bound to this index.

        Raises:
          struct.error: if a full record does not fit at `offset`.
        '''
        # offset is absolute offset, which is an odd choice
        #
        # slice exactly one record: slicing to the end of the buffer would
        # copy the whole remainder of the file for every entry decoded.
        desc = parse_entry(self.buf[offset:offset + self._ENTRY_SIZE])
        # bind the entry to *this* index. Relying on a module-level
        # variable named `index` breaks as soon as the class is used
        # outside of the script, or with more than one index.
        return Entry(self, offset, desc)

    def get_entries(self):
        '''Yield every entry record, in file order.'''
        for i in range(self.header.total_entries):
            yield self.get_entry(
                self._FIRST_ENTRY_OFFSET + i * self._ENTRY_SIZE)

    def get_name_entry(self, name):
        '''
        Return the names-table entry whose string equals `name` exactly.

        Raises:
          KeyError: if no entry of the names table matches.
        '''
        for entry in self.names:
            if entry.string == name:
                return entry
        raise KeyError(name)

    def get_entries_by_name(self, name):
        '''
        Yield the entries chained to the names-table entry for `name`.

        Starts at the entry referenced by the name's `owner` field and
        follows `previous_owner` links until one is 0x0.

        Raises:
          KeyError: if `name` is not in the names table.
        '''
        name_entry = self.get_name_entry(name)
        entry = self.get_entry(name_entry.descriptor.owner)
        yield entry
        while entry.descriptor.previous_owner != 0x0:
            entry = self.get_entry(entry.descriptor.previous_owner)
            yield entry
def parse_index(buf):
    '''
    Build an `Index` over the raw bytes of a filesystem.cache file.

    Args:
      buf: the complete file contents.

    Returns:
      Index: the parsed index.
    '''
    parsed = Index(buf)
    return parsed
def print_namedtuple(item):
    '''
    Print a namedtuple to stdout: its type name, then one line per field.

    Integer values are rendered in hexadecimal; anything else is
    rendered with its default string conversion.
    '''
    print(type(item).__name__ + ":")
    for field in item._fields:
        value = getattr(item, field)
        template = ' - %s: 0x%x' if isinstance(value, int) else ' - %s: %s'
        print(template % (field, value))
# Script entry point: dump the header and every entry of the given file.
# Guarded so that importing this module no longer reads sys.argv or a file.
# The names assigned below remain module-level globals, exactly as before.
if __name__ == '__main__':
    if len(sys.argv) < 2:
        # exit with a usage message instead of an IndexError traceback.
        sys.exit('usage: %s <path to filesystem.cache>' % sys.argv[0])

    with open(sys.argv[1], 'rb') as f:
        buf = f.read()

    index = parse_index(buf)
    print_namedtuple(index.header)
    print('files:')
    for entry in index.get_entries():
        print(' - ' + str(entry))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment