Created
April 8, 2015 05:27
-
-
Save pudquick/ff412bcb29c9c1fa4b8d to your computer and use it in GitHub Desktop.
A pbzx stream decoder for the format found within Yosemite package payloads.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# v2 pbzx stream handler | |
# My personal writeup on the differences here: https://gist.github.com/pudquick/29fcfe09c326a9b96cf5 | |
# | |
# Pure python reimplementation of .cpio.xz content extraction from pbzx file payload originally here: | |
# http://www.tonymacx86.com/general-help/135458-pbzx-stream-parser.html | |
# | |
# Cleaned up C version (as the basis for my code) here, thanks to Pepijn Bruienne / @bruienne | |
# https://gist.github.com/bruienne/029494bbcfb358098b41 | |
import struct, sys | |
def seekread(f, offset=None, length=0, relative=True): | |
if (offset != None): | |
# offset provided, let's seek | |
f.seek(offset, [0,1,2][relative]) | |
if (length != 0): | |
return f.read(length) | |
def parse_pbzx(pbzx_path): | |
section = 0 | |
xar_out_path = '%s.part%02d.cpio.xz' % (pbzx_path, section) | |
f = open(pbzx_path, 'rb') | |
# pbzx = f.read() | |
# f.close() | |
magic = seekread(f,length=4) | |
if magic != 'pbzx': | |
raise "Error: Not a pbzx file" | |
# Read 8 bytes for initial flags | |
flags = seekread(f,length=8) | |
# Interpret the flags as a 64-bit big-endian unsigned int | |
flags = struct.unpack('>Q', flags)[0] | |
xar_f = open(xar_out_path, 'wb') | |
while (flags & (1 << 24)): | |
# Read in more flags | |
flags = seekread(f,length=8) | |
flags = struct.unpack('>Q', flags)[0] | |
# Read in length | |
f_length = seekread(f,length=8) | |
f_length = struct.unpack('>Q', f_length)[0] | |
xzmagic = seekread(f,length=6) | |
if xzmagic != '\xfd7zXZ\x00': | |
# This isn't xz content, this is actually _raw decompressed cpio_ chunk of 16MB in size... | |
# Let's back up ... | |
seekread(f,offset=-6,length=0) | |
# ... and split it out ... | |
f_content = seekread(f,length=f_length) | |
section += 1 | |
decomp_out = '%s.part%02d.cpio' % (pbzx_path, section) | |
g = open(decomp_out, 'wb') | |
g.write(f_content) | |
g.close() | |
# Now to start the next section, which should hopefully be .xz (we'll just assume it is ...) | |
xar_f.close() | |
section += 1 | |
new_out = '%s.part%02d.cpio.xz' % (pbzx_path, section) | |
xar_f = open(new_out, 'wb') | |
else: | |
f_length -= 6 | |
# This part needs buffering | |
f_content = seekread(f,length=f_length) | |
tail = seekread(f,offset=-2,length=2) | |
xar_f.write(xzmagic) | |
xar_f.write(f_content) | |
if tail != 'YZ': | |
xar_f.close() | |
raise "Error: Footer is not xar file footer" | |
try: | |
f.close() | |
xar_f.close() | |
except: | |
pass | |
def main(): | |
result = parse_pbzx(sys.argv[1]) | |
print "Now xz decompress the .xz chunks, then 'cat' them all together in order into a single new.cpio file" | |
if __name__ == '__main__': | |
main() |
Thanks! This is super handy. Does this code have a license?
"Error: Footer is not xar file footer"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@eeeyahoo you need to put
#!/usr/bin/env python
on the first line or invoke your script withpython parse_pbzx2.py
instead.@bjoerns1683 You can use unxz (from http://tukaani.org/xz/). Alternatively you can try https://gist.github.com/Lekensteyn/6e0840e77bc9bd013f57 which is compatible with more formats.