notwa · March 21, 2018 10:23
diff --git a/_example.sh b/_example.sh
 #!/usr/bin/env bash
 # Example driver: compiles the yaz0 helper when it is missing, runs
 # recompress.py on the ROM named below, then prints the SHA-1 of the input
 # and of the rebuilt ROM so the two can be compared.
 # abort on the first command that fails.
 set -e

 # set this as needed!
 rom="Legend of Zelda, The - Ocarina of Time (U) (V1.0) [!].z64"

 output="reconstructed.z64"

 gcc_flags="-std=gnu11 -Wall -O3 -s"

 if [ -z "$MSYSTEM" ]; then # check for MSYS2
    # we're probably not running Windows.
    # -s is false for a missing or empty file, so this only builds when needed.
    if [ ! -s yaz0 ]; then
        # $gcc_flags is deliberately unquoted so it splits into separate flags.
        gcc $gcc_flags yaz0.c -o yaz0
    fi
    ./recompress.py yaz0 "$rom" "$output"
 else
    # we're probably running Windows.
    if [ ! -s yaz0.exe ]; then
        x86_64-w64-mingw32-gcc $gcc_flags yaz0.c -o yaz0.exe
    fi
    ./recompress.py yaz0.exe "$rom" "$output"
 fi

 # identical hashes mean the ROM was rebuilt byte-for-byte.
 sha1sum "$rom" "$output"
diff --git a/_example.txt b/_example.txt
 build	zelda@srd44 98-10-21 04:56:31
 bootcode	6105
 crc1	EC7011B7
 crc2	7616D72B
 created	reconstructed.z64
 ad69c91157f6705e8ab06c79fe08aad47bb57ba7 *Legend of Zelda, The - Ocarina of Time (U) (V1.0) [!].z64
 ad69c91157f6705e8ab06c79fe08aad47bb57ba7 *reconstructed.z64
diff --git a/recompress.py b/recompress.py
 #!/usr/bin/env python3

 from hashlib import sha1
 from io import BytesIO
 from zlib import crc32
 import os
 import os.path
 import struct, array
 import subprocess as sp
 import sys
 import traceback


 # globals {{{1

 # absolute path of the Yaz0 helper executable; assigned by main() from the
 # first command-line argument and read by compress_file()/decompress_file().
 z64yaz0_path = None


 # utilities {{{1


 def lament(*args, **kwargs):
    """Print to standard error; takes the same arguments as print() minus file."""
    stream = sys.stderr
    print(*args, file=stream, **kwargs)


 def align(x):
    """Round x up to the next multiple of 16 (unchanged if already aligned)."""
    bumped = x + 15
    return bumped - bumped % 16


 def R1(data):
    """Decode exactly one byte as an unsigned 8-bit integer."""
    (value,) = struct.unpack('>B', data)
    return value


 def R2(data):
    """Decode exactly two bytes as a big-endian unsigned 16-bit integer."""
    (value,) = struct.unpack('>H', data)
    return value


 def R4(data):
    """Decode exactly four bytes as a big-endian unsigned 32-bit integer."""
    (value,) = struct.unpack('>I', data)
    return value


 def W1(data):
    """Encode an integer in 0..0xFF as a single byte."""
    layout = struct.Struct('>B')
    return layout.pack(data)


 def W2(data):
    """Encode an integer in 0..0xFFFF as two big-endian bytes."""
    layout = struct.Struct('>H')
    return layout.pack(data)


 def W4(data):
    """Encode an integer in 0..0xFFFFFFFF as four big-endian bytes."""
    layout = struct.Struct('>I')
    return layout.pack(data)


 def swap_order(f, size='H'):
    """Byte-swap the whole stream in place, one array element at a time.

    The stream is read from offset 0 to its end, interpreted as an array of
    `size`-coded items (an array-module type code; 'H' swaps byte pairs),
    and the swapped bytes are written back starting at offset 0.
    """
    f.seek(0)
    words = array.array(size)
    words.frombytes(f.read())
    words.byteswap()
    f.seek(0)
    f.write(words.tobytes())


 class SubDir:
    """Context manager that runs its body with `directory` as the cwd.

    On entry the directory is created if it does not exist yet and becomes
    the working directory; on exit the previous working directory is
    restored, whether or not the body raised.
    """

    def __init__(self, directory):
        self.directory = directory
        # filled in by __enter__ so __exit__ knows where to go back to.
        self.cwd = None

    def __enter__(self):
        self.cwd = os.getcwd()
        try:
            os.mkdir(self.directory)
        except FileExistsError:
            # an existing directory is simply reused.
            pass
        os.chdir(self.directory)
        # returning self makes `with SubDir(path) as d:` bind the manager
        # instead of None.
        return self

    def __exit__(self, type_, value, traceback):
        os.chdir(self.cwd)
        # never suppress an exception raised inside the with-body.
        return False


 # subprocess wrapper {{{1

 # subprocess still comes with the same old useless wrappers,
 # so we'll write our own!

 class PoopenError(sp.CalledProcessError):
    """A child process exited with a non-zero status.

    Carries the exit status, the command, and whatever the child wrote to
    stdout (`output`) and stderr (`error`, also exposed as `stderr` so code
    that catches subprocess.CalledProcessError finds it where it expects).
    """
    def __init__(self, returncode, cmd, output=None, error=None):
        # initialise the base class so its own attributes are populated.
        super().__init__(returncode, cmd, output)
        # set explicitly rather than through the base constructor so this
        # also works where CalledProcessError takes no stderr argument.
        self.stderr = error
        self.error = error
    def __str__(self):
        s = "Command failed with exit status {}:\n{}".format(self.returncode, self.cmd)
        # the captured streams are bytes; undecodable bytes are dropped.
        if self.output:
            output = str(self.output, 'utf-8', 'ignore')
            s += "\nstdout:\n{}\n".format(output)
        if self.error:
            error = str(self.error, 'utf-8', 'ignore')
            s += "\nstderr:\n{}\n".format(error)
        return s


 def poopen(args, env=None):
    """Run `args` to completion, capturing stdout and stderr.

    Returns (returncode, stdout_bytes, stderr_bytes) when the child exits
    with status 0 and raises PoopenError carrying both streams otherwise.
    `env`, when given, is passed to the child as its environment.
    """
    proc = sp.Popen(args, env=env, stdout=sp.PIPE, stderr=sp.PIPE)
    captured_out, captured_err = proc.communicate()
    status = proc.returncode
    if status == 0:
        return status, captured_out, captured_err
    raise PoopenError(returncode=status, cmd=args, output=captured_out, error=captured_err)


 # subprocesses {{{1

 def compress_file(fp_in, fp_out):
    """Run the Yaz0 helper on fp_in, writing fp_out; returns poopen()'s result.

    The command line is the same as decompress_file()'s.
    """
    command = (z64yaz0_path, fp_in, fp_out)
    return poopen(command)


 def decompress_file(fp_in, fp_out):
    """Run the Yaz0 helper on fp_in, writing fp_out; returns poopen()'s result.

    The command line is the same as compress_file()'s.
    """
    command = (z64yaz0_path, fp_in, fp_out)
    return poopen(command)


 # checksums {{{1

 # Based on uCON64's N64 checksum algorithm by Andreas Sterbenz

 # mask used to truncate Python's unbounded integers to 32 bits.
 MAX32 = 0xFFFFFFFF

 # bootcode number -> initial value of the six accumulators in crc().
 crc_seeds = {
    6101: 0xF8CA4DDC,
    6102: 0xF8CA4DDC,
    6103: 0xA3886759,
    6105: 0xDF26F436,
    6106: 0x1FEA617A,
 }

 # zlib.crc32 of ROM bytes 0x40..0xFFF -> bootcode number;
 # looked up by bootcode_version().
 bootcode_crcs = {
    0x6170A4A1: 6101,
    0x90BB6CB5: 6102,
    0x0B050EE0: 6103,
    0x98BC2C86: 6105,
    0xACC8580A: 6106,
 }


 def ROL(i, b):
    """Rotate the 32-bit value i left by b bits and mask the result to 32 bits."""
    shifted_up = i << b
    wrapped_around = i >> (32 - b)
    return (shifted_up | wrapped_around) & MAX32


 def crc_R4(b):
    """Combine the first four items of b, most significant first, into one integer."""
    return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3]


 def crc(f, bootcode=6105):
    """Compute the two 32-bit header checksums for the ROM in stream f.

    f must be seekable; bootcode selects the seed from crc_seeds (an unknown
    value raises KeyError) and the final mixing step. The checksum covers
    the 0x100000 bytes that start at offset 0x1000, read as big-endian
    32-bit words. Returns (crc1, crc2), each masked to 32 bits.
    """
    seed = crc_seeds[bootcode]
    t1 = t2 = t3 = t4 = t5 = t6 = seed

    if bootcode == 6105:
        # 6105 mixes in words from a 0x100-byte window of the ROM at 0x750.
        f.seek(0x0710 + 0x40)
        lookup = f.read(0x100)

    f.seek(0x1000)
    for i in range(0x1000, 0x101000, 4):
        # crc_R4 indexes four bytes, so a stream that ends early fails here.
        d = crc_R4(f.read(4))

        # t4 counts how often the running sum t6 wraps past 32 bits.
        if ((t6 + d) & MAX32) < t6:
            t4 += 1
            t4 &= MAX32

        # t6: wrapping sum of all words.
        t6 += d
        t6 &= MAX32

        # t3: xor of all words.
        t3 ^= d

        # rotate each word left by its own low five bits.
        r = ROL(d, d & 0x1F)

        # t5: wrapping sum of the rotated words.
        t5 += r
        t5 &= MAX32

        if t2 > d:
            t2 ^= r
        else:
            t2 ^= t6 ^ d

        if bootcode == 6105:
            # the low byte of the offset walks the lookup window cyclically.
            o = i & 0xFF
            temp = crc_R4(lookup[o:o + 4])
        else:
            temp = t5
        t1 += temp ^ d
        t1 &= MAX32

    # the accumulators are folded differently for 6103 and 6106.
    if bootcode == 6103:
        crc1 = (t6 ^ t4) + t3
        crc2 = (t5 ^ t2) + t1
    elif bootcode == 6106:
        crc1 = t6*t4 + t3
        crc2 = t5*t2 + t1
    else:
        crc1 = t6 ^ t4 ^ t3
        crc2 = t5 ^ t2 ^ t1
    return crc1 & MAX32, crc2 & MAX32


 def bootcode_version(f):
    """Identify the bootcode in stream f by the CRC-32 of bytes 0x40..0xFFF.

    Returns the matching bootcode number from bootcode_crcs; an unrecognised
    checksum raises KeyError.
    """
    f.seek(0x40)
    bootcode = f.read(0x1000 - 0x40)
    checksum = crc32(bootcode) & MAX32
    return bootcode_crcs[checksum]


 # the rest {{{1


 def dump_as(b, fp, size=None):
    """Write b to the file fp, replacing any previous content.

    When size is given and non-zero the file is first filled with that many
    zero bytes, so data shorter than size ends up zero-padded to size.
    """
    with open(fp, 'w+b') as out:
        if size:
            out.write(bytes(size))
            out.seek(0)
        out.write(b)


 # assume first entry is makerom (0x1060), and second entry begins from makerom
 # the signature is the first 20 bytes of the file table as read by
 # z_dump_file(): entry 0 as four big-endian words (vs=0, ve=0x1060, ps=0,
 # pe=0) followed by the first word of entry 1 (vs=0x1060).
 dma_sig = b"\x00\x00\x00\x00\x00\x00\x10\x60\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x60"


 def z_dump_file(f, i=0):
    """Read one 16-byte file-table entry at f's position and dump that file.

    The entry is four big-endian 32-bit words: virtual start, virtual end,
    physical start, physical end. The file is written to the current
    directory as '<i, 4 digits> V<virtual start, 8 hex digits>', with a
    '.Yaz0' suffix when it is stored Yaz0-compressed. Returns False for the
    all-zero terminator entry; otherwise leaves f positioned just after the
    entry and returns True.
    """
    vs, ve, ps, pe = struct.unpack('>IIII', f.read(16))
    resume_at = f.tell()

    if (vs, ve, ps, pe) == (0, 0, 0, 0):
        return False

    # ve is inferred from the file size on rebuild and ps from the running
    # position, so only the index and vs need to go into the name.
    fp = '{:04} V{:08X}'.format(i, vs)

    if 0xFFFFFFFF in (ps, pe):
        # the file does not exist in this ROM: leave an empty placeholder.
        dump_as(b'', fp, 0)
    elif pe == 0:
        # stored uncompressed: the physical size equals the virtual size.
        size = ve - vs
        f.seek(ps)
        dump_as(f.read(size), fp, size)
    else:
        # stored compressed: copy the physical bytes untouched.
        f.seek(ps)
        blob = f.read(pe - ps)
        if blob[:4] != b'Yaz0':
            lament('unknown compression:', fp)
            dump_as(blob, fp, len(blob))
        else:
            dump_as(blob, fp + '.Yaz0', len(blob))

    f.seek(resume_at)
    return True


 def z_find_dma(f):
    """Scan forward from f's current position for the file table.

    The stream is examined in 16-byte rows, so the starting position
    determines the alignment that is searched. A row matches when it equals
    the first 16 bytes of dma_sig and the bytes right after it equal the
    remainder of dma_sig.

    Returns the absolute offset of the matching row, or None when the end of
    the stream is reached without a match.
    """
    head, rest = dma_sig[:16], dma_sig[16:]
    while True:
        # assume row alignment
        row = f.tell()
        data = f.read(16)
        if len(data) == 0: # EOF
            return None
        if data == head:
            if f.read(len(rest)) == rest:
                return row
            # the bytes just peeked at belong to the next row: go back to
            # its start so it is still examined and alignment is kept.
            f.seek(row + 16)


 def z_dump(f):
    """Locate the file table in ROM stream f and dump every file it lists.

    Prints the text stored in the 0x30 bytes before the table, labelled
    'build', then writes one file per table entry into the current
    directory. Raises Exception when the table cannot be found.
    """
    f.seek(0x1060) # skip header when finding dmatable
    table_at = z_find_dma(f)
    if table_at is None:
        raise Exception("couldn't find file offset table")

    f.seek(table_at - 0x30)
    raw_build = f.read(0x30)
    build = raw_build.replace(b'\x00', b' ').strip()
    print("build", build.decode('utf-8'), sep='\t')

    f.seek(table_at)
    index = 0
    while True:
        if not z_dump_file(f, index):
            break
        index += 1


 def z_read_file(path, fn=None):
    """Load a dumped file and recover its virtual start from its name.

    fn defaults to the basename of path. Its first 14 characters must look
    like 'NNNN VXXXXXXXX', where the eight characters after ' V' parse as
    hexadecimal. Returns (True, data, vs) on success and (False, None, None)
    when the name does not fit; the file is only opened for fitting names.
    """
    rejected = (False, None, None)

    name = os.path.basename(path) if fn is None else fn
    if len(name) < 14:
        return rejected

    prefix = str(name[:14])
    if prefix[4:6] != ' V':
        return rejected

    try:
        vs = int(prefix[6:14], 16)
    except ValueError:
        return rejected

    with open(path, 'rb') as handle:
        return True, handle.read(), vs


 def z_write_dma(f, dma):
    """Write the file table into the region its own entry describes.

    dma is a list of [vs, ve, ps, pe] entries; it is sorted in place by vs.
    The third entry after sorting is assumed to describe the table itself:
    its ps gives where the table is written and ve - vs how much room it has.
    That region is zero-filled and then receives every entry as four
    big-endian 32-bit words.

    Raises AssertionError when there are fewer than three entries or when
    the table does not fit into its region; nothing is written in that case.
    """
    dma.sort(key=lambda vf: vf[0]) # sort by vs
    assert len(dma) > 2, "need at least three entries to locate the table"
    # assumption: index 2 is the table's own entry. Distinct names are used
    # so the values survive until the size check below.
    table_vs, table_ve, table_ps, _ = dma[2]

    dma_size = table_ve - table_vs
    table_bytes = b''.join(struct.pack('>IIII', *vf) for vf in dma)
    # compare against the table's own region, before touching the stream,
    # so an oversized table cannot overwrite the data that follows it.
    assert len(table_bytes) <= dma_size, "file table does not fit in its region"

    # initialize with zeros
    f.seek(table_ps)
    f.write(bytearray(dma_size))

    f.seek(table_ps)
    f.write(table_bytes)


 def update_crc(f):
    """Recompute both header checksums of the ROM in f and store them at 0x10.

    Prints the detected bootcode and the two checksums, tab-separated, then
    writes the checksums as consecutive big-endian 32-bit words.
    """
    version = bootcode_version(f)
    print('bootcode', version, sep='\t')

    first, second = crc(f, version)
    for label, value in (('crc1', first), ('crc2', second)):
        print(label, '{:08X}'.format(value), sep='\t')

    f.seek(0x10)
    f.write(W4(first) + W4(second))


 def dump_rom(fp, outpath=None):
    """Split the ROM file fp into one file per table entry under outpath.

    A ROM that starts with 37 80 40 12 is byte-swapped in memory first; any
    start other than that or 80 37 12 40 raises Exception. outpath defaults
    to the SHA-1 hex digest of the (swapped) ROM; a warning goes to stderr
    when it already exists. Returns (outpath, size of the ROM in bytes).
    """
    with open(fp, 'rb') as rom_file:
        data = rom_file.read()

    with BytesIO(data) as f:
        magic = f.read(4)
        if magic == b'\x37\x80\x40\x12':
            swap_order(f)
        elif magic != b'\x80\x37\x12\x40':
            raise Exception('not a .z64: {}'.format(fp))

        # hash the stream's content, i.e. after any byte swap.
        romhash = sha1(f.getvalue()).hexdigest()
        outpath = romhash if outpath is None else outpath

        if os.path.exists(outpath):
            lament("warning: output directory already exists:", outpath)

        with SubDir(outpath):
            f.seek(0)
            z_dump(f)

    return outpath, len(data)


 def recompress_files(path):
    """Decompress and recompress every '*.Yaz0' file directly inside path.

    Each file is expanded next to itself, compressed back over the original
    and the expanded copy removed. A 'mismatch' line with the first seven
    hex digits of the old and new SHA-1 is printed for every file whose
    content changed in the round trip.
    """
    root, _, files = next(os.walk(path))
    files.sort()

    def digest(fp):
        with open(fp, 'rb') as f:
            return sha1(f.read()).hexdigest()

    suffix = '.Yaz0'
    for fn in files:
        if not fn.endswith(suffix):
            continue

        comp_path = os.path.join(root, fn)
        uncomp_path = os.path.join(root, fn[:-len(suffix)])

        old_hash = digest(comp_path)

        decompress_file(comp_path, uncomp_path)
        compress_file(uncomp_path, comp_path)
        os.remove(uncomp_path)

        new_hash = digest(comp_path)
        if old_hash != new_hash:
            print("mismatch", old_hash[:7], new_hash[:7], fn, sep='\t')


 def create_rom(directory, fp_out=None, rom_size=64 * 1024 * 1024):
    """Assemble a ROM image from the dumped files in directory.

    Files are taken in name order; names that z_read_file() rejects are
    reported on stderr and skipped. The first three accepted files are
    placed at the address encoded in their name, every later file at the
    next 16-byte boundary after its predecessor. Files starting with 'Yaz0'
    are recorded as compressed, empty files as non-existent. Finally the
    file table and the header checksums are written.

    fp_out defaults to directory + '.z64'; rom_size is the size of the
    image that is pre-filled before the files are written. Returns fp_out.
    Layout violations raise AssertionError.
    """
    root, _, files = next(os.walk(directory))
    files.sort()

    if fp_out is None:
        fp_out = directory + '.z64'

    with open(fp_out, 'w+b') as f:
        dma = []

        # initialize with a repeating pattern
        pattern = bytearray(range(256))
        f.write(pattern * (rom_size // len(pattern)))

        f.seek(0)

        start_v = 0
        start_p = 0

        for fn in files:
            path = os.path.join(root, fn)
            success, data, vs = z_read_file(path, fn)
            if not success:
                lament('skipping:', fn)
                continue

            size_v = len(data)
            size_p = size_v
            unempty = size_v > 0
            compressed = size_v >= 4 and data[:4] == b'Yaz0'

            # len(dma) counts the files accepted so far. Counting positions
            # in the directory listing instead would let a skipped stray
            # file push one of the first three out of its pinned slot.
            if len(dma) <= 2:
                # makerom, boot, dmadata need to be exactly where they were
                start_v = vs
                start_p = start_v
            else:
                start_v = align(start_v)
                start_p = align(start_p)

            if unempty:
                ps = start_p
                if compressed:
                    pe = align(start_p + size_p)
                    # bytes 4..8 of a Yaz0 file hold its uncompressed size.
                    ve = vs + int.from_bytes(data[4:8], 'big')
                else:
                    # pe == 0 marks an uncompressed file.
                    pe = 0
                    ve = vs + size_v
            else:
                # all-ones physical addresses mark a file that does not exist.
                ps = 0xFFFFFFFF
                pe = 0xFFFFFFFF
                ve = vs

            assert start_v <= rom_size
            assert start_v + size_v <= rom_size

            # i'm not sure how picky the game is with the dmatable.
            assert vs % 0x10 == 0
            assert ve % 0x10 == 0

            if unempty:
                f.seek(start_p)
                f.write(data)

            dma.append([vs, ve, ps, pe])

            start_v += size_v
            start_p += size_p

        z_write_dma(f, dma)
        update_crc(f)

    return fp_out


 def recompress(fp_in, fp_out):
    """Dump the ROM fp_in, recompress its files and rebuild it as fp_out.

    Returns True on success. Any error is reported as a traceback on stderr
    and turned into a False return value; KeyboardInterrupt and SystemExit
    are not intercepted and reach the caller.
    """
    try:
        dumped, size = dump_rom(fp_in)
        recompress_files(dumped)
        newrom = create_rom(dumped, fp_out, rom_size=size)
        print("created", newrom, sep='\t')
    except Exception:
        # a bare except would also catch Ctrl-C and print a traceback for it.
        traceback.print_exc(file=sys.stderr)
        return False
    return True


 def print_help(name, file=sys.stdout):
    """Print a short description and the usage line, prefixed with name, to file."""
    # NOTE: the default for file is bound once, when the function is defined.
    # The backslash ending the first line swallows that line break, so a
    # single newline separates the description from the usage line; doubled
    # braces come out of str.format() as literal braces.
    print("""{}: deconstruct and reconstruct Ocarina of Time ROMs\

 usage: {{path to Yaz0 (de)compressor}} {{path to ROM}} {{output filename}}
 """.format(name), file=file)


 def main(program_name, args):
    """Command-line entry point; returns the process exit status.

    args are the arguments after the program name: path of the Yaz0 helper,
    path of the input ROM, path of the output ROM. Any help flag prints the
    usage to stdout and returns 0; fewer than three arguments prints it to
    stderr and returns 1. Otherwise returns 0 when the rebuild succeeded
    and 1 when it failed.
    """
    help_flags = ('-h', '--help', '-?', '/?')
    if any(flag in args for flag in help_flags):
        print_help(program_name)
        return 0

    if len(args) < 3:
        print_help(program_name, sys.stderr)
        return 1

    # stored absolute because the dump step changes the working directory.
    global z64yaz0_path
    z64yaz0_path = os.path.abspath(args[0])

    if recompress(args[1], args[2]):
        return 0
    return 1


 # script entry point: the exit status is whatever main() returns.
 if __name__ == '__main__':
    try:
        sys.exit(main(sys.argv[0], sys.argv[1:]))
    except KeyboardInterrupt:
        # exit with status 1 on Ctrl-C instead of showing a traceback.
        sys.exit(1)
diff --git a/yaz0.c b/yaz0.c
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdint.h>

 // version 1.0 (20050707) by shevious
 // Thanks to thakis for yaz0dec 1.0.

 typedef uint8_t u8;
 typedef uint32_t u32;

 #define MAX_RUNLEN (0xFF + 0x12)

 // simple and straight encoding scheme for Yaz0
 /*
  * Find the longest match for the bytes starting at src[pos] among the
  * matches that start in the 0x1000 bytes before pos.
  *
  * src/size:  the whole input buffer and its length.
  * pos:       position to encode.
  * pMatchPos: receives the start of the best match (0 when none was found).
  *
  * Returns the match length, capped at MAX_RUNLEN and at the end of the
  * input. A result of 1 means "no usable match": matches of length 2 are
  * reported as 1 as well.
  */
 static u32 simpleEnc(u8 *src, int size, int pos, u32 *pMatchPos)
 {
    int startPos = pos - 0x1000;
    u32 numBytes = 1;
    u32 matchPos = 0;

    int end = size - pos;
    // maximum runlength for 3 byte encoding
    if (end > MAX_RUNLEN)
        end = MAX_RUNLEN;

    if (startPos < 0)
        startPos = 0;
    for (int i = startPos; i < pos; i++) {
        int j;
        // the candidate may run past pos, i.e. overlap the bytes being encoded.
        for (j = 0; j < end; j++) {
            if (src[i + j] != src[j + pos])
                break;
        }
        // strictly greater: of several equally long matches the earliest wins.
        if (j > numBytes) {
            numBytes = j;
            matchPos = i;
        }
    }

    *pMatchPos = matchPos;

    if (numBytes == 2)
        numBytes = 1;

    return numBytes;
 }

 // a lookahead encoding scheme for ngc Yaz0
 /*
  * Like simpleEnc(), but with one byte of look-ahead: when the match that
  * starts at pos + 1 is at least two bytes longer than the one at pos, this
  * call returns 1 (emit a literal) and the NEXT call returns the remembered
  * longer match without searching again.
  *
  * The look-ahead result lives in static variables, so the function carries
  * state from one call to the next and expects to be called for consecutive
  * positions of a single input.
  */
 static u32 nintendoEnc(u8 *src, int size, int pos, u32 *pMatchPos)
 {
    u32 numBytes = 1;
    // length and position of the match found by the look-ahead.
    static u32 numBytes1;
    static u32 matchPos;
    static int prevFlag = 0;

    // if prevFlag is set, it means that the previous position
    // was determined by look-ahead try.
    // so just use it. this is not the best optimization,
    // but nintendo's choice for speed.
    if (prevFlag == 1) {
        *pMatchPos = matchPos;
        prevFlag = 0;
        return numBytes1;
    }

    prevFlag = 0;
    numBytes = simpleEnc(src, size, pos, &matchPos);
    *pMatchPos = matchPos;

    // if this position is RLE encoded, then compare to copying 1 byte and next position(pos+1) encoding
    if (numBytes >= 3) {
        // this overwrites matchPos with the look-ahead's match; *pMatchPos
        // already holds the match for pos.
        numBytes1 = simpleEnc(src, size, pos + 1, &matchPos);
        // if the next position encoding is +2 longer than current position, choose it.
        // this does not guarantee the best optimization, but fairly good optimization with speed.
        if (numBytes1 >= numBytes + 2) {
            numBytes = 1;
            prevFlag = 1;
        }
    }
    return numBytes;
 }

 /*
  * Compress srcSize bytes from src into dst and return the number of bytes
  * written. Only the compressed payload is produced; no header is written.
  *
  * The output is a sequence of groups: one code byte followed by the data
  * for up to eight codes. A set bit (most significant first) stands for a
  * literal byte, a clear bit for a back-reference of two or three bytes.
  *
  * dst is not bounds-checked here; the caller must provide enough room.
  */
 static int encodeYaz0(u8 *src, u8 *dst, int srcSize)
 {
    int srcPos = 0;
    int dstPos = 0;
    int bufPos = 0;

    u8 buf[24]; // 8 codes * 3 bytes maximum

    u32 validBitCount = 0; // number of codes collected in the current group
    u8 currCodeByte = 0; // a bitfield, set bits meaning copy, unset meaning RLE

    while (srcPos < srcSize) {
        u32 numBytes;
        u32 matchPos;

        numBytes = nintendoEnc(src, srcSize, srcPos, &matchPos);
        if (numBytes < 3) {
            // straight copy
            buf[bufPos] = src[srcPos];
            bufPos++;
            srcPos++;
            //set flag for straight copy
            currCodeByte |= (0x80 >> validBitCount);
        } else {
            //RLE part
            // distance back to the match, stored minus one.
            u32 dist = srcPos - matchPos - 1;
            u8 byte1, byte2, byte3;

            if (numBytes >= 0x12) { // 3 byte encoding
                // a zero high nibble announces a third byte holding length - 0x12.
                byte1 = 0 | (dist >> 8);
                byte2 = dist & 0xFF;
                buf[bufPos++] = byte1;
                buf[bufPos++] = byte2;
                // maximum runlength for 3 byte encoding
                if (numBytes > MAX_RUNLEN)
                    numBytes = MAX_RUNLEN;
                byte3 = numBytes - 0x12;
                buf[bufPos++] = byte3;
            } else { // 2 byte encoding
                // high nibble: length - 2; low nibble: top four bits of dist.
                byte1 = ((numBytes - 2) << 4) | (dist >> 8);
                byte2 = dist & 0xFF;
                buf[bufPos++] = byte1;
                buf[bufPos++] = byte2;
            }
            srcPos += numBytes;
        }

        validBitCount++;

        // write eight codes
        if (validBitCount == 8) {
            dst[dstPos++] = currCodeByte;
            for (int j = 0; j < bufPos; j++)
                dst[dstPos++] = buf[j];

            currCodeByte = 0;
            validBitCount = 0;
            bufPos = 0;
        }
    }

    // flush a final group that holds fewer than eight codes.
    if (validBitCount > 0) {
        dst[dstPos++] = currCodeByte;
        for (int j = 0; j < bufPos; j++)
            dst[dstPos++] = buf[j];

        currCodeByte = 0;
        validBitCount = 0;
        bufPos = 0;
    }

    return dstPos;
 }

 /*
  * Expand a Yaz0 payload (the data after the 16-byte header) from src into
  * dst until exactly uncompressedSize bytes have been produced.
  *
  * dst must have room for uncompressedSize bytes. Writes never go past that
  * size, even when the last run is longer than the remaining space. A
  * back-reference that points before the start of dst marks corrupt input:
  * decoding stops there and the rest of dst is zero-filled.
  *
  * The length of src is not known here, so reads from src are not
  * bounds-checked.
  */
 void decompress(u8 *src, u8 *dst, int uncompressedSize)
 {
    int srcPlace = 0, dstPlace = 0; // current read/write positions

    u32 validBitCount = 0; // number of valid bits left in "code" byte
    u8 currCodeByte = 0;

    while (dstPlace < uncompressedSize) {
        // read new "code" byte if the current one is used up
        if (validBitCount == 0) {
            currCodeByte = src[srcPlace++];
            validBitCount = 8;
        }

        if ((currCodeByte & 0x80) != 0) {
            // straight copy
            dst[dstPlace++] = src[srcPlace++];
        } else {
            // RLE part
            u8 byte1 = src[srcPlace++];
            u8 byte2 = src[srcPlace++];

            int dist = ((byte1 & 0xF) << 8) | byte2;
            int copySource = dstPlace - (dist + 1);

            // high nibble 0 means the length is in a third byte.
            int numBytes = byte1 >> 4;
            if (numBytes == 0) {
                numBytes = src[srcPlace++] + 0x12;
            } else {
                numBytes += 2;
            }

            // corrupt input: the reference starts before the output buffer.
            if (copySource < 0) {
                memset(dst + dstPlace, 0, (size_t)(uncompressedSize - dstPlace));
                return;
            }

            // copy run byte by byte (source and destination may overlap),
            // but never write past the end of the output.
            for (int i = 0; i < numBytes && dstPlace < uncompressedSize; ++i) {
                dst[dstPlace++] = dst[copySource++];
            }
        }

        // use next bit from "code" byte
        currCodeByte <<= 1;
        validBitCount--;
    }
 }

 int main(int argc, char *argv[])
 {
    if (argc != 3) {
        if (argc <= 0) {
            fputs("You shouldn't have done that.\n", stderr);
            exit(1);
        }
        fprintf(stderr, "usage: %s {input-file} {output-file}\n", argv[0]);
        exit(1);
    }

    FILE *fin = fopen(argv[1], "rb");

    if (fin == NULL) {
        perror(argv[1]);
        exit(1);
    }

    fseek(fin, 0, SEEK_END);
    long size = ftell(fin);
    fseek(fin, 0, SEEK_SET);

    u8 *bufi = malloc(size);
    fread(bufi, 1, size, fin);

    fclose(fin);

    FILE *fout = fopen(argv[2], "wb");

    if (fout == NULL) {
        perror(argv[1]);
        exit(1);
    }

    if (size > 0x10
        && bufi[0] == 'Y'
        && bufi[1] == 'a'
        && bufi[2] == 'z'
        && bufi[3] == '0') {
        long usize = (bufi[4] << 24)
            | (bufi[5] << 16)
            | (bufi[6] << 8)
            | bufi[7];
        u8 *bufo = malloc(usize);
        decompress(bufi + 16, bufo, usize);
        fwrite(bufo, usize, 1, fout);
        free(bufo);

    } else {
        // we don't know how big the "compressed" file could get,
        // so over-allocate!
        // modern systems have more RAM than the largest Yaz0 file, so...
        u8 *bufo = malloc(size * 2);

        // write 4 bytes yaz0 header
        bufo[0] = 'Y';
        bufo[1] = 'a';
        bufo[2] = 'z';
        bufo[3] = '0';

        // write 4 bytes uncompressed size
        bufo[4] = (size >> 24) & 0xFF;
        bufo[5] = (size >> 16) & 0xFF;
        bufo[6] = (size >> 8) & 0xFF;
        bufo[7] = (size >> 0) & 0xFF;

        // write 8 bytes unused dummy
        bufo[8] = 0;
        bufo[9] = 0;
        bufo[10] = 0;
        bufo[11] = 0;
        bufo[12] = 0;
        bufo[13] = 0;
        bufo[14] = 0;
        bufo[15] = 0;

        long csize = encodeYaz0(bufi, bufo + 16, size) + 16;

        // pad compressed file to be a multiple of 16 bytes.
        long ceilsize = (csize + 15) & ~0xF;
        for (long i = csize; i < ceilsize; i++)
            bufo[i] = 0;

        fwrite(bufo, ceilsize, 1, fout);
        free(bufo);
    }

    free(bufi);
    fclose(fout);
 }
diff --git a/yaz0.exe b/yaz0.exe
	#!/usr/bin/env bash
	set -e

	# set this as needed!
	rom="Legend of Zelda, The - Ocarina of Time (U) (V1.0) [!].z64"

	output="reconstructed.z64"

	gcc_flags="-std=gnu11 -Wall -O3 -s"

	if [ -z "$MSYSTEM" ]; then # check for MSYS2
	# we're probably not running Windows.
	if [ ! -s yaz0 ]; then
	gcc $gcc_flags yaz0.c -o yaz0
	fi
	./recompress.py yaz0 "$rom" "$output"
	else
	# we're probably running Windows.
	if [ ! -s yaz0.exe ]; then
	x86_64-w64-mingw32-gcc $gcc_flags yaz0.c -o yaz0.exe
	fi
	./recompress.py yaz0.exe "$rom" "$output"
	fi

	sha1sum "$rom" "$output"
	build zelda@srd44 98-10-21 04:56:31
	bootcode 6105
	crc1 EC7011B7
	crc2 7616D72B
	created reconstructed.z64
	ad69c91157f6705e8ab06c79fe08aad47bb57ba7 *Legend of Zelda, The - Ocarina of Time (U) (V1.0) [!].z64
	ad69c91157f6705e8ab06c79fe08aad47bb57ba7 *reconstructed.z64
	#!/usr/bin/env python3

	from hashlib import sha1
	from io import BytesIO
	from zlib import crc32
	import os
	import os.path
	import struct, array
	import subprocess as sp
	import sys
	import traceback


	# globals {{{1

	z64yaz0_path = None


	# utilities {{{1


	def lament(*args, **kwargs):
	print(*args, file=sys.stderr, **kwargs)


	def align(x):
	return (x + 15) // 16 * 16


	def R1(data):
	return struct.unpack('>B', data)[0]


	def R2(data):
	return struct.unpack('>H', data)[0]


	def R4(data):
	return struct.unpack('>I', data)[0]


	def W1(data):
	return struct.pack('>B', data)


	def W2(data):
	return struct.pack('>H', data)


	def W4(data):
	return struct.pack('>I', data)


	def swap_order(f, size='H'):
	f.seek(0)
	a = array.array(size, f.read())
	a.byteswap()
	f.seek(0)
	f.write(a.tobytes())


	class SubDir:
	def __init__(self, directory):
	self.directory = directory
	def __enter__(self):
	self.cwd = os.getcwd()
	try:
	os.mkdir(self.directory)
	except FileExistsError:
	pass
	os.chdir(self.directory)
	def __exit__(self, type_, value, traceback):
	os.chdir(self.cwd)


	# subprocess wrapper {{{1

	# subprocess still comes with the same old useless wrappers,
	# so we'll write our own!

	class PoopenError(sp.CalledProcessError):
	def __init__(self, returncode, cmd, output=None, error=None):
	self.returncode = returncode
	self.cmd = cmd
	self.output = output
	self.error = error
	def __str__(self):
	s = "Command failed with exit status {}:\n{}".format(self.returncode, self.cmd)
	if self.output:
	output = str(self.output, 'utf-8', 'ignore')
	s += "\nstdout:\n{}\n".format(output)
	if self.error:
	error = str(self.error, 'utf-8', 'ignore')
	s += "\nstderr:\n{}\n".format(error)
	return s


	def poopen(args, env=None):
	p = sp.Popen(args, stdout=sp.PIPE, stderr=sp.PIPE, env=env)
	out, err = p.communicate()
	if p.returncode != 0:
	raise PoopenError(returncode=p.returncode, cmd=args, output=out, error=err)
	return p.returncode, out, err


	# subprocesses {{{1

	def compress_file(fp_in, fp_out):
	return poopen((z64yaz0_path, fp_in, fp_out))


	def decompress_file(fp_in, fp_out):
	return poopen((z64yaz0_path, fp_in, fp_out))


	# checksums {{{1

	# Based on uCON64's N64 checksum algorithm by Andreas Sterbenz

	MAX32 = 0xFFFFFFFF

	crc_seeds = {
	6101: 0xF8CA4DDC,
	6102: 0xF8CA4DDC,
	6103: 0xA3886759,
	6105: 0xDF26F436,
	6106: 0x1FEA617A,
	}

	bootcode_crcs = {
	0x6170A4A1: 6101,
	0x90BB6CB5: 6102,
	0x0B050EE0: 6103,
	0x98BC2C86: 6105,
	0xACC8580A: 6106,
	}


	def ROL(i, b):
	return ((i << b) | (i >> (32 - b))) & MAX32


	def crc_R4(b):
	return b[0]*0x1000000 + b[1]*0x10000 + b[2]*0x100 + b[3]


	def crc(f, bootcode=6105):
	seed = crc_seeds[bootcode]
	t1 = t2 = t3 = t4 = t5 = t6 = seed

	if bootcode == 6105:
	f.seek(0x0710 + 0x40)
	lookup = f.read(0x100)

	f.seek(0x1000)
	for i in range(0x1000, 0x101000, 4):
	d = crc_R4(f.read(4))

	if ((t6 + d) & MAX32) < t6:
	t4 += 1
	t4 &= MAX32

	t6 += d
	t6 &= MAX32

	t3 ^= d

	r = ROL(d, d & 0x1F)

	t5 += r
	t5 &= MAX32

	if t2 > d:
	t2 ^= r
	else:
	t2 ^= t6 ^ d

	if bootcode == 6105:
	o = i & 0xFF
	temp = crc_R4(lookup[o:o + 4])
	else:
	temp = t5
	t1 += temp ^ d
	t1 &= MAX32

	if bootcode == 6103:
	crc1 = (t6 ^ t4) + t3
	crc2 = (t5 ^ t2) + t1
	elif bootcode == 6106:
	crc1 = t6*t4 + t3
	crc2 = t5*t2 + t1
	else:
	crc1 = t6 ^ t4 ^ t3
	crc2 = t5 ^ t2 ^ t1
	return crc1 & MAX32, crc2 & MAX32


	def bootcode_version(f):
	f.seek(0x40)
	return bootcode_crcs[crc32(f.read(0x1000 - 0x40)) & MAX32]


	# the rest {{{1


	def dump_as(b, fp, size=None):
	with open(fp, 'w+b') as f:
	if size:
	f.write(bytearray(size))
	f.seek(0)
	f.write(b)


	# assume first entry is makerom (0x1060), and second entry begins from makerom
	dma_sig = b"\x00\x00\x00\x00\x00\x00\x10\x60\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x60"


	def z_dump_file(f, i=0):
	vs = R4(f.read(4)) # virtual start
	ve = R4(f.read(4)) # virtual end
	ps = R4(f.read(4)) # physical start
	pe = R4(f.read(4)) # physical end
	here = f.tell()

	if vs == ve == ps == pe == 0:
	return False

	# ve inferred from filesize, and we're making pe be 0
	# ps can just be the end of the last file
	fp = '{:04} V{:08X}'.format(i, vs)

	size = ve - vs

	if ps == 0xFFFFFFFF or pe == 0xFFFFFFFF:
	#lament('file does not exist')
	dump_as(b'', fp, 0)
	elif pe == 0:
	#lament('file is uncompressed')
	pe = ps + size
	f.seek(ps)
	data = f.read(pe - ps)
	dump_as(data, fp, size)
	else:
	#lament('file is compressed')
	f.seek(ps)
	compressed = f.read(pe - ps)
	if compressed[:4] == b'Yaz0':
	dump_as(compressed, fp + '.Yaz0', len(compressed))
	else:
	lament('unknown compression:', fp)
	dump_as(compressed, fp, len(compressed))

	f.seek(here)
	return True


	def z_find_dma(f):
	while True:
	# assume row alignment
	data = f.read(16)
	if len(data) == 0: # EOF
	break
	if data == dma_sig[:16]:
	rest = dma_sig[16:]
	if f.read(len(rest)) == rest:
	return f.tell() - len(rest) - 16
	else:
	f.seek(len(rest), 1)


	def z_dump(f):
	f.seek(0x1060) # skip header when finding dmatable
	addr = z_find_dma(f)
	if addr == None:
	raise Exception("couldn't find file offset table")

	f.seek(addr - 0x30)
	build = f.read(0x30).replace(b'\x00', b' ').strip()
	print("build", str(build, 'utf-8'), sep='\t')

	f.seek(addr)
	i = 0
	while z_dump_file(f, i):
	i += 1


	def z_read_file(path, fn=None):
	if fn == None:
	fn = os.path.basename(path)

	if len(fn) < 14:
	return False, None, None

	fn = str(fn[:14])

	if fn[4:6] != ' V':
	return False, None, None

	try:
	vs = int(fn[ 6: 14], 16)
	except ValueError:
	return False, None, None

	with open(path, 'rb') as f:
	data = f.read()

	return True, data, vs


	def z_write_dma(f, dma):
	dma.sort(key=lambda vf: vf[0]) # sort by vs
	assert len(dma) > 2
	dma_entry = dma[2] # assumption
	vs, ve, ps, pe = dma_entry

	# initialize with zeros
	dma_size = ve - vs
	f.seek(ps)
	f.write(bytearray(dma_size))

	f.seek(ps)
	for vf in dma:
	vs, ve, ps, pe = vf
	#lament('{:08X} {:08X} {:08X} {:08X}'.format(vs, ve, ps, pe))
	f.write(W4(vs))
	f.write(W4(ve))
	f.write(W4(ps))
	f.write(W4(pe))
	assert f.tell() <= (pe or ve)


	def update_crc(f):
	bootcode = bootcode_version(f)
	print('bootcode', bootcode, sep='\t')
	crc1, crc2 = crc(f, bootcode)
	print('crc1', '{:08X}'.format(crc1), sep='\t')
	print('crc2', '{:08X}'.format(crc2), sep='\t')
	f.seek(0x10)
	f.write(W4(crc1))
	f.write(W4(crc2))


	def dump_rom(fp, outpath=None):
	with open(fp, 'rb') as f:
	data = f.read()

	with BytesIO(data) as f:
	start = f.read(4)
	if start == b'\x37\x80\x40\x12':
	swap_order(f)
	elif start != b'\x80\x37\x12\x40':
	raise Exception('not a .z64: {}'.format(fp))

	f.seek(0)
	romhash = sha1(f.read()).hexdigest()

	if outpath is None:
	outpath = romhash

	if os.path.exists(outpath):
	lament("warning: output directory already exists:", outpath)

	with SubDir(outpath):
	f.seek(0)
	z_dump(f)

	return outpath, len(data)


	def recompress_files(path):
	root, _, files = next(os.walk(path))
	files.sort()

	for i, fn in enumerate(files):
	if fn.endswith('.Yaz0'):
	uncomp_fn = fn[:-len('.Yaz0')]
	#print("recompressing", fn, sep='\t')

	comp_path = os.path.join(root, fn)
	uncomp_path = os.path.join(root, uncomp_fn)

	with open(comp_path, 'rb') as f:
	old_hash = sha1(f.read()).hexdigest()

	uncomp = decompress_file(comp_path, uncomp_path)
	comp = compress_file(uncomp_path, comp_path)
	os.remove(uncomp_path)

	with open(comp_path, 'rb') as f:
	new_hash = sha1(f.read()).hexdigest()

	if old_hash != new_hash:
	print("mismatch", old_hash[:7], new_hash[:7], fn, sep='\t')


	def create_rom(directory, fp_out=None, rom_size=64 * 1024 * 1024):
	root, _, files = next(os.walk(directory))
	files.sort()

	if fp_out is None:
	fp_out = directory + '.z64'

	with open(fp_out, 'w+b') as f:
	dma = []

	# initialize with a repeating pattern
	pattern = bytearray(i for i in range(256))
	f.write(pattern * (rom_size // len(pattern)))

	f.seek(0)

	start_v = 0
	start_p = 0

	for i, fn in enumerate(files):
	path = os.path.join(root, fn)
	success, data, vs = z_read_file(path, fn)
	if not success:
	lament('skipping:', fn)
	continue

	size_v = len(data)
	size_p = size_v
	unempty = size_v > 0
	compressed = size_v >= 4 and data[:4] == b'Yaz0'

	if i <= 2:
	# makerom, boot, dmadata need to be exactly where they were
	start_v = vs
	start_p = start_v
	else:
	start_v = align(start_v)
	start_p = align(start_p)

	if unempty:
	ps = start_p
	if compressed:
	pe = align(start_p + size_p)
	ve = vs + int.from_bytes(data[4:8], 'big')
	else:
	pe = 0
	ve = vs + size_v
	else:
	ps = 0xFFFFFFFF
	pe = 0xFFFFFFFF
	ve = vs

	assert start_v <= rom_size
	assert start_v + size_v <= rom_size

	# i'm not sure how picky the game is with the dmatable.
	assert vs % 0x10 == 0
	assert ve % 0x10 == 0

	if unempty:
	f.seek(start_p)
	f.write(data)

	dma.append([vs, ve, ps, pe])

	start_v += size_v
	start_p += size_p

	z_write_dma(f, dma)
	update_crc(f)

	return fp_out


	def recompress(fp_in, fp_out):
	try:
	dumped, size = dump_rom(fp_in)
	recompress_files(dumped)
	newrom = create_rom(dumped, fp_out, rom_size=size)
	print("created", newrom, sep='\t')
	except:
	traceback.print_exc(file=sys.stderr)
	return False
	return True


	def print_help(name, file=sys.stdout):
	print("""{}: deconstruct and reconstruct Ocarina of Time ROMs\

	usage: {{path to Yaz0 (de)compressor}} {{path to ROM}} {{output filename}}
	""".format(name), file=file)


	def main(program_name, args):
	for help_like in ('-h', '--help', '-?', '/?'):
	if help_like in args:
	print_help(program_name)
	return 0

	if len(args) < 3:
	print_help(program_name, sys.stderr)
	return 1

	global z64yaz0_path
	z64yaz0_path = os.path.abspath(args[0])

	ok = recompress(args[1], args[2])
	return 0 if ok else 1


	if __name__ == '__main__':
	try:
	sys.exit(main(sys.argv[0], sys.argv[1:]))
	except KeyboardInterrupt:
	sys.exit(1)
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <stdint.h>

	// version 1.0 (20050707) by shevious
	// Thanks to thakis for yaz0dec 1.0.

	typedef uint8_t u8;
	typedef uint32_t u32;

	#define MAX_RUNLEN (0xFF + 0x12)

	// simple and straight encoding scheme for Yaz0
	static u32 simpleEnc(u8 *src, int size, int pos, u32 *pMatchPos)
	{
	int startPos = pos - 0x1000;
	u32 numBytes = 1;
	u32 matchPos = 0;

	int end = size - pos;
	// maximum runlength for 3 byte encoding
	if (end > MAX_RUNLEN)
	end = MAX_RUNLEN;

	if (startPos < 0)
	startPos = 0;
	for (int i = startPos; i < pos; i++) {
	int j;
	for (j = 0; j < end; j++) {
	if (src[i + j] != src[j + pos])
	break;
	}
	if (j > numBytes) {
	numBytes = j;
	matchPos = i;
	}
	}

	*pMatchPos = matchPos;

	if (numBytes == 2)
	numBytes = 1;

	return numBytes;
	}

	// a lookahead encoding scheme for ngc Yaz0.
	//
	// same parameters and return value as simpleEnc, but before accepting a
	// match at `pos` it also tries `pos + 1`. if that match is at least 2 bytes
	// longer, this call reports a single byte copy and the NEXT call returns the
	// longer match without searching again.
	//
	// NOTE: the look-ahead result is kept in static variables, so this function
	// is not reentrant and expects to be called with consecutive positions.
	static u32 nintendoEnc(u8 *src, int size, int pos, u32 *pMatchPos)
	{
	    u32 numBytes = 1;
	    static u32 numBytes1;
	    static u32 matchPos;
	    static int prevFlag = 0;

	    // if prevFlag is set, it means that the previous position
	    // was determined by look-ahead try.
	    // so just use it. this is not the best optimization,
	    // but nintendo's choice for speed.
	    if (prevFlag == 1) {
	        *pMatchPos = matchPos;
	        prevFlag = 0;
	        return numBytes1;
	    }

	    prevFlag = 0;
	    numBytes = simpleEnc(src, size, pos, &matchPos);
	    *pMatchPos = matchPos;

	    // if this position is RLE encoded, then compare to copying 1 byte
	    // and next position (pos + 1) encoding
	    if (numBytes >= 3) {
	        // this overwrites the static matchPos with the match for pos + 1;
	        // the caller already received the match for pos through pMatchPos.
	        numBytes1 = simpleEnc(src, size, pos + 1, &matchPos);
	        // if the next position encoding is +2 longer than current position,
	        // choose it. this does not guarantee the best optimization,
	        // but fairly good optimization with speed.
	        if (numBytes1 >= numBytes + 2) {
	            numBytes = 1;
	            prevFlag = 1;
	        }
	    }
	    return numBytes;
	}

	// compresses srcSize bytes from src into dst as a raw Yaz0 stream
	// (no header) and returns the number of bytes written to dst.
	//
	// the output is a sequence of groups: one "code" byte followed by the data
	// for up to eight codes. a set bit (most significant first) means the code
	// is one literal byte, a clear bit means it is a 2 or 3 byte back-reference.
	//
	// dst must be large enough for the worst case of all literals,
	// which is srcSize + (srcSize + 7) / 8 bytes.
	static int encodeYaz0(u8 *src, u8 *dst, int srcSize)
	{
	    int srcPos = 0;
	    int dstPos = 0;
	    int bufPos = 0;

	    u8 buf[24]; // 8 codes * 3 bytes maximum

	    u32 validBitCount = 0; // number of codes collected for the current group
	    u8 currCodeByte = 0; // a bitfield, set bits meaning copy, unset meaning RLE

	    while (srcPos < srcSize) {
	        u32 numBytes;
	        u32 matchPos;

	        numBytes = nintendoEnc(src, srcSize, srcPos, &matchPos);
	        if (numBytes < 3) {
	            // straight copy
	            buf[bufPos++] = src[srcPos++];
	            // set flag for straight copy
	            currCodeByte |= (0x80 >> validBitCount);
	        } else {
	            // RLE part. the distance is stored minus one, in 12 bits.
	            u32 dist = srcPos - matchPos - 1;
	            u8 byte1, byte2, byte3;

	            if (numBytes >= 0x12) { // 3 byte encoding
	                // a zero high nibble marks the 3 byte form.
	                byte1 = 0 | (dist >> 8);
	                byte2 = dist & 0xFF;
	                buf[bufPos++] = byte1;
	                buf[bufPos++] = byte2;
	                // maximum runlength for 3 byte encoding
	                if (numBytes > MAX_RUNLEN)
	                    numBytes = MAX_RUNLEN;
	                byte3 = numBytes - 0x12;
	                buf[bufPos++] = byte3;
	            } else { // 2 byte encoding
	                // the high nibble holds the length minus two (1..0xF).
	                byte1 = ((numBytes - 2) << 4) | (dist >> 8);
	                byte2 = dist & 0xFF;
	                buf[bufPos++] = byte1;
	                buf[bufPos++] = byte2;
	            }
	            srcPos += numBytes;
	        }

	        validBitCount++;

	        // write eight codes
	        if (validBitCount == 8) {
	            dst[dstPos++] = currCodeByte;
	            for (int j = 0; j < bufPos; j++)
	                dst[dstPos++] = buf[j];

	            currCodeByte = 0;
	            validBitCount = 0;
	            bufPos = 0;
	        }
	    }

	    // write the final, partially filled group (if any).
	    if (validBitCount > 0) {
	        dst[dstPos++] = currCodeByte;
	        for (int j = 0; j < bufPos; j++)
	            dst[dstPos++] = buf[j];
	    }

	    return dstPos;
	}

	// decodes a raw Yaz0 stream (the data following the 16 byte header) from
	// src into dst, stopping once uncompressedSize bytes have been produced.
	//
	// dst must have room for uncompressedSize bytes; nothing is ever written
	// beyond that. the length of src is not known here, so reads from src are
	// NOT bounds checked. if a back-reference points before the start of the
	// output, the stream is considered corrupt: the rest of dst is zero-filled
	// and decoding stops.
	void decompress(u8 *src, u8 *dst, int uncompressedSize)
	{
	    int srcPlace = 0, dstPlace = 0; // current read/write positions

	    u32 validBitCount = 0; // number of valid bits left in "code" byte
	    u8 currCodeByte = 0;

	    while (dstPlace < uncompressedSize) {
	        // read new "code" byte if the current one is used up
	        if (validBitCount == 0) {
	            currCodeByte = src[srcPlace++];
	            validBitCount = 8;
	        }

	        if ((currCodeByte & 0x80) != 0) {
	            // straight copy
	            dst[dstPlace++] = src[srcPlace++];
	        } else {
	            // RLE part
	            u8 byte1 = src[srcPlace++];
	            u8 byte2 = src[srcPlace++];

	            // 12 bit distance, stored minus one.
	            int dist = ((byte1 & 0xF) << 8) | byte2;
	            int copySource = dstPlace - (dist + 1);

	            // a zero high nibble means the length is in a third byte.
	            int numBytes = byte1 >> 4;
	            if (numBytes == 0) {
	                numBytes = src[srcPlace++] + 0x12;
	            } else {
	                numBytes += 2;
	            }

	            if (copySource < 0) {
	                // corrupt stream: there is no output that far back.
	                memset(dst + dstPlace, 0, uncompressedSize - dstPlace);
	                return;
	            }

	            // copy run, byte by byte since source and destination may
	            // overlap, and never past the end of the output buffer.
	            for (int i = 0; i < numBytes && dstPlace < uncompressedSize; ++i) {
	                dst[dstPlace++] = dst[copySource++];
	            }
	        }

	        // use next bit from "code" byte
	        currCodeByte <<= 1;
	        validBitCount--;
	    }
	}

	int main(int argc, char *argv[])
	{
	if (argc != 3) {
	if (argc <= 0) {
	fputs("You shouldn't have done that.\n", stderr);
	exit(1);
	}
	fprintf(stderr, "usage: %s {input-file} {output-file}\n", argv[0]);
	exit(1);
	}

	FILE *fin = fopen(argv[1], "rb");

	if (fin == NULL) {
	perror(argv[1]);
	exit(1);
	}

	fseek(fin, 0, SEEK_END);
	long size = ftell(fin);
	fseek(fin, 0, SEEK_SET);

	u8 *bufi = malloc(size);
	fread(bufi, 1, size, fin);

	fclose(fin);

	FILE *fout = fopen(argv[2], "wb");

	if (fout == NULL) {
	perror(argv[1]);
	exit(1);
	}

	if (size > 0x10
	&& bufi[0] == 'Y'
	&& bufi[1] == 'a'
	&& bufi[2] == 'z'
	&& bufi[3] == '0') {
	long usize = (bufi[4] << 24)
	\| (bufi[5] << 16)
	\| (bufi[6] << 8)
	\| bufi[7];
	u8 *bufo = malloc(usize);
	decompress(bufi + 16, bufo, usize);
	fwrite(bufo, usize, 1, fout);
	free(bufo);

	} else {
	// we don't know how big the "compressed" file could get,
	// so over-allocate!
	// modern systems have more RAM than the largest Yaz0 file, so...
	u8 bufo = malloc(size 2);

	// write 4 bytes yaz0 header
	bufo[0] = 'Y';
	bufo[1] = 'a';
	bufo[2] = 'z';
	bufo[3] = '0';

	// write 4 bytes uncompressed size
	bufo[4] = (size >> 24) & 0xFF;
	bufo[5] = (size >> 16) & 0xFF;
	bufo[6] = (size >> 8) & 0xFF;
	bufo[7] = (size >> 0) & 0xFF;

	// write 8 bytes unused dummy
	bufo[8] = 0;
	bufo[9] = 0;
	bufo[10] = 0;
	bufo[11] = 0;
	bufo[12] = 0;
	bufo[13] = 0;
	bufo[14] = 0;
	bufo[15] = 0;

	long csize = encodeYaz0(bufi, bufo + 16, size) + 16;

	// pad compressed file to be a multiple of 16 bytes.
	long ceilsize = (csize + 15) & ~0xF;
	for (long i = csize; i < ceilsize; i++)
	bufo[i] = 0;

	fwrite(bufo, ceilsize, 1, fout);
	free(bufo);
	}

	free(bufi);
	fclose(fout);
	}