Skip to content

Instantly share code, notes, and snippets.

@jtauber
Created February 27, 2012 20:48
Show Gist options
  • Save jtauber/1926926 to your computer and use it in GitHub Desktop.
Save jtauber/1926926 to your computer and use it in GitHub Desktop.
Trim extra data off the end of a zipfile
import struct
# signature that starts the end of central directory (ECD) record; a bytes
# literal so it can be searched for in data read in binary mode
ECD_SIG = b"\x50\x4B\x05\x06"
# size in bytes of the fixed part of the ECD record (the "<IHHHHIIH" layout),
# not counting the variable-length archive comment that may follow it
ECD_LEN = 22


def _find_ecd(data):
    """
    returns `(offset, fields)` for the last plausible end of central
    directory record in `data`, where `fields` is the unpacked fixed part.
    """
    pos = data.rfind(ECD_SIG)
    if pos < 0:
        raise ValueError("end of central directory signature not found")
    # The record lives at the end of the archive, so walk backwards: the
    # signature bytes may also occur earlier inside member data (for example
    # a zip stored inside the zip), and those occurrences must not win.
    while pos >= 0:
        if pos + ECD_LEN <= len(data):
            fields = struct.unpack_from("<IHHHHIIH", data, pos)
            # sanity checks: single-disk archive and consistent entry counts
            if fields[1] == 0 and fields[2] == 0 and fields[3] == fields[4]:
                return pos, fields
        # look for an earlier occurrence that starts strictly before `pos`
        pos = data.rfind(ECD_SIG, 0, pos + len(ECD_SIG) - 1)
    # raised explicitly (rather than with `assert`) so the check survives
    # `python -O`; the exception type matches the previous assert-based checks
    raise AssertionError("no valid end of central directory record found")


def trim_zip(filename_in, filename_out):
    """
    trims the zipfile given by filename `filename_in`, removing any bytes
    after the end of central directory record (including its archive
    comment, if any) and writing the result to a file with filename
    `filename_out`.

    returns the number of entries in the file according to the end of central
    directory record.

    raises ValueError if the end of central directory signature does not occur
    in the file, and AssertionError if it occurs but no occurrence passes the
    sanity checks (disk numbers are zero, entry counts agree).
    """
    # read the file into memory for quick searching; binary mode so the bytes
    # are not altered by newline translation or text decoding
    with open(filename_in, "rb") as file_in:
        data = file_in.read()

    ecd_start, ecd_fields = _find_ecd(data)

    # the last field is the length of the archive comment, which is part of
    # the record; keep it, but never reach past the end of the input
    end = min(ecd_start + ECD_LEN + ecd_fields[7], len(data))

    # ignore everything after the end of central directory
    with open(filename_out, "wb") as file_out:
        file_out.write(data[:end])

    return ecd_fields[4]
if __name__ == "__main__":
    # demo: trim a sample archive, then report whether the standard library
    # recognises the input and the trimmed output as zip files
    import zipfile

    # single-argument print(...) calls behave the same on Python 2 and 3
    print(trim_zip("ziptest.zip", "ziptest2.zip"))
    print(zipfile.is_zipfile("ziptest.zip"))
    print(zipfile.is_zipfile("ziptest2.zip"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment