Created
February 27, 2012 20:48
-
-
Save jtauber/1926926 to your computer and use it in GitHub Desktop.
trim extra data off end of zipfile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import struct
# Signature that opens the ZIP "end of central directory" (ECD) record.
# Kept as a bytes literal so it can be searched for in binary file contents.
ECD_SIG = b"\x50\x4B\x05\x06"
# Size in bytes of the fixed part of the ECD record (the optional archive
# comment that follows it is not counted here).
ECD_LEN = 22


def trim_zip(filename_in, filename_out):
    """
    Trim the zipfile given by filename `filename_in`, removing any bytes
    after the end of central directory record, and write the result to
    a file with filename `filename_out`.

    The archive comment (whose length is stored in the last field of the
    record) is treated as part of the record and is kept.

    Returns the number of entries in the file according to the end of
    central directory record.

    Raises ValueError if no end of central directory record that passes the
    sanity checks (single-disk archive, consistent entry counts) is found.
    """
    # Read the whole file into memory for quick searching. Binary mode is
    # required: text mode would translate or fail to decode archive bytes.
    with open(filename_in, "rb") as file_in:
        data = file_in.read()

    # Walk the signature occurrences in file order and accept the first one
    # that unpacks into a plausible record; the four signature bytes can also
    # occur by chance inside member data.
    ecd_fields = None
    ecd_start = data.find(ECD_SIG)
    while ecd_start != -1:
        candidate = data[ecd_start:ecd_start + ECD_LEN]
        if len(candidate) == ECD_LEN:
            fields = struct.unpack("<IHHHHIIH", candidate)
            # Sanity checks: this disk and the central directory disk are
            # both 0, and the per-disk entry count equals the total count.
            if fields[1] == 0 and fields[2] == 0 and fields[3] == fields[4]:
                ecd_fields = fields
                break
        ecd_start = data.find(ECD_SIG, ecd_start + 1)

    if ecd_fields is None:
        raise ValueError(
            "no valid end of central directory record found in %r"
            % (filename_in,)
        )

    # Ignore everything after the record, but keep its trailing comment so
    # the stored comment length still matches the data that follows.
    ecd_end = ecd_start + ECD_LEN + ecd_fields[7]
    with open(filename_out, "wb") as file_out:
        file_out.write(data[:ecd_end])

    return ecd_fields[4]
if __name__ == "__main__":
    # zipfile is only needed for this demonstration, so it is imported here.
    import zipfile

    # Trim the sample archive and print the entry count reported by its
    # end of central directory record. Single-argument print(...) calls
    # produce the same output on Python 2 and Python 3.
    print(trim_zip("ziptest.zip", "ziptest2.zip"))

    # Show whether zipfile recognises the original and the trimmed file.
    print(zipfile.is_zipfile("ziptest.zip"))
    print(zipfile.is_zipfile("ziptest2.zip"))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment