jtauber · February 27, 2012 20:48
diff --git a/trim_zip.py b/trim_zip.py
 import struct

 ECD_SIG = b"\x50\x4B\x05\x06"  # end of central directory signature ("PK\x05\x06")
 ECD_LEN = 22  # size of the fixed part of the record, i.e. struct "<IHHHHIIH"


 def trim_zip(filename_in, filename_out):
     """
     Trim trailing bytes from the zip file given by filename `filename_in`.

     Everything after the end of central directory record (its fixed
     22-byte part plus the archive comment whose length the record
     declares) is dropped, and the result is written to a file with
     filename `filename_out`.

     Returns the number of entries in the file according to the end of
     central directory record.

     Raises ValueError if no end of central directory signature is found,
     struct.error if the record is cut short, and AssertionError if the
     record describes a multi-disk archive or its two entry counts differ.
     Nothing is written to `filename_out` when any of these is raised.
     """
     # Zip files are binary: read raw bytes so that no newline translation
     # or text decoding is applied, and release the handle right away.
     with open(filename_in, "rb") as file_in:
         data = file_in.read()

     # NOTE(review): the first occurrence of the signature is taken to be
     # the record, as before. If the same four bytes can appear earlier in
     # the archive payload this picks the wrong spot -- confirm whether a
     # stricter search is needed for the inputs this is used on.
     ecd_start = data.index(ECD_SIG)
     ecd_end = ecd_start + ECD_LEN
     (signature, disk_number, cd_start_disk, entries_on_disk,
      entries_total, _cd_size, _cd_offset, comment_len) = struct.unpack(
         "<IHHHHIIH", data[ecd_start:ecd_end])

     # Sanity checks. Raised explicitly (rather than with `assert`) so they
     # still run under `python -O`; the exception type is unchanged.
     if signature != 0x06054b50:
         raise AssertionError("unexpected end of central directory signature")
     if disk_number != 0 or cd_start_disk != 0:
         raise AssertionError("multi-disk archives are not supported")
     if entries_on_disk != entries_total:
         raise AssertionError("entry counts in the record disagree")

     # The archive comment is part of the record: keep it so the comment
     # length stored in the record still matches the bytes that follow it.
     trimmed_data = data[:ecd_end + comment_len]

     with open(filename_out, "wb") as file_out:
         file_out.write(trimmed_data)

     return entries_total

 if __name__ == "__main__":
     import zipfile

     # Trim the sample archive and show the entry count trim_zip returns.
     # print(...) with a single argument prints the same on Python 2 and 3.
     print(trim_zip("ziptest.zip", "ziptest2.zip"))

     # Show what zipfile makes of the archive before and after trimming.
     print(zipfile.is_zipfile("ziptest.zip"))
     print(zipfile.is_zipfile("ziptest2.zip"))
	import struct

	ECD_SIG = b"\x50\x4B\x05\x06"  # end of central directory signature ("PK\x05\x06")
	ECD_LEN = 22  # size of the fixed part of the record, i.e. struct "<IHHHHIIH"


	def trim_zip(filename_in, filename_out):
	    """
	    Trim trailing bytes from the zip file given by filename `filename_in`.

	    Everything after the end of central directory record (its fixed
	    22-byte part plus the archive comment whose length the record
	    declares) is dropped, and the result is written to a file with
	    filename `filename_out`.

	    Returns the number of entries in the file according to the end of
	    central directory record.

	    Raises ValueError if no end of central directory signature is found,
	    struct.error if the record is cut short, and AssertionError if the
	    record describes a multi-disk archive or its two entry counts differ.
	    Nothing is written to `filename_out` when any of these is raised.
	    """
	    # Zip files are binary: read raw bytes so that no newline translation
	    # or text decoding is applied, and release the handle right away.
	    with open(filename_in, "rb") as file_in:
	        data = file_in.read()

	    # NOTE(review): the first occurrence of the signature is taken to be
	    # the record, as before. If the same four bytes can appear earlier in
	    # the archive payload this picks the wrong spot -- confirm whether a
	    # stricter search is needed for the inputs this is used on.
	    ecd_start = data.index(ECD_SIG)
	    ecd_end = ecd_start + ECD_LEN
	    (signature, disk_number, cd_start_disk, entries_on_disk,
	     entries_total, _cd_size, _cd_offset, comment_len) = struct.unpack(
	        "<IHHHHIIH", data[ecd_start:ecd_end])

	    # Sanity checks. Raised explicitly (rather than with `assert`) so they
	    # still run under `python -O`; the exception type is unchanged.
	    if signature != 0x06054b50:
	        raise AssertionError("unexpected end of central directory signature")
	    if disk_number != 0 or cd_start_disk != 0:
	        raise AssertionError("multi-disk archives are not supported")
	    if entries_on_disk != entries_total:
	        raise AssertionError("entry counts in the record disagree")

	    # The archive comment is part of the record: keep it so the comment
	    # length stored in the record still matches the bytes that follow it.
	    trimmed_data = data[:ecd_end + comment_len]

	    with open(filename_out, "wb") as file_out:
	        file_out.write(trimmed_data)

	    return entries_total

	if __name__ == "__main__":
	    import zipfile

	    # Trim the sample archive and show the entry count trim_zip returns.
	    # print(...) with a single argument prints the same on Python 2 and 3.
	    print(trim_zip("ziptest.zip", "ziptest2.zip"))

	    # Show what zipfile makes of the archive before and after trimming.
	    print(zipfile.is_zipfile("ziptest.zip"))
	    print(zipfile.is_zipfile("ziptest2.zip"))