Created
March 30, 2022 17:55
-
-
Save nfalliere/d08e0529e28e3c2426ea0a6f01ba0273 to your computer and use it in GitHub Desktop.
Extract the zip files that are contained in a binary file (e.g. memory dump)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
import os
import sys
from struct import unpack, unpack_from
def extract(buf, ibeg, iend):
    """Write the slice buf[ibeg:iend] to a file in the current directory.

    The output file is named after the start offset, formatted as eight
    upper-case hex digits: ``sub<OFFSET>.zip``. An existing file with the
    same name is overwritten.

    buf  -- bytes-like object holding the whole input
    ibeg -- offset of the first byte to dump (also used for the file name)
    iend -- offset one past the last byte to dump
    """
    name = 'sub%08X.zip' % ibeg
    print('Dumping: %s' % name)
    with open(name, 'wb') as f:
        f.write(buf[ibeg:iend])
def _iter_zip_spans(buf):
    """Yield (ibeg, iend) offset pairs for each zip candidate found in buf.

    A candidate starts at a local file header signature (PK\\3\\4) and ends
    after the first plausible End Of Central Directory record (PK\\5\\6)
    that follows it, including the EOCD comment.
    """
    local_sig = b'PK\x03\x04'
    eocd_sig = b'PK\x05\x06'
    size = len(buf)
    pos = 0
    while True:
        # bytes.find does the signature scan natively instead of testing
        # one byte per interpreted loop iteration.
        ibeg = buf.find(local_sig, pos)
        # a local file header entry is at least 30 bytes long
        if ibeg < 0 or ibeg > size - 30:
            return
        version_needed, = unpack_from('<H', buf, ibeg + 4)
        # (heuristic) additional filtering to avoid false positives
        if version_needed > 20:
            pos = ibeg + 1
            continue

        scan = ibeg
        while True:
            ieocd = buf.find(eocd_sig, scan)
            # EOCD record: 22+N bytes; with none left, nothing more can be
            # carved from this buffer
            if ieocd < 0 or ieocd > size - 22:
                return
            # fields after the signature: diskNumber, cdDiskStartIndex,
            # cdRecordCountOnThisDisk, cdRecordCountTotal, cdSize, cdOffset,
            # commentlen
            fields = unpack_from('<HHHHIIH', buf, ieocd + 4)
            disk_number = fields[0]
            comment_len = fields[-1]
            # (heuristic) additional filtering to avoid false positives
            if disk_number > 20 or comment_len > 300:
                scan = ieocd + 1
                continue
            break

        iend = ieocd + 22 + comment_len
        yield ibeg, iend
        # resume the search right after the archive that was just found
        pos = iend


def process_file(path):
    """Search the file at `path` for embedded zip archives and dump each one.

    The whole file is read into memory. Every candidate span, from a local
    file header to the end of the next plausible EOCD record, is passed to
    extract(), which writes it out as its own file. The detection is
    heuristic: it can miss archives or dump spans that are not valid zips.
    """
    with open(path, 'rb') as f:
        buf = f.read()

    for ibeg, iend in _iter_zip_spans(buf):
        extract(buf, ibeg, iend)
def usage():
    """Print the command-line help text and exit with status -1."""
    # show only the script's file name, not the full path it was run with
    script = os.path.basename(sys.argv[0])
    print('Extract the zip files that are contained in a binary file (e.g. memory dump)')
    print('Usage:')
    print(' %s <input_filepath>' % script)
    print('Information:')
    print(' Zip files will be searched for and extracted to subXXX.zip files')
    print(' This script relies on heuristics and may not find all zip files, or dump incorrect zip files')
    sys.exit(-1)
# Script entry point: requires the input file path as the first argument.
if __name__ == '__main__':
    if len(sys.argv) < 2:
        # usage() exits, so process_file is not reached without a path
        usage()
    process_file(sys.argv[1])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment