Created
July 4, 2024 13:54
-
-
Save UserUnknownFactor/653c6e07df920d2a253997b1b1860ccc to your computer and use it in GitHub Desktop.
Split a file into two by binary signature
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import os
import shutil
import sys
def find_and_dump(file_path, signature, ext1, ext2):
    """Split a file in two at the first occurrence of a byte signature.

    The input is scanned in fixed-size chunks. When the signature is found,
    everything before it is written to ``<base><ext2>`` and everything from
    the first byte of the signature to the end of the file (signature
    included) is written to ``<base><ext1>``, where ``<base>`` is
    ``file_path`` with its extension removed.

    Args:
        file_path: Path to the file to search.
        signature: Byte signature to search for.
        ext1: Suffix of the output file that receives the signature and
            everything after it.
        ext2: Suffix of the output file that receives everything before
            the signature.

    Returns:
        True if the signature was found and both parts were written,
        False if the signature does not occur in the file.

    Raises:
        ValueError: If the signature was found but an output path is the
            input file itself, or both output paths are the same file.
            Writing in those cases would truncate data that still has to
            be read, or overwrite the first part with the second.
    """
    buffer_size = 4096
    # Number of trailing bytes carried into the next iteration so that a
    # signature straddling a chunk boundary is still seen in one piece.
    overlap = max(len(signature) - 1, 0)

    with open(file_path, 'rb') as f:
        print(f"signature: {signature} buffer: {buffer_size}")
        carry = b''
        while True:
            chunk = f.read(buffer_size)
            if not chunk:
                break

            # The window is the carried tail followed by the fresh chunk; it
            # always ends at the current file position.
            window = carry + chunk
            pos = window.find(signature)
            if pos != -1:
                # Translate the offset inside the window to a file offset.
                file_pos = f.tell() - len(window) + pos

                # Output names: input path without its extension + suffix.
                base, _ = os.path.splitext(file_path)
                before_file_path = base + ext2
                after_file_path = base + ext1

                # Opening an output with 'wb' truncates it, so refuse any
                # layout in which that would destroy data we still need.
                if _same_path(before_file_path, file_path) or _same_path(after_file_path, file_path):
                    raise ValueError(
                        f"Output file would overwrite the input file {file_path}; "
                        "choose different extensions"
                    )
                if _same_path(before_file_path, after_file_path):
                    raise ValueError(
                        f"Both parts would be written to {before_file_path}; "
                        "choose different extensions"
                    )

                # Dump content before the signature (streamed, not slurped).
                with open(before_file_path, 'wb') as before_file:
                    f.seek(0)
                    _copy_bytes(f, before_file, file_pos)
                print(f"Content before signature dumped to: {before_file_path}")

                # Dump the signature and everything that follows it.
                with open(after_file_path, 'wb') as after_file:
                    f.seek(file_pos)
                    shutil.copyfileobj(f, after_file)
                print(f"Content after signature dumped to: {after_file_path}")
                return True

            # Take the tail from the whole window (not just the last chunk)
            # so signatures longer than one chunk can still be matched.
            carry = window[-overlap:] if overlap else b''

    # Signature not found
    print(f"Signature not found in {file_path}")
    return False


def _same_path(path_a, path_b):
    """Return True if both paths refer to the same file."""
    try:
        return os.path.samefile(path_a, path_b)
    except OSError:
        # At least one path does not exist yet; compare normalized names.
        norm_a = os.path.normcase(os.path.abspath(path_a))
        norm_b = os.path.normcase(os.path.abspath(path_b))
        return norm_a == norm_b


def _copy_bytes(src, dst, count, chunk_size=64 * 1024):
    """Copy at most ``count`` bytes from ``src`` to ``dst`` in chunks."""
    remaining = count
    while remaining > 0:
        data = src.read(min(chunk_size, remaining))
        if not data:
            break
        dst.write(data)
        remaining -= len(data)
if __name__ == "__main__":
    # Command-line entry point: parse the arguments, build the signature and
    # split the file. Exit status is 0 when the signature was found and the
    # parts were written, 1 when it was not found, 2 on a usage error.
    parser = argparse.ArgumentParser(description='Find a byte signature and dump content.')
    parser.add_argument('file', help='Path to the file to search.')
    parser.add_argument('-s', '--signature', default=None, help='Byte signature to search for (e.g., "DE AD BE EF").')
    parser.add_argument('-b', '--beforeext', default='.bin', help='Extension of the first file.')
    # NOTE: the long option is spelled "--aftertext"; kept as-is so existing
    # command lines keep working.
    parser.add_argument('-a', '--aftertext', default='.xp3', help='Extension of the second file.')
    args = parser.parse_args()

    # Convert signature string to bytes object
    if args.signature:
        try:
            signature_bytes = bytes.fromhex(args.signature.replace(" ", ""))
        except ValueError as exc:
            # Report malformed hex as a usage error instead of a traceback.
            parser.error(f"invalid signature {args.signature!r}: {exc}")
    else:
        # Default signature begins with b'XP3', matching the default '.xp3'
        # extension used for the second output file.
        signature_bytes = b'XP3\r\n\x20\x0A\x1A\x8B\x67\x01'

    found = find_and_dump(args.file, signature_bytes, args.aftertext, args.beforeext)
    # Propagate the outcome so shell scripts can detect "signature not found".
    sys.exit(0 if found else 1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment