Last active
August 13, 2018 10:26
-
-
Save alexander-hanel/1e33611ea310e3be34e5804b9ad6f85c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
__author__ = 'Alexander Hanel' | |
__date__ = '2018/02/28' | |
__version__ = "2.0" | |
__title__ = "struct creator" | |
import re | |
""" | |
Example:
    ARGS:
        run(STRING, 32 or 64 (bit), CLASS NAME)
    INPUT:
        run('''
        BYTE bType;
        BYTE bVersion;
        WORD Reserved;
        DWORD aiKeyAlg''', 32, "BLOBHEADER")
    OUTPUT:
        class BLOBHEADER:
            def __init__(self, data):
                self.bType = None # BYTE
                self.bVersion = None # BYTE
                self.Reserved = None # WORD
                self.aiKeyAlg = None # DWORD
                self._parse_data(data)

            def _parse_data(self, data):
                # Struct unpacking code. Initiate cur_addr to 0
                cur_addr = 0
                temp_data = data[cur_addr: cur_addr + 1]
                self.bType = struct.unpack("<B", temp_data)[0]
                cur_addr += 1
                temp_data = data[cur_addr: cur_addr + 1]
                self.bVersion = struct.unpack("<B", temp_data)[0]
                cur_addr += 1
                temp_data = data[cur_addr: cur_addr + 2]
                self.Reserved = struct.unpack("<H", temp_data)[0]
                cur_addr += 2
                temp_data = data[cur_addr: cur_addr + 4]
                self.aiKeyAlg = struct.unpack("<I", temp_data)[0]
                cur_addr += 4

Type value sources:
    https://docs.google.com/spreadsheet/ccc?key=0ArV2XEi4eBdpcEs1Q0VjZEc5R1lHZU83SzJkbUVjQmc#gid=0
    [MS-DTYP].pdf (search the web for it)
""" | |
def open_file(f):
    """
    Read a text file and return its complete contents.

    :param f: path of the file to read.
    :return: the whole file as a single string, newlines included, so it can
             be used as the multi-line buffer that run() splits on newlines.
    """
    # TODO - add open file option
    with open(f) as file_handle:
        # read() rather than readline(): a struct definition spans several
        # lines, and readline() would return only the first of them.
        return file_handle.read()
def strip_line(line):
    """
    Split a line into its whitespace-separated tokens.

    Splitting on any run of whitespace (not only the space character) keeps
    tab-indented definitions and lines ending in a carriage return from
    producing tokens with whitespace glued to them, and makes a
    whitespace-only line come back as an empty list.

    :param line: one line of a struct definition.
    :return: list of the non-empty tokens, in order.
    """
    return line.split()
def get_size(line):
    """
    get size for definitions. example UINT8 Name[16], returns Name, 16

    The element count may be written in decimal (Name[16]) or with a base
    prefix such as hexadecimal (Name[0x10]).

    :param line: the member name, optionally followed by "[count]".
    :return: tuple (name without the bracket part, element count); the count
             is 1 when there are no brackets.
    :raises ValueError: if the bracket is not closed or its content is not an
                        integer.
    """
    if "[" not in line:
        return line, 1
    match = re.search(r"\[(.*?)\]", line)
    if match is None:
        raise ValueError("unterminated array size in %r" % line)
    count_text = match.group(1).strip()
    try:
        # Plain decimal first so values such as "08" keep working.
        count = int(count_text)
    except ValueError:
        # Base 0 honours 0x / 0o / 0b prefixes, e.g. "0x10".
        count = int(count_text, 0)
    return line[:line.find("[")], count
# Lookup table for parse_line: (signed, size in bytes, type names).
# NOTE(review): FLOAT and DOUBLE are kept in the unsigned integer rows as in
# the original table, so they are sized correctly but decoded as integers.
_FIXED_SIZE_TYPES = (
    # Size: one byte unsigned
    (False, 1, frozenset(["BOOLEAN", "PBOOLEAN", "BYTE", "PBYTE", "LPBYTE",
                          "UCHAR", "PUCHAR", "UINT8", "CHAR", "PCHAR",
                          "OCTET"])),
    # Size: one byte signed
    (True, 1, frozenset(["INT8"])),
    # Size: two bytes unsigned
    (False, 2, frozenset(["UINT16", "USHORT", "WORD", "PWORD", "LPWORD"])),
    # Size: two bytes signed
    (True, 2, frozenset(["INT16", "SHORT"])),
    # Size: four bytes unsigned
    (False, 4, frozenset(["DWORD", "PDWORD", "LPDWORD", "DWORD32", "LPCSTR",
                          "LPCVOID", "LPCWSTR", "PSTR", "LPSTR", "LPWSTR",
                          "PWSTR", "UINT", "UINT32", "PULONG", "ULONG32",
                          "NTSTATUS", "BOOL", "PBOOL", "LPBOOL", "FLOAT",
                          "HRESULT", "RPC_BINDING_HANDLE"])),
    # Size: four bytes signed
    (True, 4, frozenset(["LPINT", "INT", "INT32", "LONG", "PLONG", "LPLONG",
                         "LONG32"])),
    # Size: eight bytes unsigned
    (False, 8, frozenset(["DWORD64", "PDWORD64", "DWORDLONG", "PDWORDLONG",
                          "QWORD", "UINT64", "ULONG64", "ULONGLONG",
                          "DOUBLE"])),
    # Size: eight bytes signed
    (True, 8, frozenset(["INT64", "LONGLONG", "LONG64"])),
    # Size: sixteen bytes unsigned (disabled in the original script)
    # (False, 16, frozenset(["UINT128"])),
)

# Types that are 4 bytes when bit == 32 and 8 bytes otherwise.
# ULONG_PTR and DWORD_PTR live only here; they used to be shadowed by the
# four-byte table, which made them 4 bytes even for 64-bit targets.
# NOTE(review): ULONG is kept here as in the original table - confirm that it
# should not be a fixed four-byte type.
_POINTER_SIZED_TYPES = frozenset(["ULONG_PTR", "DWORD_PTR", "ULONG",
                                  "HANDLE_T", "ADCONNECTION_HANDLE", "HANDLE",
                                  "LDAP_UDP_HANDLE", "PCONTEXT_HANDLE",
                                  "PPCONTEXT_HANDLE"])


def parse_line(line, bit):
    """
    extract information about the type, size, name, bit and signed.

    :param line: list of tokens for one definition line, type first and
                 member name second (e.g. ["BYTE", "bType;"]).
    :param bit: 32 selects 4-byte pointer-sized types; any other value
                selects 8 bytes.
    :return: tuple (signed, size, type_name, name, bit_flag).
             - all five are None for empty lines, comment lines and lines
               with fewer than two tokens;
             - signed, size and bit_flag are None when the type is unknown;
             - size is the element size multiplied by the array count;
             - bit_flag is True only for the BIT type.
    """
    # TODO: BSTR, LMCSTR, LMSTR, LPCSTR, LPCWSTR,
    # TODO: Bit value is returned but not implemented. No use cases or need to yet.
    # Fewer than two tokens means there is no member name to read (blank
    # line, lone brace, ...); treat it like a comment instead of failing.
    if len(line) < 2 or "//" in line[0]:
        return None, None, None, None, None
    name, multi = get_size(line[1].strip(";"))
    # Normalise the type: drop pointer stars and leading/trailing underscores.
    line_type = line[0].strip("*").upper().strip("_")
    # Size: single bit
    if line_type == "BIT":
        return False, 1 * multi, line_type, name, True
    for signed, width, type_names in _FIXED_SIZE_TYPES:
        if line_type in type_names:
            return signed, width * multi, line_type, name, False
    # can be either 32 bit or 64
    if line_type in _POINTER_SIZED_TYPES:
        width = 4 if bit == 32 else 8
        return False, width * multi, line_type, name, False
    # signed, size in bytes, type, name, bool for bits
    return None, None, line_type, name, None
def get_code(signed, length, bit, class_name, name):
    """
    format and create python code to unpack binary data.

    :param signed: truthy selects the signed struct format characters.
    :param length: total size of the field in bytes.
    :param bit: accepted for interface compatibility; not used yet.
    :param class_name: object the attribute is assigned on (e.g. "self").
    :param name: attribute name to assign.
    :return: generated source lines, tab-indented and newline-terminated.
             - widths 1, 2, 4 and 8 become a little-endian struct.unpack call;
             - any other non-zero width is kept as a raw slice of the data,
               for signed and unsigned fields alike;
             - a zero or missing width gives an empty string, so the result
               can always be concatenated.
    """
    # struct format characters keyed by field width in bytes
    if signed:
        formats = {1: "b", 2: "h", 4: "i", 8: "q"}
    else:
        formats = {1: "B", 2: "H", 4: "I", 8: "Q"}
    fmt = formats.get(length)
    if fmt is not None:
        return (
            '\t\ttemp_data = data[cur_addr: cur_addr + %d]\n'
            '\t\t%s.%s = struct.unpack("<%s", temp_data)[0]\n'
            '\t\tcur_addr += %d\n'
        ) % (length, class_name, name, fmt, length)
    if length:
        # No single struct code covers this width: keep the raw bytes.
        return (
            '\t\t%s.%s = data[cur_addr: cur_addr + %s]\n'
            '\t\tcur_addr += %s\n'
        ) % (class_name, name, length, length)
    return ""
def create_class_header(name):
    """ Build the "class <name>:" line followed by the __init__ signature """
    template = "class %s:\n\tdef __init__(self, data):\n"
    return template % name
def create_parser_function_header():
    """ Build the opening lines of the generated _parse_data method """
    header_lines = (
        "",
        "\tdef _parse_data(self, data):",
        "\t\t# Struct unpacking code. Initiate cur_addr to 0",
        "\t\tcur_addr = 0",
    )
    # The leading empty entry yields the blank line before the def; there is
    # deliberately no newline after the last line.
    return "\n".join(header_lines)
def append_parser_function():
    """ Build the __init__ line that invokes the generated _parse_data """
    indent = "\t" * 2
    return indent + "self._parse_data(data)"
def run(buffer, bit, class_name=None):
    """
    Generate and print a Python class that parses the described structure.

    The printed class stores every member as an attribute and fills them in
    from a byte string in a generated _parse_data method.

    :param buffer: struct member definitions, one "TYPE name;" per line.
    :param bit: 32 or 64; forwarded to parse_line to size pointer-like types.
    :param class_name: name of the generated class; "STRUCT" is used when it
                       is omitted so the output is still valid Python.
    :return: None, the generated source is printed to stdout.
    """
    if class_name is None:
        class_name = "STRUCT"
    init_parts = [create_class_header(class_name)]
    parse_parts = []
    for line in buffer.split("\n"):
        signed, size, type_name, name, b = parse_line(strip_line(line), bit)
        # if the line begins with a comment type_name is None
        if type_name is None:
            continue
        # create the definition for the class variables
        init_parts.append("\t\tself.%s = None # %s \n" % (name, type_name))
        if size is not None:
            code = get_code(signed, size, b, "self", name)
            if code is None:
                # No unpack code was produced for this member; flag it in
                # the output instead of failing while concatenating.
                code = "\t\t# TODO: no unpack code for %s (%s bytes)\n" % (name, size)
            parse_parts.append(code)
        else:
            # Unknown type: assume it is another generated struct class.
            # TODO - fix struct of structs
            parse_parts.append("\t\tself.%s = %s() # TODO:\n" % (name, type_name))
            parse_parts.append("\t\t# cur_addr = UNKNOWN # Update cur_addr\n")
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("".join(init_parts) + append_parser_function())
    print(create_parser_function_header())
    print("".join(parse_parts))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
To execute the code I typically add the struct definitions to the end of the above script. For example.
If a structure contains an embedded structure, manually calculate its size and add it as
`BYTE STRUCT_NAME[INTEGER_SIZE];`
for example `BYTE Name[8];`
. This can be used to read a struct of n size. The above snippet will output Python code that can be used to parse a binary structure. Make sure to add `import struct`
to your code.