alexander-hanel · August 13, 2018 10:26 · alexander-hanel · Feb 28, 2018
diff --git a/struct_creator.py b/struct_creator.py
 # Module metadata: author, date, version and title strings.
 __author__ = 'Alexander Hanel'
 __date__ = '2018/02/28'
 __version__ = "2.0"
 __title__ = "struct creator"
 import re

 """

 Example:

 ARGS:
    run(STRING, 32 or 64 (bit), NAMED)

 INPUT:
    run('''
    BYTE bType;
    BYTE bVersion;
    WORD Reserved;
    DWORD aiKeyAlg''', 32, "BLOBHEADER")

 class BLOBHEADER:
 	def __init__(self, data):
 		self.bType = None #  BYTE 
 		self.bVersion = None #  BYTE 
 		self.Reserved = None #  WORD 
 		self.aiKeyAlg = None #  DWORD 
 		self._parse_data(data)

 	def _parse_data(self, data):
 		# Struct unpacking code. Initiate cur_addr to 0
 		cur_addr = 0
 		temp_data = data[cur_addr: cur_addr + 1]
 		self.bType = struct.unpack("<B", temp_data)[0]
 		cur_addr += 1
 		temp_data = data[cur_addr: cur_addr + 1]
 		self.bVersion = struct.unpack("<B", temp_data)[0]
 		cur_addr += 1
 		temp_data = data[cur_addr: cur_addr + 2]
 		self.Reserved = struct.unpack("<H", temp_data)[0]
 		cur_addr += 2
 		temp_data = data[cur_addr: cur_addr + 4]
 		self.aiKeyAlg = struct.unpack("<I", temp_data)[0]
 		cur_addr += 4


 Type value sources:
    https://docs.google.com/spreadsheet/ccc?key=0ArV2XEi4eBdpcEs1Q0VjZEc5R1lHZU83SzJkbUVjQmc#gid=0
    [MS-DTYP].pdf (duck duck go it)
 """

 def open_file(f):
     """
     Return the first line of the text file at path ``f``.

     Only one ``readline()`` is performed, so any further lines in the
     file are not returned. The file is closed before returning.
     """
     # TODO - add open file option
     with open(f) as handle:
         first_line = handle.readline()
     return first_line


 def strip_line(line):
     """
     Split a line into its whitespace-separated tokens.

     Any run of whitespace (spaces, tabs, a trailing carriage return)
     separates tokens, and empty tokens are never returned, so an empty or
     blank line yields an empty list.
     """
     # str.split() with no separator splits on every kind of whitespace and
     # drops empty strings, which covers tab-indented and CRLF input too.
     return line.split()


 def get_size(line):
     """
     Split an optional array suffix off a field name.

     Example: "Name[16]" returns ("Name", 16); a plain "Name" returns
     ("Name", 1). The count may be a decimal number or a prefixed literal
     such as "0x10".

     Raises ValueError when the closing bracket is missing or the count is
     not an integer literal.
     """
     if "[" not in line:
         return line, 1
     match = re.search(r"\[(.*?)\]", line)
     if match is None:
         raise ValueError("unterminated array size in %r" % line)
     count = match.group(1).strip()
     try:
         # Plain decimal first so values like "010" keep meaning ten.
         multi = int(count)
     except ValueError:
         # Base 0 makes int() honour 0x / 0o / 0b prefixes.
         multi = int(count, 0)
     return line[:line.find("[")], multi


 def parse_line(line, bit):
     """
     extract information about the type, size, name, bit and signed.
     return signed, size, type_name, name, bit

     ``line`` is the token list for one member definition (type first, name
     second) and ``bit`` is 32 or 64, used for pointer-width types.

     Returns five None values when the line holds no field (empty, a "//"
     comment, or fewer than two tokens). For a type that is not in the
     tables, signed, size and the bit flag are None while type_name and
     name are still filled in.
     """
     # TODO: BSTR, LMCSTR, LMSTR, LPCSTR, LPCWSTR,
     # TODO: Bit value is returned but not implemented. No use cases or need to yet.
     # A field needs both a type token and a name token; lone tokens such as
     # "{" or "};" are skipped instead of raising IndexError.
     if len(line) < 2 or "//" in line[0]:
         return None, None, None, None, None
     name, multi = get_size(line[1].strip(";"))
     line_type = line[0].strip("*").upper().strip("__")
     # Size: single bit
     if line_type == "BIT":
         return False, 1 * multi, line_type, name, True
     # Width of the types that follow the target's pointer size.
     pointer_width = 4 if bit == 32 else 8
     # Rows are (signed, width in bytes, type names).
     widths = (
         (False, 1, ("BOOLEAN", "PBOOLEAN", "BYTE", "PBYTE", "LPBYTE", "UCHAR",
                     "PUCHAR", "UINT8", "CHAR", "PCHAR", "OCTET")),
         (True, 1, ("INT8",)),
         (False, 2, ("UINT16", "USHORT", "WORD", "PWORD", "LPWORD")),
         (True, 2, ("INT16", "SHORT")),
         (False, 4, ("DWORD", "PDWORD", "LPDWORD", "DWORD32", "LPCSTR",
                     "LPCVOID", "LPCWSTR", "PSTR", "LPSTR", "LPWSTR", "PWSTR",
                     "UINT", "UINT32", "PULONG", "ULONG32", "NTSTATUS", "BOOL",
                     "PBOOL", "LPBOOL", "FLOAT", "HRESULT",
                     "RPC_BINDING_HANDLE")),
         (True, 4, ("LPINT", "INT", "INT32", "LONG", "PLONG", "LPLONG",
                    "LONG32")),
         # ULONG_PTR and DWORD_PTR live only here so that they follow ``bit``.
         # NOTE(review): ULONG is kept pointer-width as before - confirm.
         (False, pointer_width, ("ULONG_PTR", "DWORD_PTR", "ULONG", "HANDLE_T",
                                 "ADCONNECTION_HANDLE", "HANDLE",
                                 "LDAP_UDP_HANDLE", "PCONTEXT_HANDLE",
                                 "PPCONTEXT_HANDLE")),
         (False, 8, ("DWORD64", "PDWORD64", "DWORDLONG", "PDWORDLONG", "QWORD",
                     "UINT64", "ULONG64", "ULONGLONG", "DOUBLE")),
         (True, 8, ("INT64", "LONGLONG", "LONG64")),
     )
     for signed, width, type_names in widths:
         if line_type in type_names:
             return signed, width * multi, line_type, name, False
     # TODO: UINT128 (sixteen bytes unsigned) is not handled yet.
     # signed, size in bytes, type, name, bool for bits
     return None, None, line_type, name, None


 def get_code(signed, length, bit, class_name, name):
     """
     format and create python code to unpack binary data.

     ``length`` is the field size in bytes. Sizes 1, 2, 4 and 8 produce a
     little-endian struct.unpack call (lower-case format character when
     ``signed`` is true, upper-case otherwise). Any other size produces a
     raw slice of ``data``. ``class_name`` and ``name`` form the assignment
     target, e.g. "self" and "bType". ``bit`` is accepted but not used.

     Always returns a string, so the caller can concatenate the result.
     """
     formats = {1: "b", 2: "h", 4: "i", 8: "q"}
     fmt = formats.get(length)
     if fmt is None:
         # No integer format for this width: keep the bytes as they are.
         return (
             "\t\t%s.%s = data[cur_addr: cur_addr + %s]\n"
             "\t\tcur_addr += %s\n"
         ) % (class_name, name, length, length)
     if not signed:
         fmt = fmt.upper()
     return (
         "\t\ttemp_data = data[cur_addr: cur_addr + %d]\n"
         "\t\t%s.%s = struct.unpack(\"<%s\", temp_data)[0]\n"
         "\t\tcur_addr += %d\n"
     ) % (length, class_name, name, fmt, length)

 def create_class_header(name):
     """
     Create class header text: the ``class`` line followed by the
     tab-indented ``__init__`` signature, each ending in a newline.
     """
     # Explicit escapes keep the emitted text independent of how this source
     # line happens to be indented.
     return "class %s:\n\tdef __init__(self, data):\n" % name

 def create_parser_function_header():
     """
     Creates the start of the _parse_data function: a blank line, the method
     signature, a comment and the ``cur_addr = 0`` initialisation (no
     trailing newline).
     """
     # Explicit escapes keep the emitted text independent of how these source
     # lines happen to be indented.
     return (
         "\n\tdef _parse_data(self, data):\n"
         "\t\t# Struct unpacking code. Initiate cur_addr to 0\n"
         "\t\tcur_addr = 0"
     )

 def append_parser_function():
     """
     Return the generated statement that calls ``_parse_data`` on the
     constructor's ``data`` argument (two tabs deep, no trailing newline).
     """
     call_line = "\t\tself._parse_data(data)"
     return call_line

 def run(buffer, bit, class_name=None):
     """
     Print a Python class that unpacks the struct described in ``buffer``.

     ARGS:
        buffer: member definitions, one "TYPE name;" per line.
        bit: 32 or 64, forwarded to parse_line.
        class_name: name placed in the generated class header; it is used
            as given, so pass a real name.

     Writes the class header with its attribute list, then the _parse_data
     header, then the unpacking statements to stdout. Returns None. The
     printed code calls struct.unpack, so whoever uses it must import
     struct.
     """
     init_lines = [create_class_header(class_name)]
     parse_lines = []
     # splitlines() also copes with CRLF input.
     for line in buffer.splitlines():
         signed, size, type_name, name, b = parse_line(strip_line(line), bit)
         # if the line begins with a comment type_name is None
         if type_name is None:
             continue
         # create the definition for the class variables
         init_lines.append("\t\tself.%s = None #  %s \n" % (name, type_name))
         if size is not None:
             code = get_code(signed, size, b, "self", name)
             # get_code may yield nothing for a size it cannot express.
             if code:
                 parse_lines.append(code)
         else:
             # TODO - fix struct of structs
             parse_lines.append("\t\tself.%s = %s()   # TODO:\n" % (name, type_name))
             parse_lines.append("\t\t# cur_addr = UNKNOWN # Update cur_addr\n")
     # Single-argument print(...) behaves the same on Python 2 and 3.
     print("".join(init_lines) + append_parser_function())
     print(create_parser_function_header())
     print("".join(parse_lines))
	# Module metadata: author, date, version and title strings.
	__author__ = 'Alexander Hanel'
	__date__ = '2018/02/28'
	__version__ = "2.0"
	__title__ = "struct creator"
	import re

	"""

	Example:

	ARGS:
	run(STRING, 32 or 64 (bit), NAMED)

	INPUT:
	run('''
	BYTE bType;
	BYTE bVersion;
	WORD Reserved;
	DWORD aiKeyAlg''', 32, "BLOBHEADER")

	class BLOBHEADER:
	def __init__(self, data):
	self.bType = None # BYTE
	self.bVersion = None # BYTE
	self.Reserved = None # WORD
	self.aiKeyAlg = None # DWORD
	self._parse_data(data)

	def _parse_data(self, data):
	# Struct unpacking code. Initiate cur_addr to 0
	cur_addr = 0
	temp_data = data[cur_addr: cur_addr + 1]
	self.bType = struct.unpack("<B", temp_data)[0]
	cur_addr += 1
	temp_data = data[cur_addr: cur_addr + 1]
	self.bVersion = struct.unpack("<B", temp_data)[0]
	cur_addr += 1
	temp_data = data[cur_addr: cur_addr + 2]
	self.Reserved = struct.unpack("<H", temp_data)[0]
	cur_addr += 2
	temp_data = data[cur_addr: cur_addr + 4]
	self.aiKeyAlg = struct.unpack("<I", temp_data)[0]
	cur_addr += 4


	Type value sources:
	https://docs.google.com/spreadsheet/ccc?key=0ArV2XEi4eBdpcEs1Q0VjZEc5R1lHZU83SzJkbUVjQmc#gid=0
	[MS-DTYP].pdf (duck duck go it)
	"""

	def open_file(f):
		"""
		Return the first line of the text file at path ``f``.

		Only one ``readline()`` is performed, so any further lines in the
		file are not returned. The file is closed before returning.
		"""
		# TODO - add open file option
		with open(f) as file_handle:
			return file_handle.readline()


	def strip_line(line):
		"""
		Split a line into its whitespace-separated tokens.

		Any run of whitespace (spaces, tabs, a trailing carriage return)
		separates tokens, and empty tokens are never returned, so an empty
		or blank line yields an empty list.
		"""
		# str.split() with no separator splits on every kind of whitespace
		# and drops empty strings.
		return line.split()


	def get_size(line):
		"""
		Split an optional array suffix off a field name.

		Example: "Name[16]" returns ("Name", 16); a plain "Name" returns
		("Name", 1). The count may be a decimal number or a prefixed
		literal such as "0x10".

		Raises ValueError when the closing bracket is missing or the count
		is not an integer literal.
		"""
		if "[" not in line:
			return line, 1
		match = re.search(r"\[(.*?)\]", line)
		if match is None:
			raise ValueError("unterminated array size in %r" % line)
		count = match.group(1).strip()
		try:
			# Plain decimal first so values like "010" keep meaning ten.
			multi = int(count)
		except ValueError:
			# Base 0 makes int() honour 0x / 0o / 0b prefixes.
			multi = int(count, 0)
		return line[:line.find("[")], multi


	def parse_line(line, bit):
		"""
		extract information about the type, size, name, bit and signed.
		return signed, size, type_name, name, bit

		``line`` is the token list for one member definition (type first,
		name second) and ``bit`` is 32 or 64, used for pointer-width types.

		Returns five None values when the line holds no field (empty, a
		"//" comment, or fewer than two tokens). For a type that is not in
		the tables, signed, size and the bit flag are None while type_name
		and name are still filled in.
		"""
		# TODO: BSTR, LMCSTR, LMSTR, LPCSTR, LPCWSTR,
		# TODO: Bit value is returned but not implemented. No use cases or need to yet.
		# A field needs both a type token and a name token; lone tokens such
		# as "{" or "};" are skipped instead of raising IndexError.
		if len(line) < 2 or "//" in line[0]:
			return None, None, None, None, None
		name, multi = get_size(line[1].strip(";"))
		line_type = line[0].strip("*").upper().strip("__")
		# Size: single bit
		if line_type == "BIT":
			return False, 1 * multi, line_type, name, True
		# Width of the types that follow the target's pointer size.
		pointer_width = 4 if bit == 32 else 8
		# Rows are (signed, width in bytes, type names).
		widths = (
			(False, 1, ("BOOLEAN", "PBOOLEAN", "BYTE", "PBYTE", "LPBYTE",
				"UCHAR", "PUCHAR", "UINT8", "CHAR", "PCHAR", "OCTET")),
			(True, 1, ("INT8",)),
			(False, 2, ("UINT16", "USHORT", "WORD", "PWORD", "LPWORD")),
			(True, 2, ("INT16", "SHORT")),
			(False, 4, ("DWORD", "PDWORD", "LPDWORD", "DWORD32", "LPCSTR",
				"LPCVOID", "LPCWSTR", "PSTR", "LPSTR", "LPWSTR", "PWSTR",
				"UINT", "UINT32", "PULONG", "ULONG32", "NTSTATUS", "BOOL",
				"PBOOL", "LPBOOL", "FLOAT", "HRESULT",
				"RPC_BINDING_HANDLE")),
			(True, 4, ("LPINT", "INT", "INT32", "LONG", "PLONG", "LPLONG",
				"LONG32")),
			# ULONG_PTR and DWORD_PTR live only here so they follow ``bit``.
			# NOTE(review): ULONG is kept pointer-width as before - confirm.
			(False, pointer_width, ("ULONG_PTR", "DWORD_PTR", "ULONG",
				"HANDLE_T", "ADCONNECTION_HANDLE", "HANDLE",
				"LDAP_UDP_HANDLE", "PCONTEXT_HANDLE", "PPCONTEXT_HANDLE")),
			(False, 8, ("DWORD64", "PDWORD64", "DWORDLONG", "PDWORDLONG",
				"QWORD", "UINT64", "ULONG64", "ULONGLONG", "DOUBLE")),
			(True, 8, ("INT64", "LONGLONG", "LONG64")),
		)
		for signed, width, type_names in widths:
			if line_type in type_names:
				return signed, width * multi, line_type, name, False
		# TODO: UINT128 (sixteen bytes unsigned) is not handled yet.
		# signed, size in bytes, type, name, bool for bits
		return None, None, line_type, name, None


	def get_code(signed, length, bit, class_name, name):
		"""
		format and create python code to unpack binary data.

		``length`` is the field size in bytes. Sizes 1, 2, 4 and 8 produce a
		little-endian struct.unpack call (lower-case format character when
		``signed`` is true, upper-case otherwise). Any other size produces a
		raw slice of ``data``. ``class_name`` and ``name`` form the
		assignment target, e.g. "self" and "bType". ``bit`` is accepted but
		not used.

		Always returns a string, so the caller can concatenate the result.
		"""
		formats = {1: "b", 2: "h", 4: "i", 8: "q"}
		fmt = formats.get(length)
		if fmt is None:
			# No integer format for this width: keep the bytes as they are.
			return (
				"\t\t%s.%s = data[cur_addr: cur_addr + %s]\n"
				"\t\tcur_addr += %s\n"
			) % (class_name, name, length, length)
		if not signed:
			fmt = fmt.upper()
		return (
			"\t\ttemp_data = data[cur_addr: cur_addr + %d]\n"
			"\t\t%s.%s = struct.unpack(\"<%s\", temp_data)[0]\n"
			"\t\tcur_addr += %d\n"
		) % (length, class_name, name, fmt, length)

	def create_class_header(name):
		"""
		Create class header text: the ``class`` line followed by the
		tab-indented ``__init__`` signature, each ending in a newline.
		"""
		# Explicit escapes keep the emitted text independent of how this
		# source line happens to be indented.
		return "class %s:\n\tdef __init__(self, data):\n" % name

	def create_parser_function_header():
		"""
		Creates the start of the _parse_data function: a blank line, the
		method signature, a comment and the ``cur_addr = 0`` initialisation
		(no trailing newline).
		"""
		# Explicit escapes keep the emitted text independent of how these
		# source lines happen to be indented.
		return (
			"\n\tdef _parse_data(self, data):\n"
			"\t\t# Struct unpacking code. Initiate cur_addr to 0\n"
			"\t\tcur_addr = 0"
		)

	def append_parser_function():
		"""
		Return the generated statement that calls ``_parse_data`` on the
		constructor's ``data`` argument (two tabs deep, no trailing newline).
		"""
		return "\t\tself._parse_data(data)"

	def run(buffer, bit, class_name=None):
		"""
		Print a Python class that unpacks the struct described in ``buffer``.

		ARGS:
			buffer: member definitions, one "TYPE name;" per line.
			bit: 32 or 64, forwarded to parse_line.
			class_name: name placed in the generated class header; it is
				used as given, so pass a real name.

		Writes the class header with its attribute list, then the
		_parse_data header, then the unpacking statements to stdout.
		Returns None. The printed code calls struct.unpack, so whoever uses
		it must import struct.
		"""
		init_lines = [create_class_header(class_name)]
		parse_lines = []
		# splitlines() also copes with CRLF input.
		for line in buffer.splitlines():
			signed, size, type_name, name, b = parse_line(strip_line(line), bit)
			# if the line begins with a comment type_name is None
			if type_name is None:
				continue
			# create the definition for the class variables
			init_lines.append("\t\tself.%s = None # %s \n" % (name, type_name))
			if size is not None:
				code = get_code(signed, size, b, "self", name)
				# get_code may yield nothing for a size it cannot express.
				if code:
					parse_lines.append(code)
			else:
				# TODO - fix struct of structs
				parse_lines.append("\t\tself.%s = %s() # TODO:\n" % (name, type_name))
				parse_lines.append("\t\t# cur_addr = UNKNOWN # Update cur_addr\n")
		# Single-argument print(...) behaves the same on Python 2 and 3.
		print("".join(init_lines) + append_parser_function())
		print(create_parser_function_header())
		print("".join(parse_lines))