ulexec · October 6, 2018 03:27
diff --git a/flareon5_ch12_subleq_processor.py b/flareon5_ch12_subleq_processor.py

 # Subleq Processor module for Flareon5 challenge 12. Tested in IDA 7
 # by @ulexec

 import sys
 import idc
 import idaapi
 import idautils
 from idc import *
 from idaapi import *

 # ----------------------------------------------------------------------
 class subleq_processor_t(idaapi.processor_t):
    """
    Processor module for the subleq machine of Flare-On 5 challenge 12.

    Processor module classes must derive from idaapi.processor_t.
    notify_ana pattern-matches runs of 16-bit words into the
    pseudo-instructions listed in `instruc`, notify_emu creates the code
    cross-references, and notify_out_insn / notify_out_operand render the
    disassembly text.
    """

    # IDP id (numbers above 0x8000 are reserved for third-party modules)
    id = 0x8000 + 1

    # Processor features
    flag = PR_ADJSEGS | PRN_HEX

    # Number of bits in a byte for code segments (usually 8)
    # IDA supports values up to 32 bits
    cnbits = 8

    # Number of bits in a byte for non-code segments (usually 8)
    # IDA supports values up to 32 bits
    dnbits = 8

    # Short processor names
    # Each name should be shorter than 9 characters
    psnames = ['subleq']

    # Long processor names
    # No restriction on name lengths.
    plnames = ['Subleq']

    # Register names. CS and DS are the two virtual segment registers that
    # the segment register indexes below refer to; they must be present in
    # this list so that those indexes are valid.
    reg_names = [
        'ACC0',
        'CS',
        'DS',
    ]

    # Segment register information (virtual CS and DS registers, because the
    # processor has no segment registers of its own)
    reg_first_sreg = reg_names.index('CS')
    reg_last_sreg = reg_names.index('DS')

    # Size of a segment register in bytes
    segreg_size = 0

    # Indexes of the code and data segment registers
    reg_code_sreg = reg_names.index('CS')
    reg_data_sreg = reg_names.index('DS')

    # Array of instructions; the position in this list is the itype that
    # notify_ana stores in the decoded instruction.
    instruc = [
        {'name': 'jmp',     'feature':CF_JUMP,  'cmt': "Unconditional jump"},
        {'name': 'add',     'feature':0,        'cmt': "Addition without Carry"},
        {'name': 'mov',     'feature':0,        'cmt': "Move values"},
        {'name': 'jnz',     'feature':CF_JUMP,  'cmt': "Conditional jump if not zero"},
        {'name': 'not',     'feature':0,        'cmt': "Logical not"},
        {'name': 'zero',     'feature':0,       'cmt': "Zero accumulator"},
        {'name': 'sub',     'feature':0,        'cmt': "Substraction without carry"},
        {'name': 'jz',      'feature':CF_JUMP,  'cmt': "Conditional jump if zero"}

    ]

    # Mnemonic -> itype lookup, derived from `instruc` so that the two can
    # never disagree.
    instruc_idx = {ins['name']: i for i, ins in enumerate(instruc)}

    assembler = {
        "header": [".subleq"],
        "flag": AS_NCHRE | ASH_HEXF0 | ASD_DECF0 | ASO_OCTF0 | ASB_BINF0 | AS_NOTAB,
        "uflag": 0,
        "name": "subleq assembler",
        "origin": ".org",
        "end": ".end",
        "cmnt": ";",
        "ascsep": "'",
        "accsep": "'",
        "esccodes": "\"'",
        "a_ascii": ".ascii",
        "a_byte": ".byte",
        "a_word": ".word",
        "a_dword": ".dword",
        "a_bss": "dfs %s",
        "a_seg": "seg",
        "a_curip": "PC",
        "a_public": "",
        "a_weak": "",
        "a_extrn": ".extern",
        "a_comdef": "",
        "a_align": ".align",
        "lbrace": "(",
        "rbrace": ")",
        "a_mod": "%",
        "a_band": "&",
        "a_bor": "|",
        "a_xor": "^",
        "a_bnot": "~",
        "a_shl": "<<",
        "a_shr": ">>",
        "a_sizeof_fmt": "size %s",
    }

    # icode of the first instruction
    # NOTE(review): the itypes produced by notify_ana are the 0-based indexes
    # of `instruc`, so 0xa looks inconsistent with them - confirm whether this
    # should be 0.
    instruc_start = 0xa

    # icode of the last instruction + 1
    instruc_end = len(instruc) + 1

    def notify_emu(self, cmd):
        """
        Emulate instruction: create the code cross-references for it.
        Upon entrance to this function all information about the
        instruction is in the 'cmd' structure.
        If zero is returned, the kernel will delete the instruction.
        args:
          cmd - the decoded instruction
        returns: True (the instruction is always kept)
        """
        feature = cmd.get_canon_feature()
        mnem = self.instruc[cmd.itype]['name']

        if mnem == 'jmp':
            # Unconditional jump: only the target gets a reference, there is
            # no fall-through to the next instruction.
            if cmd[0].value != 0:
                add_cref(cmd.ea, cmd[0].addr, fl_JN)
            return True

        # Conditional jumps carry their target in the second operand.
        if mnem in ('jz', 'jnz') and cmd[1].value != 0:
            add_cref(cmd.ea, cmd[1].addr, fl_JN)

        # Everything except 'jmp' flows into the next instruction unless the
        # instruction is flagged CF_STOP.
        if (feature & CF_STOP) == 0:
            add_cref(cmd.ea, cmd.ea + cmd.size, fl_F)
        return True

    def notify_out_operand(self, ctx, op):
        """
        Generate the text of one operand: its value as a hex literal.
        args:
          ctx - output context
          op - the operand to print
        returns: True
        """
        ctx.out_line("0x%x" % (op.value,))
        return True

    def notify_out_insn(self, ctx):
        """
        Generate the text of an instruction: the mnemonic followed by every
        o_imm / o_near operand.
        args:
          ctx - output context holding the instruction in ctx.insn
        returns: None
        """
        ctx.out_mnemonic()
        operands = ctx.insn.ops
        for i in range(len(operands)):
            if operands[i].type not in (o_imm, o_near):
                continue
            # Odd-indexed operands (i.e. the second one) get a leading comma.
            if i % 2 != 0:
                ctx.out_char(',')
            ctx.out_char(" ")
            ctx.out_one_operand(i)

        ctx.flush_outbuf()
        return

    def notify_func_bounds(self, code, func_ea, max_func_end_ea):
        """
        find_func_bounds() finished its work
        The module may fine tune the function bounds; this module accepts
        them unchanged.
        args:
          possible code - one of FIND_FUNC_XXX (check find_func_bounds)
          func_ea - func start ea
          max_func_end_ea (from the kernel's point of view)
        returns: possible_return_code (always FIND_FUNC_OK)
        """
        return FIND_FUNC_OK

    def get_instruction_itype(self, name):
        """
        Look up the itype of a mnemonic.
        args:
          name - mnemonic, e.g. 'jmp'
        returns: the itype, or -1 (after printing a message) if the
                 mnemonic is unknown
        """
        itype = self.instruc_idx.get(name)
        if itype is None:
            # Parenthesised single-argument form prints the same text on
            # Python 2 and Python 3.
            print("Could not find instruction %s" % name)
            return -1
        return itype

    def get_instruction_name(self, itype):
        """
        Look up the mnemonic of an itype.
        args:
          itype - index into `instruc`
        returns: the mnemonic, or None if itype is out of range
        """
        # The explicit lower bound keeps negative values from wrapping
        # around to the end of the list.
        if 0 <= itype < len(self.instruc):
            return self.instruc[itype]['name']
        return None

    def out_mnem(self, ctx):
        """
        Output the mnemonic of the instruction held in ctx.insn.
        args:
          ctx - output context
        """
        ctx.out_line(self.get_instruction_name(ctx.insn.itype))

    def get_register(self, name):
        """
        Look up the index of a register.
        args:
          name - register name as listed in `reg_names`
        returns: the register index, or -1 if there is no such register
        """
        try:
            return self.reg_names.index(name)
        except ValueError:
            return -1

    @staticmethod
    def _set_imm(op, value):
        """Fill 'op' as an immediate operand holding 'value'."""
        op.type = o_imm
        op.value = value

    @staticmethod
    def _set_near(op, target):
        """Fill 'op' as a near code reference to 'target'."""
        op.type = o_near
        op.value = target
        op.addr = target

    def notify_ana(self, cmd):
        """
        Decodes an instruction into 'cmd'.

        Six consecutive 16-bit words starting at cmd.ea are read and matched
        against the word patterns of the pseudo-instructions. Every word
        used as an operand is doubled before it is stored - presumably this
        turns a 16-bit word index into a byte address (TODO confirm).
        args:
          cmd - the instruction to fill in; cmd.ea is the address to decode
        returns: cmd.size (=the size of the decoded instruction); the size
                 is left untouched when no pattern matches
        """
        h1, h2, h3, h4, h5, h6 = [
            idc.get_wide_word(cmd.ea + offset) for offset in range(0, 12, 2)
        ]

        if h1 == 0 and h2 == 0 and h3 != 0:
            cmd.itype = self.get_instruction_itype('jmp')
            self._set_near(cmd[0], h3 * 2)
            cmd.size = 6

        elif h1 != 0 and h2 != 0 and h1 != h2 and h3 == 0:
            cmd.itype = self.get_instruction_itype('sub')
            self._set_imm(cmd[0], h2 * 2)
            self._set_imm(cmd[1], h1 * 2)
            cmd.size = 6

        elif h1 == 0 and h2 != 0 and h3 != 0:
            cmd.itype = self.get_instruction_itype('jz')
            self._set_imm(cmd[0], h2 * 2)
            self._set_near(cmd[1], h3 * 2)
            cmd.size = 6

        elif h1 == 0 and h2 == 0 and h3 == 0:
            cmd.itype = self.get_instruction_itype('zero')
            self._set_imm(cmd[0], h1 * 2)
            cmd.size = 6

        elif (h1 != h2 and h3 == h2) and (h4 == h2 and h1 != h5):
            cmd.itype = self.get_instruction_itype('add')
            self._set_imm(cmd[0], h5 * 2)
            self._set_imm(cmd[1], h1 * 2)
            cmd.size = 18

        elif h1 == h2 and h5 == 0:
            cmd.itype = self.get_instruction_itype('mov')
            self._set_imm(cmd[0], h1 * 2)
            self._set_imm(cmd[1], h4 * 2)
            cmd.size = 24
            # Carry out the move in the database as well (original note:
            # "fixing some smc"): the word at the source address is copied
            # over the word at the destination address.
            idc.patch_word(h1 * 2, idc.get_wide_word(h4 * 2))

        elif (h1 != h2) and (h2 == h4) and (h6 != 0):
            cmd.itype = self.get_instruction_itype('jnz')
            self._set_imm(cmd[0], h1 * 2)
            self._set_near(cmd[1], h6 * 2)
            cmd.size = 18

        else:
            print('Instruction not found')

        # Return decoded instruction size or zero
        return cmd.size
    
 def PROCESSOR_ENTRY():
    """Build and return a new subleq_processor_t instance."""
    processor = subleq_processor_t()
    return processor

	# Subleq Processor module for Flareon5 challenge 12. Tested in IDA 7
	# by @ulexec

	import sys
	import idc
	import idaapi
	import idautils
	from idc import *
	from idaapi import *

	# ----------------------------------------------------------------------
	class subleq_processor_t(idaapi.processor_t):
	    """
	    Processor module for the subleq machine of Flare-On 5 challenge 12.

	    Processor module classes must derive from idaapi.processor_t.
	    notify_ana pattern-matches runs of 16-bit words into the
	    pseudo-instructions listed in `instruc`, notify_emu creates the code
	    cross-references, and notify_out_insn / notify_out_operand render the
	    disassembly text.
	    """

	    # IDP id (numbers above 0x8000 are reserved for third-party modules)
	    id = 0x8000 + 1

	    # Processor features
	    flag = PR_ADJSEGS | PRN_HEX

	    # Number of bits in a byte for code segments (usually 8)
	    # IDA supports values up to 32 bits
	    cnbits = 8

	    # Number of bits in a byte for non-code segments (usually 8)
	    # IDA supports values up to 32 bits
	    dnbits = 8

	    # Short processor names
	    # Each name should be shorter than 9 characters
	    psnames = ['subleq']

	    # Long processor names
	    # No restriction on name lengths.
	    plnames = ['Subleq']

	    # Register names. CS and DS are the two virtual segment registers that
	    # the segment register indexes below refer to; they must be present in
	    # this list so that those indexes are valid.
	    reg_names = [
	        'ACC0',
	        'CS',
	        'DS',
	    ]

	    # Segment register information (virtual CS and DS registers, because the
	    # processor has no segment registers of its own)
	    reg_first_sreg = reg_names.index('CS')
	    reg_last_sreg = reg_names.index('DS')

	    # Size of a segment register in bytes
	    segreg_size = 0

	    # Indexes of the code and data segment registers
	    reg_code_sreg = reg_names.index('CS')
	    reg_data_sreg = reg_names.index('DS')

	    # Array of instructions; the position in this list is the itype that
	    # notify_ana stores in the decoded instruction.
	    instruc = [
	        {'name': 'jmp', 'feature':CF_JUMP, 'cmt': "Unconditional jump"},
	        {'name': 'add', 'feature':0, 'cmt': "Addition without Carry"},
	        {'name': 'mov', 'feature':0, 'cmt': "Move values"},
	        {'name': 'jnz', 'feature':CF_JUMP, 'cmt': "Conditional jump if not zero"},
	        {'name': 'not', 'feature':0, 'cmt': "Logical not"},
	        {'name': 'zero', 'feature':0, 'cmt': "Zero accumulator"},
	        {'name': 'sub', 'feature':0, 'cmt': "Substraction without carry"},
	        {'name': 'jz', 'feature':CF_JUMP, 'cmt': "Conditional jump if zero"}

	    ]

	    # Mnemonic -> itype lookup, derived from `instruc` so that the two can
	    # never disagree.
	    instruc_idx = {ins['name']: i for i, ins in enumerate(instruc)}

	    assembler = {
	        "header": [".subleq"],
	        "flag": AS_NCHRE | ASH_HEXF0 | ASD_DECF0 | ASO_OCTF0 | ASB_BINF0 | AS_NOTAB,
	        "uflag": 0,
	        "name": "subleq assembler",
	        "origin": ".org",
	        "end": ".end",
	        "cmnt": ";",
	        "ascsep": "'",
	        "accsep": "'",
	        "esccodes": "\"'",
	        "a_ascii": ".ascii",
	        "a_byte": ".byte",
	        "a_word": ".word",
	        "a_dword": ".dword",
	        "a_bss": "dfs %s",
	        "a_seg": "seg",
	        "a_curip": "PC",
	        "a_public": "",
	        "a_weak": "",
	        "a_extrn": ".extern",
	        "a_comdef": "",
	        "a_align": ".align",
	        "lbrace": "(",
	        "rbrace": ")",
	        "a_mod": "%",
	        "a_band": "&",
	        "a_bor": "|",
	        "a_xor": "^",
	        "a_bnot": "~",
	        "a_shl": "<<",
	        "a_shr": ">>",
	        "a_sizeof_fmt": "size %s",
	    }

	    # icode of the first instruction
	    # NOTE(review): the itypes produced by notify_ana are the 0-based indexes
	    # of `instruc`, so 0xa looks inconsistent with them - confirm whether this
	    # should be 0.
	    instruc_start = 0xa

	    # icode of the last instruction + 1
	    instruc_end = len(instruc) + 1

	    def notify_emu(self, cmd):
	        """
	        Emulate instruction: create the code cross-references for it.
	        Upon entrance to this function all information about the
	        instruction is in the 'cmd' structure.
	        If zero is returned, the kernel will delete the instruction.
	        args:
	          cmd - the decoded instruction
	        returns: True (the instruction is always kept)
	        """
	        feature = cmd.get_canon_feature()
	        mnem = self.instruc[cmd.itype]['name']

	        if mnem == 'jmp':
	            # Unconditional jump: only the target gets a reference, there is
	            # no fall-through to the next instruction.
	            if cmd[0].value != 0:
	                add_cref(cmd.ea, cmd[0].addr, fl_JN)
	            return True

	        # Conditional jumps carry their target in the second operand.
	        if mnem in ('jz', 'jnz') and cmd[1].value != 0:
	            add_cref(cmd.ea, cmd[1].addr, fl_JN)

	        # Everything except 'jmp' flows into the next instruction unless the
	        # instruction is flagged CF_STOP.
	        if (feature & CF_STOP) == 0:
	            add_cref(cmd.ea, cmd.ea + cmd.size, fl_F)
	        return True

	    def notify_out_operand(self, ctx, op):
	        """
	        Generate the text of one operand: its value as a hex literal.
	        args:
	          ctx - output context
	          op - the operand to print
	        returns: True
	        """
	        ctx.out_line("0x%x" % (op.value,))
	        return True

	    def notify_out_insn(self, ctx):
	        """
	        Generate the text of an instruction: the mnemonic followed by every
	        o_imm / o_near operand.
	        args:
	          ctx - output context holding the instruction in ctx.insn
	        returns: None
	        """
	        ctx.out_mnemonic()
	        operands = ctx.insn.ops
	        for i in range(len(operands)):
	            if operands[i].type not in (o_imm, o_near):
	                continue
	            # Odd-indexed operands (i.e. the second one) get a leading comma.
	            if i % 2 != 0:
	                ctx.out_char(',')
	            ctx.out_char(" ")
	            ctx.out_one_operand(i)

	        ctx.flush_outbuf()
	        return

	    def notify_func_bounds(self, code, func_ea, max_func_end_ea):
	        """
	        find_func_bounds() finished its work
	        The module may fine tune the function bounds; this module accepts
	        them unchanged.
	        args:
	          possible code - one of FIND_FUNC_XXX (check find_func_bounds)
	          func_ea - func start ea
	          max_func_end_ea (from the kernel's point of view)
	        returns: possible_return_code (always FIND_FUNC_OK)
	        """
	        return FIND_FUNC_OK

	    def get_instruction_itype(self, name):
	        """
	        Look up the itype of a mnemonic.
	        args:
	          name - mnemonic, e.g. 'jmp'
	        returns: the itype, or -1 (after printing a message) if the
	                 mnemonic is unknown
	        """
	        itype = self.instruc_idx.get(name)
	        if itype is None:
	            # Parenthesised single-argument form prints the same text on
	            # Python 2 and Python 3.
	            print("Could not find instruction %s" % name)
	            return -1
	        return itype

	    def get_instruction_name(self, itype):
	        """
	        Look up the mnemonic of an itype.
	        args:
	          itype - index into `instruc`
	        returns: the mnemonic, or None if itype is out of range
	        """
	        # The explicit lower bound keeps negative values from wrapping
	        # around to the end of the list.
	        if 0 <= itype < len(self.instruc):
	            return self.instruc[itype]['name']
	        return None

	    def out_mnem(self, ctx):
	        """
	        Output the mnemonic of the instruction held in ctx.insn.
	        args:
	          ctx - output context
	        """
	        ctx.out_line(self.get_instruction_name(ctx.insn.itype))

	    def get_register(self, name):
	        """
	        Look up the index of a register.
	        args:
	          name - register name as listed in `reg_names`
	        returns: the register index, or -1 if there is no such register
	        """
	        try:
	            return self.reg_names.index(name)
	        except ValueError:
	            return -1

	    @staticmethod
	    def _set_imm(op, value):
	        """Fill 'op' as an immediate operand holding 'value'."""
	        op.type = o_imm
	        op.value = value

	    @staticmethod
	    def _set_near(op, target):
	        """Fill 'op' as a near code reference to 'target'."""
	        op.type = o_near
	        op.value = target
	        op.addr = target

	    def notify_ana(self, cmd):
	        """
	        Decodes an instruction into 'cmd'.

	        Six consecutive 16-bit words starting at cmd.ea are read and matched
	        against the word patterns of the pseudo-instructions. Every word
	        used as an operand is doubled before it is stored - presumably this
	        turns a 16-bit word index into a byte address (TODO confirm).
	        args:
	          cmd - the instruction to fill in; cmd.ea is the address to decode
	        returns: cmd.size (=the size of the decoded instruction); the size
	                 is left untouched when no pattern matches
	        """
	        h1, h2, h3, h4, h5, h6 = [
	            idc.get_wide_word(cmd.ea + offset) for offset in range(0, 12, 2)
	        ]

	        if h1 == 0 and h2 == 0 and h3 != 0:
	            cmd.itype = self.get_instruction_itype('jmp')
	            self._set_near(cmd[0], h3 * 2)
	            cmd.size = 6

	        elif h1 != 0 and h2 != 0 and h1 != h2 and h3 == 0:
	            cmd.itype = self.get_instruction_itype('sub')
	            self._set_imm(cmd[0], h2 * 2)
	            self._set_imm(cmd[1], h1 * 2)
	            cmd.size = 6

	        elif h1 == 0 and h2 != 0 and h3 != 0:
	            cmd.itype = self.get_instruction_itype('jz')
	            self._set_imm(cmd[0], h2 * 2)
	            self._set_near(cmd[1], h3 * 2)
	            cmd.size = 6

	        elif h1 == 0 and h2 == 0 and h3 == 0:
	            cmd.itype = self.get_instruction_itype('zero')
	            self._set_imm(cmd[0], h1 * 2)
	            cmd.size = 6

	        elif (h1 != h2 and h3 == h2) and (h4 == h2 and h1 != h5):
	            cmd.itype = self.get_instruction_itype('add')
	            self._set_imm(cmd[0], h5 * 2)
	            self._set_imm(cmd[1], h1 * 2)
	            cmd.size = 18

	        elif h1 == h2 and h5 == 0:
	            cmd.itype = self.get_instruction_itype('mov')
	            self._set_imm(cmd[0], h1 * 2)
	            self._set_imm(cmd[1], h4 * 2)
	            cmd.size = 24
	            # Carry out the move in the database as well (original note:
	            # "fixing some smc"): the word at the source address is copied
	            # over the word at the destination address.
	            idc.patch_word(h1 * 2, idc.get_wide_word(h4 * 2))

	        elif (h1 != h2) and (h2 == h4) and (h6 != 0):
	            cmd.itype = self.get_instruction_itype('jnz')
	            self._set_imm(cmd[0], h1 * 2)
	            self._set_near(cmd[1], h6 * 2)
	            cmd.size = 18

	        else:
	            print('Instruction not found')

	        # Return decoded instruction size or zero
	        return cmd.size

	def PROCESSOR_ENTRY():
	    """Build and return a new subleq_processor_t instance."""
	    return subleq_processor_t()