Skip to content

Instantly share code, notes, and snippets.

@yeggor
Created March 20, 2021 02:32
Show Gist options
  • Save yeggor/14354d8d4fdbf85cd4f22d4978a29251 to your computer and use it in GitHub Desktop.
Save yeggor/14354d8d4fdbf85cd4f22d4978a29251 to your computer and use it in GitHub Desktop.
A fixed version of FireEye's idb2pat.py script (https://github.com/fireeye/flare-ida/blob/master/python/flare/idb2pat.py), saved here as ida2pat.py, plus a get_sig.py script that generates a FLIRT SIG file from many source binary files.
#!/usr/bin/env python3
import os
import platform
import subprocess
import time
from concurrent.futures import ProcessPoolExecutor, as_completed
import click
from elftools.elf.elffile import ELFFile
from tqdm import tqdm
# configuration data
# Relative path of the IDAPython script handed to IDA through the -S switch.
ANALYSER_PATH = os.path.join('idc', 'ida2pat.py')
# Executable launched for modules detected as x86.
IDA_PATH = 'idat'
# Executable launched for modules detected as x64.
IDA64_PATH = 'idat64'
def get_type(module_path):
    """Classify a binary by the first two bytes of the file.

    Returns 'pe' for an 'MZ' header, 'elf' when the file starts with
    0x7f 'E', and 'unknown' for anything else.
    """
    with open(module_path, 'rb') as stream:
        magic = stream.read(2)
    known_magics = {b'\x4d\x5a': 'pe', b'\x7f\x45': 'elf'}
    return known_magics.get(magic, 'unknown')
def get_num_le(bytearr):
    """Interpret a sequence of byte values as a little-endian number.

    The element at index i contributes value * 256**i; an empty sequence
    yields 0.
    """
    return sum(value * 256 ** index for index, value in enumerate(bytearr))
def get_pe_machine_arch(module_path):
    """Get architecture for PE file.

    Reads the 4-byte little-endian value stored at offset 0x3c (the pointer
    to the PE signature), skips the 4-byte signature and decodes the 2-byte
    machine field that follows it.

    Returns 'x64' for machine value 0x8664, 'x86' for 0x014c and 'unknown'
    for every other value (a file too short to hold the fields decodes
    as 0 and therefore also ends up as 'unknown' unless offset 4 happens to
    hold a known machine value).
    """
    machine_x64 = 0x8664
    machine_x86 = 0x014c
    pe_offset = 0x3c
    with open(module_path, 'rb') as module:
        module.seek(pe_offset)
        # The pointer is a 32-bit field: reading a single byte would
        # truncate it for images whose PE header lies at 0x100 or beyond.
        pe_pointer = int.from_bytes(module.read(4), 'little')
        # Only the two machine bytes are needed, so the rest of the file is
        # never loaded into memory.
        module.seek(pe_pointer + 4)
        type_value = int.from_bytes(module.read(2), 'little')
    if type_value == machine_x64:
        return 'x64'
    if type_value == machine_x86:
        return 'x86'
    return 'unknown'
def get_elf_machine_arch(module_path):
    """Report the machine architecture of an ELF file.

    A file in which ELFFile finds no DWARF info is reported as 'unknown';
    otherwise the value of ELFFile.get_machine_arch() is returned unchanged.
    """
    with open(module_path, 'rb') as stream:
        elf = ELFFile(stream)
        if elf.has_dwarf_info():
            return elf.get_machine_arch()
    return 'unknown'
def get_machine_arch(module_path):
    """Return the architecture string of a PE or ELF module.

    Dispatches on get_type(): 'pe' files go through get_pe_machine_arch(),
    'elf' files through get_elf_machine_arch(); any other type yields
    'unknown'.
    """
    # Detect the container format once: every get_type() call reopens and
    # rereads the file, so the result is kept instead of asking twice.
    module_type = get_type(module_path)
    if module_type == 'pe':
        return get_pe_machine_arch(module_path)
    if module_type == 'elf':
        return get_elf_machine_arch(module_path)
    return 'unknown'
def analyse_module(module_path, scr_path, idat, idat64):
    """Run the IDA script `scr_path` on a single module.

    Only files with a '.debug' or '.efi' extension are processed. The IDA
    executable is chosen from the detected architecture: `idat` for 'x86',
    `idat64` for 'x64'. IDA is started with the '-A' and '-S<script>'
    switches followed by the module path, and this call blocks until the
    process ends.

    Returns False when the module is skipped (other extension or other
    architecture) and True when '<module>.i64' or '<module>.idb' exists
    after the run.

    Raises SystemExit (status 1) after printing an error when neither
    database file exists once IDA has finished.
    """
    _, ext = os.path.splitext(module_path)
    if ext not in ('.debug', '.efi'):
        return False
    arch = get_machine_arch(module_path)
    if arch == 'x86':
        idat_path = idat
    elif arch == 'x64':
        idat_path = idat64
    else:
        return False
    # IDA's stdout is never inspected, so it is discarded directly instead
    # of being buffered through a pipe; stderr is left untouched.
    subprocess.run(
        [idat_path, '-A', '-S{}'.format(scr_path), module_path],
        stdout=subprocess.DEVNULL)
    database_exists = any(
        os.path.isfile('{}{}'.format(module_path, suffix))
        for suffix in ('.i64', '.idb'))
    if not database_exists:
        print('[ERROR] module: {}'.format(module_path))
        # Explicit SystemExit: the bare exit() helper is only installed by
        # the site module and reports success (status 0) to the shell.
        raise SystemExit(1)
    return True
def analyse_all(files, scr_path, max_workers, idat, idat64):
    """Analyse every path in `files` with analyse_module().

    The first path is handled synchronously in the calling process; the
    remaining ones are submitted to a ProcessPoolExecutor with
    `max_workers` workers while tqdm shows progress. Results and exceptions
    of the submitted jobs are not inspected.

    Returns None. An empty `files` list is a no-op.
    """
    if not files:
        # Nothing to do; indexing files[0] below would raise IndexError.
        return
    # check first module
    # (presumably so that a broken IDA setup stops the run before the pool
    # is started — analyse_module() exits the process on failure)
    analyse_module(files[0], scr_path, idat, idat64)
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(analyse_module, module, scr_path, idat, idat64)
            for module in files[1:]
        ]
        params = {
            'total': len(futures),
            'unit': 'module',
            'unit_scale': True,
            'leave': True
        }
        # The loop only drives the progress bar as jobs finish.
        for _ in tqdm(as_completed(futures), **params):
            pass
class dbgs_analyser:
    """Collects the files below a root directory and runs the analysis on them."""

    def __init__(self, dirname, workers):
        # `workers` is accepted but not stored; do() forwards its own value.
        self.root_dir = dirname
        self.files = []

    def _get_files(self, dirname):
        """Recursively append the path of every file under `dirname` to self.files."""
        for entry in os.listdir(dirname):
            path = os.path.join(dirname, entry)
            if os.path.isfile(path):
                self.files.append(path)
            elif os.path.isdir(path):
                self._get_files(path)

    @classmethod
    def do(cls, dirname, workers):
        """Collect all files under `dirname` and pass them to analyse_all()."""
        analyser = cls(dirname, workers)
        analyser._get_files(analyser.root_dir)
        analyse_all(analyser.files, ANALYSER_PATH, workers, IDA_PATH,
                    IDA64_PATH)
# Root click command group; the analyze, get_sig and clear commands are
# attached to it with cli.add_command() near the end of this script.
@click.group()
def cli():
    pass
@click.command()
@click.argument('modules_dir')
@click.option('-w',
              '--workers',
              help='Number of workers (8 by default).',
              type=int)
def analyze(modules_dir, workers):
    """Handle modules in specific directory"""
    # Returns False when modules_dir is not a directory, True after the
    # whole tree has been processed; the elapsed time is printed at the end.
    if not os.path.isdir(modules_dir):
        print('[ERROR] check modules directory')
        return False
    # A missing (or zero) worker count falls back to 8.
    workers = workers or 8
    started = time.time()
    dbgs_analyser.do(modules_dir, workers)
    elapsed = round(time.time() - started, 3)
    print('[time] {} s.'.format(elapsed))
    return True
@click.command()
@click.argument('modules_dir')
def get_sig(modules_dir):
    """Get PAT and SIG file"""
    # Concatenates every '.pat' file found under modules_dir into
    # 'efixplorer.pat' and runs sigmake on it to produce 'efixplorer.sig'.
    # Returns False when modules_dir is not a directory, True otherwise.
    if not os.path.isdir(modules_dir):
        print('[ERROR] check modules directory')
        return False
    d_an = dbgs_analyser(modules_dir, 1)
    d_an._get_files(modules_dir)
    # get PAT files only
    pat_files = [
        path for path in d_an.files if os.path.splitext(path)[1] == '.pat'
    ]
    result_pat = 'efixplorer.pat'
    result_sig = 'efixplorer.sig'
    result_exc = 'efixplorer.exc'
    # The output is opened in append mode, so content left in
    # efixplorer.pat by an earlier run is kept. The context manager closes
    # the file even when reading one of the inputs fails.
    # NOTE(review): ida2pat.py ends every PAT file with a '---' line, so the
    # merged file contains one per module — confirm sigmake tolerates that.
    with open(result_pat, 'ab') as pat:
        for pat_file in pat_files:
            with open(pat_file, 'rb') as f:
                pat.write(f.read())
    if platform.system() == 'Linux':
        os.system(' '.join(['dos2unix', result_pat]))
    os.system(' '.join(['sigmake', result_pat, result_sig]))
    if os.path.isfile(result_exc):
        # An .exc file is present: drop its first line and run sigmake again.
        # (If the file has no newline, find() gives -1 and nothing is cut.)
        with open(result_exc, 'r') as f:
            exc_buf = f.read()
        next_line_index = exc_buf.find('\n') + 1
        with open(result_exc, 'w') as f:
            f.write(exc_buf[next_line_index:])
        os.system(' '.join(['sigmake', result_pat, result_sig]))
    # Same success value as the analyze and clear commands.
    return True
@click.command()
@click.argument('modules_dir')
def clear(modules_dir):
    """Remove .idb, .i64 and .pat files"""
    # Returns False when modules_dir is not a directory, True once every
    # matching file below it has been deleted.
    if not os.path.isdir(modules_dir):
        print('[ERROR] check modules directory')
        return False
    collector = dbgs_analyser(modules_dir, 1)
    collector._get_files(modules_dir)
    removable = ('.idb', '.i64', '.pat')
    for path in collector.files:
        if os.path.splitext(path)[1] in removable:
            os.remove(path)
    return True
# Attach the three sub-commands to the root click group.
cli.add_command(analyze)
cli.add_command(get_sig)
cli.add_command(clear)
# Hand control to click only when this file is executed as a script.
if __name__ == '__main__':
    cli()
# Source: https://github.com/fireeye/flare-ida/blob/master/python/flare/idb2pat.py
# --------------------------------------------------------------------------------
# * IDA 7.5 support added
# * python3 support added
# * batch analysis support added
# --------------------------------------------------------------------------------
import json
import logging
import tempfile
import ida_bytes
import ida_kernwin
import ida_name
import idaapi
import idc
# TODO: make this into an enum
# Function-selection modes; make_func_sigs() compares Config.mode to these.
FUNCTION_MODE_MIN = 0
# Named functions whose flags do not include FUNC_LIB.
NON_AUTO_FUNCTIONS = FUNCTION_MODE_MIN
# Named functions whose flags include FUNC_LIB.
LIBRARY_FUNCTIONS = 1
# Functions whose start address carries a public name.
PUBLIC_FUNCTIONS = 2
# Functions located at entry points.
ENTRY_POINT_FUNCTIONS = 3
# Every function.
ALL_FUNCTIONS = 4
# A single function picked through the chooser dialog.
USER_SELECT_FUNCTION = 5
FUNCTION_MODE_MAX = USER_SELECT_FUNCTION
# Default for Config.batch: when set, get_pat_file() does not prompt for a
# file name and main() calls idc.qexit(0) once the PAT file is written.
BATCH = True
# Default for Config.pat_append: when set, main() appends to the PAT file
# instead of overwriting it.
PAT_APPEND = False
def get_ida_logging_handler():
    """
    Return the first handler registered on the root logger.

    The IDA logger is expected to occupy that slot (since it inits the env).
    Raises IndexError when the root logger has no handlers.
    """
    root_logger = logging.getLogger()
    first_handler = root_logger.handlers[0]
    return first_handler
# Configure the root logger at DEBUG, then restrict its first handler to
# INFO so that DEBUG records are not emitted through that handler.
logging.basicConfig(level=logging.DEBUG)
get_ida_logging_handler().setLevel(logging.INFO)
# Module-wide logger; main() reports through it.
g_logger = logging.getLogger('idb2pat')
class Config(object):
    """Settings that control pattern generation.

    Attributes: min_func_length, pointer_size, mode, batch, pat_append,
    logfile, loglevel (stored as the numeric `logging` level) and
    logenabled.
    """

    def __init__(self,
                 min_func_length=6,
                 pointer_size=4,
                 mode=ALL_FUNCTIONS,
                 batch=BATCH,
                 pat_append=PAT_APPEND,
                 logfile='',
                 loglevel='DEBUG',
                 logenabled=False):
        super().__init__()
        self.min_func_length = min_func_length
        # TODO: get pointer_size from IDA
        # When idc.__EA64__ is set the argument is overridden with 8.
        self.pointer_size = 8 if idc.__EA64__ else pointer_size
        self.mode = mode
        self.batch = batch
        self.pat_append = pat_append
        self.logfile = logfile
        # Level names are resolved against the logging module, so an
        # unknown name raises AttributeError here.
        self.loglevel = getattr(logging, loglevel)
        self.logenabled = logenabled

    def update(self, vals):
        """
        Set these fields given a dict with a similar schema as this,
        possibly loaded from a JSON string.

        Keys absent from `vals` keep their current value; `batch` is never
        changed here, and an unknown 'loglevel' name is ignored.
        type vals: dict(string, object)
        """
        # TODO: make 'mode' a string, not magic number
        plain_keys = ('min_func_length', 'pointer_size', 'mode',
                      'pat_append', 'logfile', 'logenabled')
        for key in plain_keys:
            setattr(self, key, vals.get(key, getattr(self, key)))
        if 'loglevel' in vals and hasattr(logging, vals['loglevel']):
            self.loglevel = getattr(logging, vals['loglevel'])
# generated from IDB2SIG plugin updated by TQN
# 256-entry lookup table; crc16() indexes it with the low byte of
# (crc ^ input byte).
# NOTE(review): the values look like the reflected CRC-16-CCITT table
# (polynomial 0x8408) — confirm before relying on that.
CRC16_TABLE = [
    0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf, 0x8c48,
    0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7, 0x1081, 0x0108,
    0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e, 0x9cc9, 0x8d40, 0xbfdb,
    0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876, 0x2102, 0x308b, 0x0210, 0x1399,
    0x6726, 0x76af, 0x4434, 0x55bd, 0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e,
    0xfae7, 0xc87c, 0xd9f5, 0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e,
    0x54b5, 0x453c, 0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd,
    0xc974, 0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb,
    0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3, 0x5285,
    0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a, 0xdecd, 0xcf44,
    0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72, 0x6306, 0x728f, 0x4014,
    0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9, 0xef4e, 0xfec7, 0xcc5c, 0xddd5,
    0xa96a, 0xb8e3, 0x8a78, 0x9bf1, 0x7387, 0x620e, 0x5095, 0x411c, 0x35a3,
    0x242a, 0x16b1, 0x0738, 0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862,
    0x9af9, 0x8b70, 0x8408, 0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e,
    0xf0b7, 0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff,
    0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036, 0x18c1,
    0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e, 0xa50a, 0xb483,
    0x8618, 0x9791, 0xe32e, 0xf2a7, 0xc03c, 0xd1b5, 0x2942, 0x38cb, 0x0a50,
    0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd, 0xb58b, 0xa402, 0x9699, 0x8710,
    0xf3af, 0xe226, 0xd0bd, 0xc134, 0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7,
    0x6e6e, 0x5cf5, 0x4d7c, 0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1,
    0xa33a, 0xb2b3, 0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72,
    0x3efb, 0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232,
    0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a, 0xe70e,
    0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1, 0x6b46, 0x7acf,
    0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9, 0xf78f, 0xe606, 0xd49d,
    0xc514, 0xb1ab, 0xa022, 0x92b9, 0x8330, 0x7bc7, 0x6a4e, 0x58d5, 0x495c,
    0x3de3, 0x2c6a, 0x1ef1, 0x0f78
]
# ported from IDB2SIG plugin updated by TQN
def crc16(data, crc):
    """Compute the 16-bit checksum of `data`, starting from `crc`.

    `data` may be a str (each character is converted with ord()) or a
    bytes-like object / iterable of ints. After the table-driven pass the
    value is bit-inverted and its two bytes are swapped.

    Returns an int in the range 0..0xFFFF.
    """
    for byte in data:
        # Iterating bytes/bytearray yields ints while iterating str yields
        # one-character strings; ord() only accepts the latter.
        value = byte if isinstance(byte, int) else ord(byte)
        crc = (crc >> 8) ^ CRC16_TABLE[(crc ^ value) & 0xFF]
    crc = (~crc) & 0xFFFF
    # Swap the high and low byte of the 16-bit result.
    crc = (crc << 8) | ((crc >> 8) & 0xFF)
    return crc & 0xffff
def get_functions():
    """Yield idaapi.getn_func(i) for every index below idaapi.get_func_qty()."""
    indices = range(idaapi.get_func_qty())
    yield from (idaapi.getn_func(index) for index in indices)
# TODO: idaapi.get_func(ea)
# Lazily built map from function start address to function object.
_g_function_cache = None


def get_func_at_ea(ea):
    """
    Return the function that starts at `ea`, or None when no function
    starts there.

    The start-address map is built from get_functions() on the first call
    and reused afterwards.
    type ea: idc.ea_t
    """
    global _g_function_cache
    if _g_function_cache is None:
        _g_function_cache = {f.start_ea: f for f in get_functions()}
    # Look up the requested address. Using the loop variable here returned
    # the last function regardless of `ea`, and failed once the cache
    # already existed because the variable was never bound.
    return _g_function_cache.get(ea)
def find_ref_loc(config, ea, ref):
    """
    Find the address inside the item at `ea` where `ref` is encoded.

    When operand 0 is of type o_near the searched value is the distance
    from the end of the item to `ref`, masked to pointer_size * 8 bits;
    otherwise it is `ref` itself. Returns the first matching address, or
    BADADDR when an argument is BADADDR, `ea` is not code, or nothing
    matches.
    type config: Config
    type ea: idc.ea_t
    type ref: idc.ea_t
    """
    logger = logging.getLogger('idb2pat:find_ref_loc')
    if ea == BADADDR:
        logger.debug('Bad parameter: ea')
        return BADADDR
    if ref == BADADDR:
        logger.debug('Bad parameter: ref')
        return BADADDR
    needle = ref
    if idc.get_operand_type(ea, 0) == o_near:
        width_mask = (1 << config.pointer_size * 8) - 1
        needle = (ref - get_item_end(ea)) & width_mask
    if not ida_bytes.is_code(ida_bytes.get_full_flags(ea)):
        return BADADDR
    # NOTE(review): candidates are read with get_dword() while the needle is
    # masked to pointer_size * 8 bits, and the scan window shrinks as
    # pointer_size grows — confirm this is intended for pointer_size == 8.
    scan_end = max(ea, 1 + get_item_end(ea) - config.pointer_size)
    for candidate in range(ea, scan_end):
        if ida_bytes.get_dword(candidate) == needle:
            return candidate
    return BADADDR
def to_bytestring(seq):
    """
    Build a str whose characters are the chr() values of the items in
    `seq`, in order.

    In practice this turns a list of uint8 values into the string that
    crc16() consumes.
    """
    characters = [chr(item) for item in seq]
    return ''.join(characters)
class FuncTooShortException(Exception):
    """Raised by make_func_sig() for functions shorter than min_func_length."""
def _record_ref(config, ea, ref, refs, variable_bytes, logger):
    """Mark the bytes that encode `ref` inside the item at `ea` as variable."""
    ref_loc = find_ref_loc(config, ea, ref)
    if ref_loc == BADADDR:
        return
    logger.debug(' ref loc: %s', hex(ref_loc))
    for i in range(config.pointer_size):
        logger.debug(' variable %s', hex(ref_loc + i))
        variable_bytes.add(ref_loc + i)
    refs[ref_loc] = ref


# ported from IDB2SIG plugin updated by TQN
def make_func_sig(config, func):
    """
    Build the PAT line describing one function.

    The line consists of, in order: the first 32 bytes as hex (variable
    bytes written as '..', padded with '..' up to 32 positions), the length
    of the checksummed block as ' %02X', its crc16 as ' %04X', the function
    length as ' %04X', one ' :OFFSET name' entry per named address in the
    function, one ' ^OFFSET name' entry per recorded reference whose target
    has a name, and finally the remaining bytes after the checksummed block.

    Raises FuncTooShortException when the function is shorter than
    config.min_func_length.
    type config: Config
    type func: idc.func_t
    """
    logger = logging.getLogger('idb2pat:make_func_sig')
    func_len = func.end_ea - func.start_ea
    if func_len < config.min_func_length:
        logger.debug('Function is too short')
        raise FuncTooShortException()
    ea = func.start_ea
    publics = []  # addresses inside the function that carry a name
    refs = {}  # location of the encoded reference -> referenced address
    variable_bytes = set()  # addresses emitted as '..' instead of hex
    while ea != BADADDR and ea < func.end_ea:
        logger.debug('ea: %s', hex(ea))
        name = ida_name.get_name(ea)
        if name is not None and name != '':
            logger.debug('has a name')
            publics.append(ea)
        ref = get_first_dref_from(ea)
        if ref != BADADDR:
            # data ref
            logger.debug('has data ref')
            _record_ref(config, ea, ref, refs, variable_bytes, logger)
            # not sure why we only care about two...
            # only two possible operands?
            ref = get_next_dref_from(ea, ref)
            if ref != BADADDR:
                logger.debug('has data ref2')
                _record_ref(config, ea, ref, refs, variable_bytes, logger)
        else:
            # code ref
            ref = get_first_fcref_from(ea)
            if ref != BADADDR:
                logger.debug('has code ref')
                if ref < func.start_ea or ref >= func.end_ea:
                    # code ref is outside function
                    _record_ref(config, ea, ref, refs, variable_bytes,
                                logger)
        ea = next_not_tail(ea)
    sig = ''
    # first 32 bytes, or til end of function
    for ea in range(func.start_ea, min(func.start_ea + 32, func.end_ea)):
        if ea in variable_bytes:
            sig += '..'
        else:
            sig += '%02X' % (ida_bytes.get_byte(ea))
    # pad short functions so the leading block always has 32 positions
    sig += '..' * (32 - (len(sig) // 2))
    if func_len > 32:
        crc_data = [0] * 256
        # for 255 bytes starting at index 32, or til end of function, or variable byte
        for loc in range(32, min(func_len, 32 + 255)):
            if func.start_ea + loc in variable_bytes:
                break
            crc_data[loc - 32] = ida_bytes.get_byte(func.start_ea + loc)
        else:
            # loop ran to completion: step past the last checksummed byte
            loc += 1
        # TODO: is this required everywhere? ie. with variable bytes?
        alen = loc - 32
        crc = crc16(to_bytestring(crc_data[:alen]), crc=0xffff)
    else:
        loc = func_len
        alen = 0
        crc = 0
    sig += ' %02X' % (alen)
    sig += ' %04X' % (crc)
    # TODO: does this need to change for 64bit?
    sig += ' %04X' % (func_len)
    # this will be either ' :%04X %s' or ' :%08X %s'
    public_format = ' :%%0%dX %%s' % (config.pointer_size)
    for public in publics:
        name = ida_name.get_name(public)
        if name is None or name == '':
            continue
        sig += public_format % (public - func.start_ea, name)
    for ref_loc, ref in refs.items():
        # TODO: what is the first arg?
        name = ida_name.get_name(ref)
        if name is None or name == '':
            continue
        if ref_loc >= func.start_ea:
            # this will be either ' ^%04X %s' or ' ^%08X %s'
            addr = ref_loc - func.start_ea
            ref_format = ' ^%%0%dX %%s' % (config.pointer_size)
        else:
            # this will be either ' ^-%04X %s' or ' ^-%08X %s'
            # (the offset must be stored in `addr`: the format call below
            # reads that name, and a differently named variable left it
            # stale or undefined)
            addr = func.start_ea - ref_loc
            ref_format = ' ^-%%0%dX %%s' % (config.pointer_size)
        sig += ref_format % (addr, name)
    # Tail of the module starts at the end of the CRC16 block.
    if loc < func_len:
        tail = ' '
        for ea in range(func.start_ea + loc,
                        min(func.end_ea, func.start_ea + 0x8000)):
            if ea in variable_bytes:
                tail += '..'
            else:
                tail += '%02X' % (ida_bytes.get_byte(ea))
        sig += tail
    logger.debug('sig: %s', sig)
    return sig
def _log_sig_failure(logger, func, error):
    """Log a failed signature attempt together with the function address and name."""
    logger.exception(error)
    logger.error('Failed to create signature for function at %s (%s)',
                 hex(func.start_ea),
                 ida_name.get_name(func.start_ea) or '')


def _append_func_sig(config, func, sigs, logger, skip_too_short=True):
    """Append the signature of `func` to `sigs`; failures are logged, never raised."""
    try:
        sigs.append(make_func_sig(config, func))
    except FuncTooShortException as e:
        # Too-short functions are skipped silently unless the caller asks
        # for them to be reported like any other failure.
        if not skip_too_short:
            _log_sig_failure(logger, func, e)
    except Exception as e:
        _log_sig_failure(logger, func, e)


def make_func_sigs(config):
    """
    Create the signatures for the functions selected by config.mode.

    Returns a list of PAT lines as produced by make_func_sig(). Functions
    that are too short are skipped (reported in USER_SELECT_FUNCTION mode);
    any other failure is logged and the function is skipped. An unknown
    mode, a cancelled chooser or an unnamed chosen function yield an empty
    list.
    type config: Config
    """
    logger = logging.getLogger('idb2pat:make_func_sigs')
    sigs = []
    mode = config.mode
    if mode == USER_SELECT_FUNCTION:
        f = choose_func('Choose Function:', BADADDR)
        if f is None:
            logger.error('No function selected')
            return []
        jumpto(f.start_ea)
        if not has_any_name(ida_bytes.get_full_flags(f.start_ea)):
            logger.error('Function doesn\'t have a name')
            return []
        # TODO: GetFunctionName?
        _append_func_sig(config, f, sigs, logger, skip_too_short=False)
    elif mode == NON_AUTO_FUNCTIONS:
        for f in get_functions():
            named = has_name(ida_bytes.get_full_flags(f.start_ea))
            if named and (f.flags & FUNC_LIB) == 0:
                _append_func_sig(config, f, sigs, logger)
    elif mode == LIBRARY_FUNCTIONS:
        for f in get_functions():
            named = has_name(ida_bytes.get_full_flags(f.start_ea))
            if named and (f.flags & FUNC_LIB) != 0:
                _append_func_sig(config, f, sigs, logger)
    elif mode == PUBLIC_FUNCTIONS:
        for f in get_functions():
            if is_public_name(f.start_ea):
                _append_func_sig(config, f, sigs, logger)
    elif mode == ENTRY_POINT_FUNCTIONS:
        # Entry points are indexed 0..get_entry_qty()-1; looping over the
        # function count instead can miss entries or query invalid indices.
        for i in range(idaapi.get_entry_qty()):
            f = get_func(get_entry(get_entry_ordinal(i)))
            if f is not None:
                _append_func_sig(config, f, sigs, logger)
    elif mode == ALL_FUNCTIONS:
        n = idaapi.get_func_qty()
        for i, f in enumerate(get_functions()):
            logger.info('[ %d / %d ] %s %s', i + 1, n,
                        ida_name.get_name(f.start_ea), hex(f.start_ea))
            _append_func_sig(config, f, sigs, logger)
    return sigs
def get_pat_file(config):
    """
    Decide which PAT file the signatures are written to.

    With both pat_append and batch set, a shared 'ida2pat_res.pat' in the
    temp directory is used. Otherwise the name is the input file path with
    its extension replaced by '.pat'; in batch mode that name is returned
    directly, else the user is asked and None is returned if no file is
    chosen.
    type config: Config
    """
    logger = logging.getLogger('idb2pat:get_pat_file')
    if config.pat_append and config.batch:
        return os.path.join(tempfile.gettempdir(), 'ida2pat_res.pat')
    stem = os.path.splitext(get_input_file_path())[0]
    default_name = '%s.pat' % stem
    if config.batch:
        return default_name
    chosen = ida_kernwin.ask_file(1, default_name,
                                  'Enter the name of the pattern file')
    if chosen is None:
        logger.debug('User did not choose a pattern file')
    return chosen
def update_config(config):
    """
    Apply the optional '<input file stem>.conf' JSON file to `config`.

    A missing file leaves the defaults untouched; a file that cannot be
    parsed is logged as invalid and ignored.
    type config: Config
    """
    logger = logging.getLogger('idb2pat:update_config')
    conf_path = '%s.conf' % os.path.splitext(get_input_file_path())[0]
    if not os.path.exists(conf_path):
        logger.debug('No configuration file provided, using defaults')
        return
    with open(conf_path, 'rb') as conf_file:
        raw = conf_file.read()
    try:
        vals = json.loads(raw)
    except Exception as e:
        logger.exception(e)
        logger.warning('Configuration file invalid')
    else:
        config.update(vals)
def main():
    """
    Generate the PAT file for the currently loaded database.

    Loads the configuration, optionally adds a file log handler, waits for
    IDA's auto-analysis, builds the signatures and writes them one per line
    followed by a closing '---' line. The file is appended to when
    pat_append is set and overwritten otherwise. In batch mode IDA is
    closed with idc.qexit(0) at the end.
    """
    config = Config()
    update_config(config)
    if config.logenabled:
        file_handler = logging.FileHandler(config.logfile)
        file_handler.setLevel(config.loglevel)
        logging.getLogger().addHandler(file_handler)
    filename = get_pat_file(config)
    if filename is None:
        g_logger.debug('No file selected')
        return
    idc.auto_wait()
    sigs = make_func_sigs(config)
    # Both output variants write identical content; only the mode differs.
    open_mode = 'ab' if config.pat_append else 'wb'
    newline = os.linesep.encode()
    with open(filename, open_mode) as out:
        for sig in sigs:
            out.write(sig.encode())
            out.write(newline)
        out.write(b'---')
        out.write(newline)
    if config.batch:
        idc.qexit(0)
# Generate the PAT file only when this file is executed as the main script.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment